VirtualBox

source: vbox/trunk/src/libs/libxml2-2.9.2/parser.c@ 63108

Last change on this file since 63108 was 58076, checked in by vboxsync, 9 years ago

upstream fixes post 2.9.2

  • Property svn:eol-style set to native
File size: 427.0 KB
Line 
1/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
15 * high level APIs to call the parser and a few miscellaneous functions.
16 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of character are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
25 * from the SAX callbacks or as standalone functions using a preparsed
26 * document.
27 *
28 * See Copyright for the status of this software.
29 *
30 * daniel@veillard.com
31 */
32
33#define IN_LIBXML
34#include "libxml.h"
35
36#if defined(WIN32) && !defined (__CYGWIN__)
37#define XML_DIR_SEP '\\'
38#else
39#define XML_DIR_SEP '/'
40#endif
41
42#include <stdlib.h>
43#include <limits.h>
44#include <string.h>
45#include <stdarg.h>
46#include <libxml/xmlmemory.h>
47#include <libxml/threads.h>
48#include <libxml/globals.h>
49#include <libxml/tree.h>
50#include <libxml/parser.h>
51#include <libxml/parserInternals.h>
52#include <libxml/valid.h>
53#include <libxml/entities.h>
54#include <libxml/xmlerror.h>
55#include <libxml/encoding.h>
56#include <libxml/xmlIO.h>
57#include <libxml/uri.h>
58#ifdef LIBXML_CATALOG_ENABLED
59#include <libxml/catalog.h>
60#endif
61#ifdef LIBXML_SCHEMAS_ENABLED
62#include <libxml/xmlschemastypes.h>
63#include <libxml/relaxng.h>
64#endif
65#ifdef HAVE_CTYPE_H
66#include <ctype.h>
67#endif
68#ifdef HAVE_STDLIB_H
69#include <stdlib.h>
70#endif
71#ifdef HAVE_SYS_STAT_H
72#include <sys/stat.h>
73#endif
74#ifdef HAVE_FCNTL_H
75#include <fcntl.h>
76#endif
77#ifdef HAVE_UNISTD_H
78#include <unistd.h>
79#endif
80#ifdef HAVE_ZLIB_H
81#include <zlib.h>
82#endif
83#ifdef HAVE_LZMA_H
84#include <lzma.h>
85#endif
86
87#include "buf.h"
88#include "enc.h"
89
90static void
91xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
92
93static xmlParserCtxtPtr
94xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
95 const xmlChar *base, xmlParserCtxtPtr pctx);
96
97/************************************************************************
98 * *
99 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
100 * *
101 ************************************************************************/
102
/*
 * Entity size thresholds. xmlParserEntityCheck() does not apply its
 * size-based test to entities smaller than XML_PARSER_BIG_ENTITY.
 * NOTE(review): XML_PARSER_LOT_ENTITY is not referenced in this part of
 * the file - confirm its users before changing it.
 */
#define XML_PARSER_BIG_ENTITY 1000
#define XML_PARSER_LOT_ENTITY 5000

/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 * replacement over the size in bytes of the input indicates that you have
 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
 * replacements per byte of input.
 */
#define XML_PARSER_NON_LINEAR 10
113
114/*
115 * xmlParserEntityCheck
116 *
117 * Function to check non-linear entity expansion behaviour
118 * This is here to detect and stop exponential linear entity expansion
119 * This is not a limitation of the parser but a safety
120 * boundary feature. It can be disabled with the XML_PARSE_HUGE
121 * parser option.
122 */
123static int
124xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
125 xmlEntityPtr ent, size_t replacement)
126{
127 size_t consumed = 0;
128
129 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
130 return (0);
131 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
132 return (1);
133
134 /*
135 * This may look absurd but is needed to detect
136 * entities problems
137 */
138 if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
139 (ent->content != NULL) && (ent->checked == 0)) {
140 unsigned long oldnbent = ctxt->nbentities;
141 xmlChar *rep;
142
143 ent->checked = 1;
144
145 rep = xmlStringDecodeEntities(ctxt, ent->content,
146 XML_SUBSTITUTE_REF, 0, 0, 0);
147
148 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
149 if (rep != NULL) {
150 if (xmlStrchr(rep, '<'))
151 ent->checked |= 1;
152 xmlFree(rep);
153 rep = NULL;
154 }
155 }
156 if (replacement != 0) {
157 if (replacement < XML_MAX_TEXT_LENGTH)
158 return(0);
159
160 /*
161 * If the volume of entity copy reaches 10 times the
162 * amount of parsed data and over the large text threshold
163 * then that's very likely to be an abuse.
164 */
165 if (ctxt->input != NULL) {
166 consumed = ctxt->input->consumed +
167 (ctxt->input->cur - ctxt->input->base);
168 }
169 consumed += ctxt->sizeentities;
170
171 if (replacement < XML_PARSER_NON_LINEAR * consumed)
172 return(0);
173 } else if (size != 0) {
174 /*
175 * Do the check based on the replacement size of the entity
176 */
177 if (size < XML_PARSER_BIG_ENTITY)
178 return(0);
179
180 /*
181 * A limit on the amount of text data reasonably used
182 */
183 if (ctxt->input != NULL) {
184 consumed = ctxt->input->consumed +
185 (ctxt->input->cur - ctxt->input->base);
186 }
187 consumed += ctxt->sizeentities;
188
189 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
190 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
191 return (0);
192 } else if (ent != NULL) {
193 /*
194 * use the number of parsed entities in the replacement
195 */
196 size = ent->checked / 2;
197
198 /*
199 * The amount of data parsed counting entities size only once
200 */
201 if (ctxt->input != NULL) {
202 consumed = ctxt->input->consumed +
203 (ctxt->input->cur - ctxt->input->base);
204 }
205 consumed += ctxt->sizeentities;
206
207 /*
208 * Check the density of entities for the amount of data
209 * knowing an entity reference will take at least 3 bytes
210 */
211 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
212 return (0);
213 } else {
214 /*
215 * strange we got no data for checking
216 */
217 if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
218 (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
219 (ctxt->nbentities <= 10000))
220 return (0);
221 }
222 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
223 return (1);
224}
225
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary limit on the depth of the XML documents that we allow to
 * process. This is not a limitation of the parser but a safety
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
 * parser option.
 */
unsigned int xmlParserMaxDepth = 256;
235
236
237
/*
 * Parser-wide flag and buffer-size constants.
 * NOTE(review): the code using SAX2, the two buffer sizes and
 * SAX_COMPAT_MODE is outside this part of the file - confirm their
 * users before changing any value.
 */
#define SAX2 1
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"

/**
 * XML_PARSER_CHUNK_SIZE
 *
 * When calling GROW, that is the minimal amount of data the parser
 * expects to have received. It is not a hard limit but an optimization
 * when reading strings like Names. It is not strictly needed as long as
 * the input's available characters are followed by 0, which should be
 * provided by the I/O level.
 */
#define XML_PARSER_CHUNK_SIZE 100
253
/*
 * NULL-terminated list of the "xml"-prefixed processing instruction
 * targets allowed by W3C specs.
 */
static const char *xmlW3CPIs[] = { "xml-stylesheet", "xml-model", NULL };
263
264
265/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
266static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
267 const xmlChar **str);
268
269static xmlParserErrors
270xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
271 xmlSAXHandlerPtr sax,
272 void *user_data, int depth, const xmlChar *URL,
273 const xmlChar *ID, xmlNodePtr *list);
274
275static int
276xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
277 const char *encoding);
278#ifdef LIBXML_LEGACY_ENABLED
279static void
280xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
281 xmlNodePtr lastNode);
282#endif /* LIBXML_LEGACY_ENABLED */
283
284static xmlParserErrors
285xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
286 const xmlChar *string, void *user_data, xmlNodePtr *lst);
287
288static int
289xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
290
291/************************************************************************
292 * *
293 * Some factorized error routines *
294 * *
295 ************************************************************************/
296
297/**
298 * xmlErrAttributeDup:
299 * @ctxt: an XML parser context
300 * @prefix: the attribute prefix
301 * @localname: the attribute localname
302 *
303 * Handle a redefinition of attribute error
304 */
305static void
306xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
307 const xmlChar * localname)
308{
309 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
310 (ctxt->instate == XML_PARSER_EOF))
311 return;
312 if (ctxt != NULL)
313 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
314
315 if (prefix == NULL)
316 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
317 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
318 (const char *) localname, NULL, NULL, 0, 0,
319 "Attribute %s redefined\n", localname);
320 else
321 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
322 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
323 (const char *) prefix, (const char *) localname,
324 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
325 localname);
326 if (ctxt != NULL) {
327 ctxt->wellFormed = 0;
328 if (ctxt->recovery == 0)
329 ctxt->disableSAX = 1;
330 }
331}
332
333/**
334 * xmlFatalErr:
335 * @ctxt: an XML parser context
336 * @error: the error number
337 * @extra: extra information string
338 *
339 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
340 */
341static void
342xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
343{
344 const char *errmsg;
345 char errstr[129] = "";
346
347 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
348 (ctxt->instate == XML_PARSER_EOF))
349 return;
350 switch (error) {
351 case XML_ERR_INVALID_HEX_CHARREF:
352 errmsg = "CharRef: invalid hexadecimal value";
353 break;
354 case XML_ERR_INVALID_DEC_CHARREF:
355 errmsg = "CharRef: invalid decimal value";
356 break;
357 case XML_ERR_INVALID_CHARREF:
358 errmsg = "CharRef: invalid value";
359 break;
360 case XML_ERR_INTERNAL_ERROR:
361 errmsg = "internal error";
362 break;
363 case XML_ERR_PEREF_AT_EOF:
364 errmsg = "PEReference at end of document";
365 break;
366 case XML_ERR_PEREF_IN_PROLOG:
367 errmsg = "PEReference in prolog";
368 break;
369 case XML_ERR_PEREF_IN_EPILOG:
370 errmsg = "PEReference in epilog";
371 break;
372 case XML_ERR_PEREF_NO_NAME:
373 errmsg = "PEReference: no name";
374 break;
375 case XML_ERR_PEREF_SEMICOL_MISSING:
376 errmsg = "PEReference: expecting ';'";
377 break;
378 case XML_ERR_ENTITY_LOOP:
379 errmsg = "Detected an entity reference loop";
380 break;
381 case XML_ERR_ENTITY_NOT_STARTED:
382 errmsg = "EntityValue: \" or ' expected";
383 break;
384 case XML_ERR_ENTITY_PE_INTERNAL:
385 errmsg = "PEReferences forbidden in internal subset";
386 break;
387 case XML_ERR_ENTITY_NOT_FINISHED:
388 errmsg = "EntityValue: \" or ' expected";
389 break;
390 case XML_ERR_ATTRIBUTE_NOT_STARTED:
391 errmsg = "AttValue: \" or ' expected";
392 break;
393 case XML_ERR_LT_IN_ATTRIBUTE:
394 errmsg = "Unescaped '<' not allowed in attributes values";
395 break;
396 case XML_ERR_LITERAL_NOT_STARTED:
397 errmsg = "SystemLiteral \" or ' expected";
398 break;
399 case XML_ERR_LITERAL_NOT_FINISHED:
400 errmsg = "Unfinished System or Public ID \" or ' expected";
401 break;
402 case XML_ERR_MISPLACED_CDATA_END:
403 errmsg = "Sequence ']]>' not allowed in content";
404 break;
405 case XML_ERR_URI_REQUIRED:
406 errmsg = "SYSTEM or PUBLIC, the URI is missing";
407 break;
408 case XML_ERR_PUBID_REQUIRED:
409 errmsg = "PUBLIC, the Public Identifier is missing";
410 break;
411 case XML_ERR_HYPHEN_IN_COMMENT:
412 errmsg = "Comment must not contain '--' (double-hyphen)";
413 break;
414 case XML_ERR_PI_NOT_STARTED:
415 errmsg = "xmlParsePI : no target name";
416 break;
417 case XML_ERR_RESERVED_XML_NAME:
418 errmsg = "Invalid PI name";
419 break;
420 case XML_ERR_NOTATION_NOT_STARTED:
421 errmsg = "NOTATION: Name expected here";
422 break;
423 case XML_ERR_NOTATION_NOT_FINISHED:
424 errmsg = "'>' required to close NOTATION declaration";
425 break;
426 case XML_ERR_VALUE_REQUIRED:
427 errmsg = "Entity value required";
428 break;
429 case XML_ERR_URI_FRAGMENT:
430 errmsg = "Fragment not allowed";
431 break;
432 case XML_ERR_ATTLIST_NOT_STARTED:
433 errmsg = "'(' required to start ATTLIST enumeration";
434 break;
435 case XML_ERR_NMTOKEN_REQUIRED:
436 errmsg = "NmToken expected in ATTLIST enumeration";
437 break;
438 case XML_ERR_ATTLIST_NOT_FINISHED:
439 errmsg = "')' required to finish ATTLIST enumeration";
440 break;
441 case XML_ERR_MIXED_NOT_STARTED:
442 errmsg = "MixedContentDecl : '|' or ')*' expected";
443 break;
444 case XML_ERR_PCDATA_REQUIRED:
445 errmsg = "MixedContentDecl : '#PCDATA' expected";
446 break;
447 case XML_ERR_ELEMCONTENT_NOT_STARTED:
448 errmsg = "ContentDecl : Name or '(' expected";
449 break;
450 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
451 errmsg = "ContentDecl : ',' '|' or ')' expected";
452 break;
453 case XML_ERR_PEREF_IN_INT_SUBSET:
454 errmsg =
455 "PEReference: forbidden within markup decl in internal subset";
456 break;
457 case XML_ERR_GT_REQUIRED:
458 errmsg = "expected '>'";
459 break;
460 case XML_ERR_CONDSEC_INVALID:
461 errmsg = "XML conditional section '[' expected";
462 break;
463 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
464 errmsg = "Content error in the external subset";
465 break;
466 case XML_ERR_CONDSEC_INVALID_KEYWORD:
467 errmsg =
468 "conditional section INCLUDE or IGNORE keyword expected";
469 break;
470 case XML_ERR_CONDSEC_NOT_FINISHED:
471 errmsg = "XML conditional section not closed";
472 break;
473 case XML_ERR_XMLDECL_NOT_STARTED:
474 errmsg = "Text declaration '<?xml' required";
475 break;
476 case XML_ERR_XMLDECL_NOT_FINISHED:
477 errmsg = "parsing XML declaration: '?>' expected";
478 break;
479 case XML_ERR_EXT_ENTITY_STANDALONE:
480 errmsg = "external parsed entities cannot be standalone";
481 break;
482 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
483 errmsg = "EntityRef: expecting ';'";
484 break;
485 case XML_ERR_DOCTYPE_NOT_FINISHED:
486 errmsg = "DOCTYPE improperly terminated";
487 break;
488 case XML_ERR_LTSLASH_REQUIRED:
489 errmsg = "EndTag: '</' not found";
490 break;
491 case XML_ERR_EQUAL_REQUIRED:
492 errmsg = "expected '='";
493 break;
494 case XML_ERR_STRING_NOT_CLOSED:
495 errmsg = "String not closed expecting \" or '";
496 break;
497 case XML_ERR_STRING_NOT_STARTED:
498 errmsg = "String not started expecting ' or \"";
499 break;
500 case XML_ERR_ENCODING_NAME:
501 errmsg = "Invalid XML encoding name";
502 break;
503 case XML_ERR_STANDALONE_VALUE:
504 errmsg = "standalone accepts only 'yes' or 'no'";
505 break;
506 case XML_ERR_DOCUMENT_EMPTY:
507 errmsg = "Document is empty";
508 break;
509 case XML_ERR_DOCUMENT_END:
510 errmsg = "Extra content at the end of the document";
511 break;
512 case XML_ERR_NOT_WELL_BALANCED:
513 errmsg = "chunk is not well balanced";
514 break;
515 case XML_ERR_EXTRA_CONTENT:
516 errmsg = "extra content at the end of well balanced chunk";
517 break;
518 case XML_ERR_VERSION_MISSING:
519 errmsg = "Malformed declaration expecting version";
520 break;
521 case XML_ERR_NAME_TOO_LONG:
522 errmsg = "Name too long use XML_PARSE_HUGE option";
523 break;
524#if 0
525 case:
526 errmsg = "";
527 break;
528#endif
529 default:
530 errmsg = "Unregistered error message";
531 }
532 if (info == NULL)
533 snprintf(errstr, 128, "%s\n", errmsg);
534 else
535 snprintf(errstr, 128, "%s: %%s\n", errmsg);
536 if (ctxt != NULL)
537 ctxt->errNo = error;
538 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
539 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, &errstr[0],
540 info);
541 if (ctxt != NULL) {
542 ctxt->wellFormed = 0;
543 if (ctxt->recovery == 0)
544 ctxt->disableSAX = 1;
545 }
546}
547
548/**
549 * xmlFatalErrMsg:
550 * @ctxt: an XML parser context
551 * @error: the error number
552 * @msg: the error message
553 *
554 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
555 */
556static void
557xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
558 const char *msg)
559{
560 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
561 (ctxt->instate == XML_PARSER_EOF))
562 return;
563 if (ctxt != NULL)
564 ctxt->errNo = error;
565 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
566 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
567 if (ctxt != NULL) {
568 ctxt->wellFormed = 0;
569 if (ctxt->recovery == 0)
570 ctxt->disableSAX = 1;
571 }
572}
573
574/**
575 * xmlWarningMsg:
576 * @ctxt: an XML parser context
577 * @error: the error number
578 * @msg: the error message
579 * @str1: extra data
580 * @str2: extra data
581 *
582 * Handle a warning.
583 */
584static void
585xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
586 const char *msg, const xmlChar *str1, const xmlChar *str2)
587{
588 xmlStructuredErrorFunc schannel = NULL;
589
590 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
591 (ctxt->instate == XML_PARSER_EOF))
592 return;
593 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
594 (ctxt->sax->initialized == XML_SAX2_MAGIC))
595 schannel = ctxt->sax->serror;
596 if (ctxt != NULL) {
597 __xmlRaiseError(schannel,
598 (ctxt->sax) ? ctxt->sax->warning : NULL,
599 ctxt->userData,
600 ctxt, NULL, XML_FROM_PARSER, error,
601 XML_ERR_WARNING, NULL, 0,
602 (const char *) str1, (const char *) str2, NULL, 0, 0,
603 msg, (const char *) str1, (const char *) str2);
604 } else {
605 __xmlRaiseError(schannel, NULL, NULL,
606 ctxt, NULL, XML_FROM_PARSER, error,
607 XML_ERR_WARNING, NULL, 0,
608 (const char *) str1, (const char *) str2, NULL, 0, 0,
609 msg, (const char *) str1, (const char *) str2);
610 }
611}
612
613/**
614 * xmlValidityError:
615 * @ctxt: an XML parser context
616 * @error: the error number
617 * @msg: the error message
618 * @str1: extra data
619 *
620 * Handle a validity error.
621 */
622static void
623xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
624 const char *msg, const xmlChar *str1, const xmlChar *str2)
625{
626 xmlStructuredErrorFunc schannel = NULL;
627
628 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
629 (ctxt->instate == XML_PARSER_EOF))
630 return;
631 if (ctxt != NULL) {
632 ctxt->errNo = error;
633 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
634 schannel = ctxt->sax->serror;
635 }
636 if (ctxt != NULL) {
637 __xmlRaiseError(schannel,
638 ctxt->vctxt.error, ctxt->vctxt.userData,
639 ctxt, NULL, XML_FROM_DTD, error,
640 XML_ERR_ERROR, NULL, 0, (const char *) str1,
641 (const char *) str2, NULL, 0, 0,
642 msg, (const char *) str1, (const char *) str2);
643 ctxt->valid = 0;
644 } else {
645 __xmlRaiseError(schannel, NULL, NULL,
646 ctxt, NULL, XML_FROM_DTD, error,
647 XML_ERR_ERROR, NULL, 0, (const char *) str1,
648 (const char *) str2, NULL, 0, 0,
649 msg, (const char *) str1, (const char *) str2);
650 }
651}
652
653/**
654 * xmlFatalErrMsgInt:
655 * @ctxt: an XML parser context
656 * @error: the error number
657 * @msg: the error message
658 * @val: an integer value
659 *
660 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
661 */
662static void
663xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
664 const char *msg, int val)
665{
666 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
667 (ctxt->instate == XML_PARSER_EOF))
668 return;
669 if (ctxt != NULL)
670 ctxt->errNo = error;
671 __xmlRaiseError(NULL, NULL, NULL,
672 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
673 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
674 if (ctxt != NULL) {
675 ctxt->wellFormed = 0;
676 if (ctxt->recovery == 0)
677 ctxt->disableSAX = 1;
678 }
679}
680
681/**
682 * xmlFatalErrMsgStrIntStr:
683 * @ctxt: an XML parser context
684 * @error: the error number
685 * @msg: the error message
686 * @str1: an string info
687 * @val: an integer value
688 * @str2: an string info
689 *
690 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
691 */
692static void
693xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
694 const char *msg, const xmlChar *str1, int val,
695 const xmlChar *str2)
696{
697 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
698 (ctxt->instate == XML_PARSER_EOF))
699 return;
700 if (ctxt != NULL)
701 ctxt->errNo = error;
702 __xmlRaiseError(NULL, NULL, NULL,
703 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
704 NULL, 0, (const char *) str1, (const char *) str2,
705 NULL, val, 0, msg, str1, val, str2);
706 if (ctxt != NULL) {
707 ctxt->wellFormed = 0;
708 if (ctxt->recovery == 0)
709 ctxt->disableSAX = 1;
710 }
711}
712
713/**
714 * xmlFatalErrMsgStr:
715 * @ctxt: an XML parser context
716 * @error: the error number
717 * @msg: the error message
718 * @val: a string value
719 *
720 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
721 */
722static void
723xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
724 const char *msg, const xmlChar * val)
725{
726 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
727 (ctxt->instate == XML_PARSER_EOF))
728 return;
729 if (ctxt != NULL)
730 ctxt->errNo = error;
731 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
732 XML_FROM_PARSER, error, XML_ERR_FATAL,
733 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
734 val);
735 if (ctxt != NULL) {
736 ctxt->wellFormed = 0;
737 if (ctxt->recovery == 0)
738 ctxt->disableSAX = 1;
739 }
740}
741
742/**
743 * xmlErrMsgStr:
744 * @ctxt: an XML parser context
745 * @error: the error number
746 * @msg: the error message
747 * @val: a string value
748 *
749 * Handle a non fatal parser error
750 */
751static void
752xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
753 const char *msg, const xmlChar * val)
754{
755 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
756 (ctxt->instate == XML_PARSER_EOF))
757 return;
758 if (ctxt != NULL)
759 ctxt->errNo = error;
760 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
761 XML_FROM_PARSER, error, XML_ERR_ERROR,
762 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
763 val);
764}
765
766/**
767 * xmlNsErr:
768 * @ctxt: an XML parser context
769 * @error: the error number
770 * @msg: the message
771 * @info1: extra information string
772 * @info2: extra information string
773 *
774 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
775 */
776static void
777xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
778 const char *msg,
779 const xmlChar * info1, const xmlChar * info2,
780 const xmlChar * info3)
781{
782 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
783 (ctxt->instate == XML_PARSER_EOF))
784 return;
785 if (ctxt != NULL)
786 ctxt->errNo = error;
787 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
788 XML_ERR_ERROR, NULL, 0, (const char *) info1,
789 (const char *) info2, (const char *) info3, 0, 0, msg,
790 info1, info2, info3);
791 if (ctxt != NULL)
792 ctxt->nsWellFormed = 0;
793}
794
795/**
796 * xmlNsWarn
797 * @ctxt: an XML parser context
798 * @error: the error number
799 * @msg: the message
800 * @info1: extra information string
801 * @info2: extra information string
802 *
803 * Handle a namespace warning error
804 */
805static void
806xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
807 const char *msg,
808 const xmlChar * info1, const xmlChar * info2,
809 const xmlChar * info3)
810{
811 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
812 (ctxt->instate == XML_PARSER_EOF))
813 return;
814 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
815 XML_ERR_WARNING, NULL, 0, (const char *) info1,
816 (const char *) info2, (const char *) info3, 0, 0, msg,
817 info1, info2, info3);
818}
819
820/************************************************************************
821 * *
822 * Library wide options *
823 * *
824 ************************************************************************/
825
826/**
827 * xmlHasFeature:
828 * @feature: the feature to be examined
829 *
830 * Examines if the library has been compiled with a given feature.
831 *
832 * Returns a non-zero value if the feature exist, otherwise zero.
833 * Returns zero (0) if the feature does not exist or an unknown
834 * unknown feature is requested, non-zero otherwise.
835 */
836int
837xmlHasFeature(xmlFeature feature)
838{
839 switch (feature) {
840 case XML_WITH_THREAD:
841#ifdef LIBXML_THREAD_ENABLED
842 return(1);
843#else
844 return(0);
845#endif
846 case XML_WITH_TREE:
847#ifdef LIBXML_TREE_ENABLED
848 return(1);
849#else
850 return(0);
851#endif
852 case XML_WITH_OUTPUT:
853#ifdef LIBXML_OUTPUT_ENABLED
854 return(1);
855#else
856 return(0);
857#endif
858 case XML_WITH_PUSH:
859#ifdef LIBXML_PUSH_ENABLED
860 return(1);
861#else
862 return(0);
863#endif
864 case XML_WITH_READER:
865#ifdef LIBXML_READER_ENABLED
866 return(1);
867#else
868 return(0);
869#endif
870 case XML_WITH_PATTERN:
871#ifdef LIBXML_PATTERN_ENABLED
872 return(1);
873#else
874 return(0);
875#endif
876 case XML_WITH_WRITER:
877#ifdef LIBXML_WRITER_ENABLED
878 return(1);
879#else
880 return(0);
881#endif
882 case XML_WITH_SAX1:
883#ifdef LIBXML_SAX1_ENABLED
884 return(1);
885#else
886 return(0);
887#endif
888 case XML_WITH_FTP:
889#ifdef LIBXML_FTP_ENABLED
890 return(1);
891#else
892 return(0);
893#endif
894 case XML_WITH_HTTP:
895#ifdef LIBXML_HTTP_ENABLED
896 return(1);
897#else
898 return(0);
899#endif
900 case XML_WITH_VALID:
901#ifdef LIBXML_VALID_ENABLED
902 return(1);
903#else
904 return(0);
905#endif
906 case XML_WITH_HTML:
907#ifdef LIBXML_HTML_ENABLED
908 return(1);
909#else
910 return(0);
911#endif
912 case XML_WITH_LEGACY:
913#ifdef LIBXML_LEGACY_ENABLED
914 return(1);
915#else
916 return(0);
917#endif
918 case XML_WITH_C14N:
919#ifdef LIBXML_C14N_ENABLED
920 return(1);
921#else
922 return(0);
923#endif
924 case XML_WITH_CATALOG:
925#ifdef LIBXML_CATALOG_ENABLED
926 return(1);
927#else
928 return(0);
929#endif
930 case XML_WITH_XPATH:
931#ifdef LIBXML_XPATH_ENABLED
932 return(1);
933#else
934 return(0);
935#endif
936 case XML_WITH_XPTR:
937#ifdef LIBXML_XPTR_ENABLED
938 return(1);
939#else
940 return(0);
941#endif
942 case XML_WITH_XINCLUDE:
943#ifdef LIBXML_XINCLUDE_ENABLED
944 return(1);
945#else
946 return(0);
947#endif
948 case XML_WITH_ICONV:
949#ifdef LIBXML_ICONV_ENABLED
950 return(1);
951#else
952 return(0);
953#endif
954 case XML_WITH_ISO8859X:
955#ifdef LIBXML_ISO8859X_ENABLED
956 return(1);
957#else
958 return(0);
959#endif
960 case XML_WITH_UNICODE:
961#ifdef LIBXML_UNICODE_ENABLED
962 return(1);
963#else
964 return(0);
965#endif
966 case XML_WITH_REGEXP:
967#ifdef LIBXML_REGEXP_ENABLED
968 return(1);
969#else
970 return(0);
971#endif
972 case XML_WITH_AUTOMATA:
973#ifdef LIBXML_AUTOMATA_ENABLED
974 return(1);
975#else
976 return(0);
977#endif
978 case XML_WITH_EXPR:
979#ifdef LIBXML_EXPR_ENABLED
980 return(1);
981#else
982 return(0);
983#endif
984 case XML_WITH_SCHEMAS:
985#ifdef LIBXML_SCHEMAS_ENABLED
986 return(1);
987#else
988 return(0);
989#endif
990 case XML_WITH_SCHEMATRON:
991#ifdef LIBXML_SCHEMATRON_ENABLED
992 return(1);
993#else
994 return(0);
995#endif
996 case XML_WITH_MODULES:
997#ifdef LIBXML_MODULES_ENABLED
998 return(1);
999#else
1000 return(0);
1001#endif
1002 case XML_WITH_DEBUG:
1003#ifdef LIBXML_DEBUG_ENABLED
1004 return(1);
1005#else
1006 return(0);
1007#endif
1008 case XML_WITH_DEBUG_MEM:
1009#ifdef DEBUG_MEMORY_LOCATION
1010 return(1);
1011#else
1012 return(0);
1013#endif
1014 case XML_WITH_DEBUG_RUN:
1015#ifdef LIBXML_DEBUG_RUNTIME
1016 return(1);
1017#else
1018 return(0);
1019#endif
1020 case XML_WITH_ZLIB:
1021#ifdef LIBXML_ZLIB_ENABLED
1022 return(1);
1023#else
1024 return(0);
1025#endif
1026 case XML_WITH_LZMA:
1027#ifdef LIBXML_LZMA_ENABLED
1028 return(1);
1029#else
1030 return(0);
1031#endif
1032 case XML_WITH_ICU:
1033#ifdef LIBXML_ICU_ENABLED
1034 return(1);
1035#else
1036 return(0);
1037#endif
1038 default:
1039 break;
1040 }
1041 return(0);
1042}
1043
1044/************************************************************************
1045 * *
1046 * SAX2 defaulted attributes handling *
1047 * *
1048 ************************************************************************/
1049
1050/**
1051 * xmlDetectSAX2:
1052 * @ctxt: an XML parser context
1053 *
1054 * Do the SAX2 detection and specific intialization
1055 */
1056static void
1057xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1058 if (ctxt == NULL) return;
1059#ifdef LIBXML_SAX1_ENABLED
1060 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
1061 ((ctxt->sax->startElementNs != NULL) ||
1062 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
1063#else
1064 ctxt->sax2 = 1;
1065#endif /* LIBXML_SAX1_ENABLED */
1066
1067 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1068 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1069 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1070 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1071 (ctxt->str_xml_ns == NULL)) {
1072 xmlErrMemory(ctxt, NULL);
1073 }
1074}
1075
1076typedef struct _xmlDefAttrs xmlDefAttrs;
1077typedef xmlDefAttrs *xmlDefAttrsPtr;
1078struct _xmlDefAttrs {
1079 int nbAttrs; /* number of defaulted attributes on that element */
1080 int maxAttrs; /* the size of the array */
1081 const xmlChar *values[5]; /* array of localname/prefix/values/external */
1082};
1083
1084/**
1085 * xmlAttrNormalizeSpace:
1086 * @src: the source string
1087 * @dst: the target string
1088 *
1089 * Normalize the space in non CDATA attribute values:
1090 * If the attribute type is not CDATA, then the XML processor MUST further
1091 * process the normalized attribute value by discarding any leading and
1092 * trailing space (#x20) characters, and by replacing sequences of space
1093 * (#x20) characters by a single space (#x20) character.
1094 * Note that the size of dst need to be at least src, and if one doesn't need
1095 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1096 * passing src as dst is just fine.
1097 *
1098 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1099 * is needed.
1100 */
1101static xmlChar *
1102xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1103{
1104 if ((src == NULL) || (dst == NULL))
1105 return(NULL);
1106
1107 while (*src == 0x20) src++;
1108 while (*src != 0) {
1109 if (*src == 0x20) {
1110 while (*src == 0x20) src++;
1111 if (*src != 0)
1112 *dst++ = 0x20;
1113 } else {
1114 *dst++ = *src++;
1115 }
1116 }
1117 *dst = 0;
1118 if (dst == src)
1119 return(NULL);
1120 return(dst);
1121}
1122
1123/**
1124 * xmlAttrNormalizeSpace2:
1125 * @src: the source string
1126 *
1127 * Normalize the space in non CDATA attribute values, a slightly more complex
1128 * front end to avoid allocation problems when running on attribute values
1129 * coming from the input.
1130 *
1131 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1132 * is needed.
1133 */
1134static const xmlChar *
1135xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1136{
1137 int i;
1138 int remove_head = 0;
1139 int need_realloc = 0;
1140 const xmlChar *cur;
1141
1142 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1143 return(NULL);
1144 i = *len;
1145 if (i <= 0)
1146 return(NULL);
1147
1148 cur = src;
1149 while (*cur == 0x20) {
1150 cur++;
1151 remove_head++;
1152 }
1153 while (*cur != 0) {
1154 if (*cur == 0x20) {
1155 cur++;
1156 if ((*cur == 0x20) || (*cur == 0)) {
1157 need_realloc = 1;
1158 break;
1159 }
1160 } else
1161 cur++;
1162 }
1163 if (need_realloc) {
1164 xmlChar *ret;
1165
1166 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1167 if (ret == NULL) {
1168 xmlErrMemory(ctxt, NULL);
1169 return(NULL);
1170 }
1171 xmlAttrNormalizeSpace(ret, ret);
1172 *len = (int) strlen((const char *)ret);
1173 return(ret);
1174 } else if (remove_head) {
1175 *len -= remove_head;
1176 memmove(src, src + remove_head, 1 + *len);
1177 return(src);
1178 }
1179 return(NULL);
1180}
1181
1182/**
1183 * xmlAddDefAttrs:
1184 * @ctxt: an XML parser context
1185 * @fullname: the element fullname
1186 * @fullattr: the attribute fullname
1187 * @value: the attribute value
1188 *
1189 * Add a defaulted attribute for an element
1190 */
1191static void
1192xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1193 const xmlChar *fullname,
1194 const xmlChar *fullattr,
1195 const xmlChar *value) {
1196 xmlDefAttrsPtr defaults;
1197 int len;
1198 const xmlChar *name;
1199 const xmlChar *prefix;
1200
1201 /*
1202 * Allows to detect attribute redefinitions
1203 */
1204 if (ctxt->attsSpecial != NULL) {
1205 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1206 return;
1207 }
1208
1209 if (ctxt->attsDefault == NULL) {
1210 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1211 if (ctxt->attsDefault == NULL)
1212 goto mem_error;
1213 }
1214
1215 /*
1216 * split the element name into prefix:localname , the string found
1217 * are within the DTD and then not associated to namespace names.
1218 */
1219 name = xmlSplitQName3(fullname, &len);
1220 if (name == NULL) {
1221 name = xmlDictLookup(ctxt->dict, fullname, -1);
1222 prefix = NULL;
1223 } else {
1224 name = xmlDictLookup(ctxt->dict, name, -1);
1225 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1226 }
1227
1228 /*
1229 * make sure there is some storage
1230 */
1231 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1232 if (defaults == NULL) {
1233 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1234 (4 * 5) * sizeof(const xmlChar *));
1235 if (defaults == NULL)
1236 goto mem_error;
1237 defaults->nbAttrs = 0;
1238 defaults->maxAttrs = 4;
1239 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1240 defaults, NULL) < 0) {
1241 xmlFree(defaults);
1242 goto mem_error;
1243 }
1244 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1245 xmlDefAttrsPtr temp;
1246
1247 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1248 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1249 if (temp == NULL)
1250 goto mem_error;
1251 defaults = temp;
1252 defaults->maxAttrs *= 2;
1253 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1254 defaults, NULL) < 0) {
1255 xmlFree(defaults);
1256 goto mem_error;
1257 }
1258 }
1259
1260 /*
1261 * Split the element name into prefix:localname , the string found
1262 * are within the DTD and hen not associated to namespace names.
1263 */
1264 name = xmlSplitQName3(fullattr, &len);
1265 if (name == NULL) {
1266 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1267 prefix = NULL;
1268 } else {
1269 name = xmlDictLookup(ctxt->dict, name, -1);
1270 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1271 }
1272
1273 defaults->values[5 * defaults->nbAttrs] = name;
1274 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1275 /* intern the string and precompute the end */
1276 len = xmlStrlen(value);
1277 value = xmlDictLookup(ctxt->dict, value, len);
1278 defaults->values[5 * defaults->nbAttrs + 2] = value;
1279 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1280 if (ctxt->external)
1281 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1282 else
1283 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1284 defaults->nbAttrs++;
1285
1286 return;
1287
1288mem_error:
1289 xmlErrMemory(ctxt, NULL);
1290 return;
1291}
1292
1293/**
1294 * xmlAddSpecialAttr:
1295 * @ctxt: an XML parser context
1296 * @fullname: the element fullname
1297 * @fullattr: the attribute fullname
1298 * @type: the attribute type
1299 *
1300 * Register this attribute type
1301 */
1302static void
1303xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1304 const xmlChar *fullname,
1305 const xmlChar *fullattr,
1306 int type)
1307{
1308 if (ctxt->attsSpecial == NULL) {
1309 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1310 if (ctxt->attsSpecial == NULL)
1311 goto mem_error;
1312 }
1313
1314 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1315 return;
1316
1317 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1318 (void *) (long) type);
1319 return;
1320
1321mem_error:
1322 xmlErrMemory(ctxt, NULL);
1323 return;
1324}
1325
1326/**
1327 * xmlCleanSpecialAttrCallback:
1328 *
1329 * Removes CDATA attributes from the special attribute table
1330 */
1331static void
1332xmlCleanSpecialAttrCallback(void *payload, void *data,
1333 const xmlChar *fullname, const xmlChar *fullattr,
1334 const xmlChar *unused ATTRIBUTE_UNUSED) {
1335 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1336
1337 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
1338 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1339 }
1340}
1341
1342/**
1343 * xmlCleanSpecialAttr:
1344 * @ctxt: an XML parser context
1345 *
1346 * Trim the list of attributes defined to remove all those of type
1347 * CDATA as they are not special. This call should be done when finishing
1348 * to parse the DTD and before starting to parse the document root.
1349 */
1350static void
1351xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1352{
1353 if (ctxt->attsSpecial == NULL)
1354 return;
1355
1356 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1357
1358 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1359 xmlHashFree(ctxt->attsSpecial, NULL);
1360 ctxt->attsSpecial = NULL;
1361 }
1362 return;
1363}
1364
1365/**
1366 * xmlCheckLanguageID:
1367 * @lang: pointer to the string value
1368 *
1369 * Checks that the value conforms to the LanguageID production:
1370 *
1371 * NOTE: this is somewhat deprecated, those productions were removed from
1372 * the XML Second edition.
1373 *
1374 * [33] LanguageID ::= Langcode ('-' Subcode)*
1375 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1376 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1377 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1378 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1379 * [38] Subcode ::= ([a-z] | [A-Z])+
1380 *
1381 * The current REC reference the sucessors of RFC 1766, currently 5646
1382 *
1383 * http://www.rfc-editor.org/rfc/rfc5646.txt
1384 * langtag = language
1385 * ["-" script]
1386 * ["-" region]
1387 * *("-" variant)
1388 * *("-" extension)
1389 * ["-" privateuse]
1390 * language = 2*3ALPHA ; shortest ISO 639 code
1391 * ["-" extlang] ; sometimes followed by
1392 * ; extended language subtags
1393 * / 4ALPHA ; or reserved for future use
1394 * / 5*8ALPHA ; or registered language subtag
1395 *
1396 * extlang = 3ALPHA ; selected ISO 639 codes
1397 * *2("-" 3ALPHA) ; permanently reserved
1398 *
1399 * script = 4ALPHA ; ISO 15924 code
1400 *
1401 * region = 2ALPHA ; ISO 3166-1 code
1402 * / 3DIGIT ; UN M.49 code
1403 *
1404 * variant = 5*8alphanum ; registered variants
1405 * / (DIGIT 3alphanum)
1406 *
1407 * extension = singleton 1*("-" (2*8alphanum))
1408 *
1409 * ; Single alphanumerics
1410 * ; "x" reserved for private use
1411 * singleton = DIGIT ; 0 - 9
1412 * / %x41-57 ; A - W
1413 * / %x59-5A ; Y - Z
1414 * / %x61-77 ; a - w
1415 * / %x79-7A ; y - z
1416 *
1417 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1418 * The parser below doesn't try to cope with extension or privateuse
1419 * that could be added but that's not interoperable anyway
1420 *
1421 * Returns 1 if correct 0 otherwise
1422 **/
1423int
1424xmlCheckLanguageID(const xmlChar * lang)
1425{
1426 const xmlChar *cur = lang, *nxt;
1427
1428 if (cur == NULL)
1429 return (0);
1430 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1431 ((cur[0] == 'I') && (cur[1] == '-')) ||
1432 ((cur[0] == 'x') && (cur[1] == '-')) ||
1433 ((cur[0] == 'X') && (cur[1] == '-'))) {
1434 /*
1435 * Still allow IANA code and user code which were coming
1436 * from the previous version of the XML-1.0 specification
1437 * it's deprecated but we should not fail
1438 */
1439 cur += 2;
1440 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1441 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1442 cur++;
1443 return(cur[0] == 0);
1444 }
1445 nxt = cur;
1446 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1447 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1448 nxt++;
1449 if (nxt - cur >= 4) {
1450 /*
1451 * Reserved
1452 */
1453 if ((nxt - cur > 8) || (nxt[0] != 0))
1454 return(0);
1455 return(1);
1456 }
1457 if (nxt - cur < 2)
1458 return(0);
1459 /* we got an ISO 639 code */
1460 if (nxt[0] == 0)
1461 return(1);
1462 if (nxt[0] != '-')
1463 return(0);
1464
1465 nxt++;
1466 cur = nxt;
1467 /* now we can have extlang or script or region or variant */
1468 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1469 goto region_m49;
1470
1471 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1472 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1473 nxt++;
1474 if (nxt - cur == 4)
1475 goto script;
1476 if (nxt - cur == 2)
1477 goto region;
1478 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1479 goto variant;
1480 if (nxt - cur != 3)
1481 return(0);
1482 /* we parsed an extlang */
1483 if (nxt[0] == 0)
1484 return(1);
1485 if (nxt[0] != '-')
1486 return(0);
1487
1488 nxt++;
1489 cur = nxt;
1490 /* now we can have script or region or variant */
1491 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1492 goto region_m49;
1493
1494 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1495 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1496 nxt++;
1497 if (nxt - cur == 2)
1498 goto region;
1499 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1500 goto variant;
1501 if (nxt - cur != 4)
1502 return(0);
1503 /* we parsed a script */
1504script:
1505 if (nxt[0] == 0)
1506 return(1);
1507 if (nxt[0] != '-')
1508 return(0);
1509
1510 nxt++;
1511 cur = nxt;
1512 /* now we can have region or variant */
1513 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1514 goto region_m49;
1515
1516 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1517 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1518 nxt++;
1519
1520 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1521 goto variant;
1522 if (nxt - cur != 2)
1523 return(0);
1524 /* we parsed a region */
1525region:
1526 if (nxt[0] == 0)
1527 return(1);
1528 if (nxt[0] != '-')
1529 return(0);
1530
1531 nxt++;
1532 cur = nxt;
1533 /* now we can just have a variant */
1534 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1535 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1536 nxt++;
1537
1538 if ((nxt - cur < 5) || (nxt - cur > 8))
1539 return(0);
1540
1541 /* we parsed a variant */
1542variant:
1543 if (nxt[0] == 0)
1544 return(1);
1545 if (nxt[0] != '-')
1546 return(0);
1547 /* extensions and private use subtags not checked */
1548 return (1);
1549
1550region_m49:
1551 if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1552 ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1553 nxt += 3;
1554 goto region;
1555 }
1556 return(0);
1557}
1558
1559/************************************************************************
1560 * *
1561 * Parser stacks related functions and macros *
1562 * *
1563 ************************************************************************/
1564
1565static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1566 const xmlChar ** str);
1567
1568#ifdef SAX2
1569/**
1570 * nsPush:
1571 * @ctxt: an XML parser context
1572 * @prefix: the namespace prefix or NULL
1573 * @URL: the namespace name
1574 *
1575 * Pushes a new parser namespace on top of the ns stack
1576 *
1577 * Returns -1 in case of error, -2 if the namespace should be discarded
1578 * and the index in the stack otherwise.
1579 */
1580static int
1581nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1582{
1583 if (ctxt->options & XML_PARSE_NSCLEAN) {
1584 int i;
1585 for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1586 if (ctxt->nsTab[i] == prefix) {
1587 /* in scope */
1588 if (ctxt->nsTab[i + 1] == URL)
1589 return(-2);
1590 /* out of scope keep it */
1591 break;
1592 }
1593 }
1594 }
1595 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1596 ctxt->nsMax = 10;
1597 ctxt->nsNr = 0;
1598 ctxt->nsTab = (const xmlChar **)
1599 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1600 if (ctxt->nsTab == NULL) {
1601 xmlErrMemory(ctxt, NULL);
1602 ctxt->nsMax = 0;
1603 return (-1);
1604 }
1605 } else if (ctxt->nsNr >= ctxt->nsMax) {
1606 const xmlChar ** tmp;
1607 ctxt->nsMax *= 2;
1608 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1609 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1610 if (tmp == NULL) {
1611 xmlErrMemory(ctxt, NULL);
1612 ctxt->nsMax /= 2;
1613 return (-1);
1614 }
1615 ctxt->nsTab = tmp;
1616 }
1617 ctxt->nsTab[ctxt->nsNr++] = prefix;
1618 ctxt->nsTab[ctxt->nsNr++] = URL;
1619 return (ctxt->nsNr);
1620}
1621/**
1622 * nsPop:
1623 * @ctxt: an XML parser context
1624 * @nr: the number to pop
1625 *
1626 * Pops the top @nr parser prefix/namespace from the ns stack
1627 *
1628 * Returns the number of namespaces removed
1629 */
1630static int
1631nsPop(xmlParserCtxtPtr ctxt, int nr)
1632{
1633 int i;
1634
1635 if (ctxt->nsTab == NULL) return(0);
1636 if (ctxt->nsNr < nr) {
1637 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1638 nr = ctxt->nsNr;
1639 }
1640 if (ctxt->nsNr <= 0)
1641 return (0);
1642
1643 for (i = 0;i < nr;i++) {
1644 ctxt->nsNr--;
1645 ctxt->nsTab[ctxt->nsNr] = NULL;
1646 }
1647 return(nr);
1648}
1649#endif
1650
1651static int
1652xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1653 const xmlChar **atts;
1654 int *attallocs;
1655 int maxatts;
1656
1657 if (ctxt->atts == NULL) {
1658 maxatts = 55; /* allow for 10 attrs by default */
1659 atts = (const xmlChar **)
1660 xmlMalloc(maxatts * sizeof(xmlChar *));
1661 if (atts == NULL) goto mem_error;
1662 ctxt->atts = atts;
1663 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1664 if (attallocs == NULL) goto mem_error;
1665 ctxt->attallocs = attallocs;
1666 ctxt->maxatts = maxatts;
1667 } else if (nr + 5 > ctxt->maxatts) {
1668 maxatts = (nr + 5) * 2;
1669 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1670 maxatts * sizeof(const xmlChar *));
1671 if (atts == NULL) goto mem_error;
1672 ctxt->atts = atts;
1673 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1674 (maxatts / 5) * sizeof(int));
1675 if (attallocs == NULL) goto mem_error;
1676 ctxt->attallocs = attallocs;
1677 ctxt->maxatts = maxatts;
1678 }
1679 return(ctxt->maxatts);
1680mem_error:
1681 xmlErrMemory(ctxt, NULL);
1682 return(-1);
1683}
1684
1685/**
1686 * inputPush:
1687 * @ctxt: an XML parser context
1688 * @value: the parser input
1689 *
1690 * Pushes a new parser input on top of the input stack
1691 *
1692 * Returns -1 in case of error, the index in the stack otherwise
1693 */
1694int
1695inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1696{
1697 if ((ctxt == NULL) || (value == NULL))
1698 return(-1);
1699 if (ctxt->inputNr >= ctxt->inputMax) {
1700 ctxt->inputMax *= 2;
1701 ctxt->inputTab =
1702 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1703 ctxt->inputMax *
1704 sizeof(ctxt->inputTab[0]));
1705 if (ctxt->inputTab == NULL) {
1706 xmlErrMemory(ctxt, NULL);
1707 xmlFreeInputStream(value);
1708 ctxt->inputMax /= 2;
1709 value = NULL;
1710 return (-1);
1711 }
1712 }
1713 ctxt->inputTab[ctxt->inputNr] = value;
1714 ctxt->input = value;
1715 return (ctxt->inputNr++);
1716}
1717/**
1718 * inputPop:
1719 * @ctxt: an XML parser context
1720 *
1721 * Pops the top parser input from the input stack
1722 *
1723 * Returns the input just removed
1724 */
1725xmlParserInputPtr
1726inputPop(xmlParserCtxtPtr ctxt)
1727{
1728 xmlParserInputPtr ret;
1729
1730 if (ctxt == NULL)
1731 return(NULL);
1732 if (ctxt->inputNr <= 0)
1733 return (NULL);
1734 ctxt->inputNr--;
1735 if (ctxt->inputNr > 0)
1736 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1737 else
1738 ctxt->input = NULL;
1739 ret = ctxt->inputTab[ctxt->inputNr];
1740 ctxt->inputTab[ctxt->inputNr] = NULL;
1741 return (ret);
1742}
1743/**
1744 * nodePush:
1745 * @ctxt: an XML parser context
1746 * @value: the element node
1747 *
1748 * Pushes a new element node on top of the node stack
1749 *
1750 * Returns -1 in case of error, the index in the stack otherwise
1751 */
1752int
1753nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1754{
1755 if (ctxt == NULL) return(0);
1756 if (ctxt->nodeNr >= ctxt->nodeMax) {
1757 xmlNodePtr *tmp;
1758
1759 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1760 ctxt->nodeMax * 2 *
1761 sizeof(ctxt->nodeTab[0]));
1762 if (tmp == NULL) {
1763 xmlErrMemory(ctxt, NULL);
1764 return (-1);
1765 }
1766 ctxt->nodeTab = tmp;
1767 ctxt->nodeMax *= 2;
1768 }
1769 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1770 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1771 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1772 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1773 xmlParserMaxDepth);
1774 ctxt->instate = XML_PARSER_EOF;
1775 return(-1);
1776 }
1777 ctxt->nodeTab[ctxt->nodeNr] = value;
1778 ctxt->node = value;
1779 return (ctxt->nodeNr++);
1780}
1781
1782/**
1783 * nodePop:
1784 * @ctxt: an XML parser context
1785 *
1786 * Pops the top element node from the node stack
1787 *
1788 * Returns the node just removed
1789 */
1790xmlNodePtr
1791nodePop(xmlParserCtxtPtr ctxt)
1792{
1793 xmlNodePtr ret;
1794
1795 if (ctxt == NULL) return(NULL);
1796 if (ctxt->nodeNr <= 0)
1797 return (NULL);
1798 ctxt->nodeNr--;
1799 if (ctxt->nodeNr > 0)
1800 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1801 else
1802 ctxt->node = NULL;
1803 ret = ctxt->nodeTab[ctxt->nodeNr];
1804 ctxt->nodeTab[ctxt->nodeNr] = NULL;
1805 return (ret);
1806}
1807
1808#ifdef LIBXML_PUSH_ENABLED
1809/**
1810 * nameNsPush:
1811 * @ctxt: an XML parser context
1812 * @value: the element name
1813 * @prefix: the element prefix
1814 * @URI: the element namespace name
1815 *
1816 * Pushes a new element name/prefix/URL on top of the name stack
1817 *
1818 * Returns -1 in case of error, the index in the stack otherwise
1819 */
1820static int
1821nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1822 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1823{
1824 if (ctxt->nameNr >= ctxt->nameMax) {
1825 const xmlChar * *tmp;
1826 void **tmp2;
1827 ctxt->nameMax *= 2;
1828 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1829 ctxt->nameMax *
1830 sizeof(ctxt->nameTab[0]));
1831 if (tmp == NULL) {
1832 ctxt->nameMax /= 2;
1833 goto mem_error;
1834 }
1835 ctxt->nameTab = tmp;
1836 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1837 ctxt->nameMax * 3 *
1838 sizeof(ctxt->pushTab[0]));
1839 if (tmp2 == NULL) {
1840 ctxt->nameMax /= 2;
1841 goto mem_error;
1842 }
1843 ctxt->pushTab = tmp2;
1844 }
1845 ctxt->nameTab[ctxt->nameNr] = value;
1846 ctxt->name = value;
1847 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1848 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
1849 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
1850 return (ctxt->nameNr++);
1851mem_error:
1852 xmlErrMemory(ctxt, NULL);
1853 return (-1);
1854}
1855/**
1856 * nameNsPop:
1857 * @ctxt: an XML parser context
1858 *
1859 * Pops the top element/prefix/URI name from the name stack
1860 *
1861 * Returns the name just removed
1862 */
1863static const xmlChar *
1864nameNsPop(xmlParserCtxtPtr ctxt)
1865{
1866 const xmlChar *ret;
1867
1868 if (ctxt->nameNr <= 0)
1869 return (NULL);
1870 ctxt->nameNr--;
1871 if (ctxt->nameNr > 0)
1872 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1873 else
1874 ctxt->name = NULL;
1875 ret = ctxt->nameTab[ctxt->nameNr];
1876 ctxt->nameTab[ctxt->nameNr] = NULL;
1877 return (ret);
1878}
1879#endif /* LIBXML_PUSH_ENABLED */
1880
1881/**
1882 * namePush:
1883 * @ctxt: an XML parser context
1884 * @value: the element name
1885 *
1886 * Pushes a new element name on top of the name stack
1887 *
1888 * Returns -1 in case of error, the index in the stack otherwise
1889 */
1890int
1891namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1892{
1893 if (ctxt == NULL) return (-1);
1894
1895 if (ctxt->nameNr >= ctxt->nameMax) {
1896 const xmlChar * *tmp;
1897 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1898 ctxt->nameMax * 2 *
1899 sizeof(ctxt->nameTab[0]));
1900 if (tmp == NULL) {
1901 goto mem_error;
1902 }
1903 ctxt->nameTab = tmp;
1904 ctxt->nameMax *= 2;
1905 }
1906 ctxt->nameTab[ctxt->nameNr] = value;
1907 ctxt->name = value;
1908 return (ctxt->nameNr++);
1909mem_error:
1910 xmlErrMemory(ctxt, NULL);
1911 return (-1);
1912}
1913/**
1914 * namePop:
1915 * @ctxt: an XML parser context
1916 *
1917 * Pops the top element name from the name stack
1918 *
1919 * Returns the name just removed
1920 */
1921const xmlChar *
1922namePop(xmlParserCtxtPtr ctxt)
1923{
1924 const xmlChar *ret;
1925
1926 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1927 return (NULL);
1928 ctxt->nameNr--;
1929 if (ctxt->nameNr > 0)
1930 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1931 else
1932 ctxt->name = NULL;
1933 ret = ctxt->nameTab[ctxt->nameNr];
1934 ctxt->nameTab[ctxt->nameNr] = NULL;
1935 return (ret);
1936}
1937
1938static int spacePush(xmlParserCtxtPtr ctxt, int val) {
1939 if (ctxt->spaceNr >= ctxt->spaceMax) {
1940 int *tmp;
1941
1942 ctxt->spaceMax *= 2;
1943 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1944 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1945 if (tmp == NULL) {
1946 xmlErrMemory(ctxt, NULL);
1947 ctxt->spaceMax /=2;
1948 return(-1);
1949 }
1950 ctxt->spaceTab = tmp;
1951 }
1952 ctxt->spaceTab[ctxt->spaceNr] = val;
1953 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1954 return(ctxt->spaceNr++);
1955}
1956
1957static int spacePop(xmlParserCtxtPtr ctxt) {
1958 int ret;
1959 if (ctxt->spaceNr <= 0) return(0);
1960 ctxt->spaceNr--;
1961 if (ctxt->spaceNr > 0)
1962 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1963 else
1964 ctxt->space = &ctxt->spaceTab[0];
1965 ret = ctxt->spaceTab[ctxt->spaceNr];
1966 ctxt->spaceTab[ctxt->spaceNr] = -1;
1967 return(ret);
1968}
1969
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
 *           to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
2004
/* All four macros expect a parser context variable named `ctxt` in scope. */
/* Byte found at the current position of the current input. */
#define RAW (*ctxt->input->cur)
/* Same expansion as RAW. */
#define CUR (*ctxt->input->cur)
/* Byte found val positions after the current one. */
#define NXT(val) ctxt->input->cur[(val)]
/* The current input pointer itself. */
#define CUR_PTR ctxt->input->cur
2009
/*
 * CMPn(s, c1, ..., cn): true when the n bytes starting at s are exactly
 * c1 ... cn. The comparison is done left to right and stops at the first
 * mismatch, so no byte past a difference (or past a terminating 0 that
 * differs from the expected character) is read.
 * Every argument is parenthesized so that arbitrary expressions can be
 * passed, and the bytes are read through a const pointer since nothing is
 * written. s is evaluated once per byte compared.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
2027
/*
 * SKIP(val): advance the current input by val bytes, updating the
 * character and column counters (no line accounting, so the bytes skipped
 * must not contain a newline). Then hand a '%' found at the new position
 * to xmlParserHandlePEReference() and, if the input is exhausted and
 * cannot be grown, pop it. val is evaluated three times.
 * Expects a parser context variable named `ctxt` in scope.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/*
 * SKIPL(val): same as SKIP but the bytes are consumed one at a time so
 * that newlines update the line and column counters. val is parenthesized
 * in the loop condition so that any expression can be passed; it is
 * evaluated on every iteration.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
2050
2051#define SHRINK if ((ctxt->progressive == 0) && \
2052 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2053 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2054 xmlSHRINK (ctxt);
2055
2056static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2057 xmlParserInputShrink(ctxt->input);
2058 if ((*ctxt->input->cur == 0) &&
2059 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2060 xmlPopInput(ctxt);
2061 }
2062
2063#define GROW if ((ctxt->progressive == 0) && \
2064 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2065 xmlGROW (ctxt);
2066
2067static void xmlGROW (xmlParserCtxtPtr ctxt) {
2068 unsigned long curEnd = ctxt->input->end - ctxt->input->cur;
2069 unsigned long curBase = ctxt->input->cur - ctxt->input->base;
2070
2071 if (((curEnd > (unsigned long) XML_MAX_LOOKUP_LIMIT) ||
2072 (curBase > (unsigned long) XML_MAX_LOOKUP_LIMIT)) &&
2073 ((ctxt->input->buf) && (ctxt->input->buf->readcallback != (xmlInputReadCallback) xmlNop)) &&
2074 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2075 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
2076 ctxt->instate = XML_PARSER_EOF;
2077 }
2078 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2079 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) &&
2080 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2081 xmlPopInput(ctxt);
2082}
2083
/* Skip the blanks at the current position, see xmlSkipBlankChars(). */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Skip the current character, see xmlNextChar(). */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: skip one byte, updating the column and character counters, and
 * try to grow the input if a 0 byte is reached.
 * The expansion is a bare compound statement, not a do/while (0) block:
 * `NEXT1;` must not be used as the body of an if/else.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): skip the current character, which is l bytes long, updating
 * the line and column counters, then hand a '%' found at the new position
 * to xmlParserHandlePEReference(). l is parenthesized so that any
 * expression can be passed.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* Decode the current character of the input; l receives its byte length. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
/* Same as CUR_CHAR but decoding from the string s. */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append the character v, whose encoded length is l,
 * to the buffer b at index i and advance i past it.
 * The expansion is a bare if/else without trailing semicolon: it must be
 * used as a full statement and not as the body of an if/else.
 * b and i appear twice in the expansion, only one branch being executed.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
2110
/**
 * xmlSkipBlankChars:
 * @ctxt:  the XML parser context
 *
 * skip all blanks character found at that point in the input streams.
 * It pops up finished entities in the process if allowable at that point.
 *
 * Two code paths are used: a direct byte scan when a single input is
 * stacked and the parser is not in the XML_PARSER_DTD state, and a slower
 * loop built on the NEXT/CUR macros which also pops exhausted inputs and
 * handles parameter entity references.
 *
 * Returns the number of space chars skipped
 */

int
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
    int res = 0;

    /*
     * It's Okay to use CUR/NEXT here since all the blanks are on
     * the ASCII range.
     */
    if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
        const xmlChar *cur;
        /*
         * if we are in the document content, go really fast
         */
        cur = ctxt->input->cur;
        while (IS_BLANK_CH(*cur)) {
            /* keep the line/column position in sync by hand */
            if (*cur == '\n') {
                ctxt->input->line++; ctxt->input->col = 1;
            } else {
                ctxt->input->col++;
            }
            cur++;
            res++;
            if (*cur == 0) {
                /*
                 * Reached a 0 byte: publish the position, try to read
                 * more data and reload the pointer from the input since
                 * growing may have changed it.
                 */
                ctxt->input->cur = cur;
                xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
                cur = ctxt->input->cur;
            }
        }
        ctxt->input->cur = cur;
    } else {
        int cur;
        do {
            cur = CUR;
            while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
                NEXT;
                cur = CUR;
                res++;
            }
            /*
             * A 0 byte with more than one input stacked means the top
             * input is finished: pop it, except while in the
             * XML_PARSER_COMMENT state.
             */
            while ((cur == 0) && (ctxt->inputNr > 1) &&
                   (ctxt->instate != XML_PARSER_COMMENT)) {
                xmlPopInput(ctxt);
                cur = CUR;
            }
            /*
             * Need to handle support of entities branching here
             */
            if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
            /*
             * NOTE(review): the loop test looks at the value of cur read
             * before the PE reference was handled - confirm this is the
             * intended behaviour.
             */
        } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
    }
    return(res);
}
2172
2173/************************************************************************
2174 * *
2175 * Commodity functions to handle entities *
2176 * *
2177 ************************************************************************/
2178
2179/**
2180 * xmlPopInput:
2181 * @ctxt: an XML parser context
2182 *
2183 * xmlPopInput: the current input pointed by ctxt->input came to an end
2184 * pop it and return the next char.
2185 *
2186 * Returns the current xmlChar in the parser context
2187 */
2188xmlChar
2189xmlPopInput(xmlParserCtxtPtr ctxt) {
2190 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2191 if (xmlParserDebugEntities)
2192 xmlGenericError(xmlGenericErrorContext,
2193 "Popping input %d\n", ctxt->inputNr);
2194 xmlFreeInputStream(inputPop(ctxt));
2195 if ((*ctxt->input->cur == 0) &&
2196 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2197 return(xmlPopInput(ctxt));
2198 return(CUR);
2199}
2200
2201/**
2202 * xmlPushInput:
2203 * @ctxt: an XML parser context
2204 * @input: an XML parser input fragment (entity, XML fragment ...).
2205 *
2206 * xmlPushInput: switch to a new input stream which is stacked on top
2207 * of the previous one(s).
2208 * Returns -1 in case of error or the index in the input stack
2209 */
2210int
2211xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2212 int ret;
2213 if (input == NULL) return(-1);
2214
2215 if (xmlParserDebugEntities) {
2216 if ((ctxt->input != NULL) && (ctxt->input->filename))
2217 xmlGenericError(xmlGenericErrorContext,
2218 "%s(%d): ", ctxt->input->filename,
2219 ctxt->input->line);
2220 xmlGenericError(xmlGenericErrorContext,
2221 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2222 }
2223 ret = inputPush(ctxt, input);
2224 if (ctxt->instate == XML_PARSER_EOF)
2225 return(-1);
2226 GROW;
2227 return(ret);
2228}
2229
2230/**
2231 * xmlParseCharRef:
2232 * @ctxt: an XML parser context
2233 *
2234 * parse Reference declarations
2235 *
2236 * [66] CharRef ::= '&#' [0-9]+ ';' |
2237 * '&#x' [0-9a-fA-F]+ ';'
2238 *
2239 * [ WFC: Legal Character ]
2240 * Characters referred to using character references must match the
2241 * production for Char.
2242 *
2243 * Returns the value parsed (as an int), 0 in case of error
2244 */
2245int
2246xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2247 unsigned int val = 0;
2248 int count = 0;
2249 unsigned int outofrange = 0;
2250
2251 /*
2252 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2253 */
2254 if ((RAW == '&') && (NXT(1) == '#') &&
2255 (NXT(2) == 'x')) {
2256 SKIP(3);
2257 GROW;
2258 while (RAW != ';') { /* loop blocked by count */
2259 if (count++ > 20) {
2260 count = 0;
2261 GROW;
2262 if (ctxt->instate == XML_PARSER_EOF)
2263 return(0);
2264 }
2265 if ((RAW >= '0') && (RAW <= '9'))
2266 val = val * 16 + (CUR - '0');
2267 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2268 val = val * 16 + (CUR - 'a') + 10;
2269 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2270 val = val * 16 + (CUR - 'A') + 10;
2271 else {
2272 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2273 val = 0;
2274 break;
2275 }
2276 if (val > 0x10FFFF)
2277 outofrange = val;
2278
2279 NEXT;
2280 count++;
2281 }
2282 if (RAW == ';') {
2283 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2284 ctxt->input->col++;
2285 ctxt->nbChars ++;
2286 ctxt->input->cur++;
2287 }
2288 } else if ((RAW == '&') && (NXT(1) == '#')) {
2289 SKIP(2);
2290 GROW;
2291 while (RAW != ';') { /* loop blocked by count */
2292 if (count++ > 20) {
2293 count = 0;
2294 GROW;
2295 if (ctxt->instate == XML_PARSER_EOF)
2296 return(0);
2297 }
2298 if ((RAW >= '0') && (RAW <= '9'))
2299 val = val * 10 + (CUR - '0');
2300 else {
2301 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2302 val = 0;
2303 break;
2304 }
2305 if (val > 0x10FFFF)
2306 outofrange = val;
2307
2308 NEXT;
2309 count++;
2310 }
2311 if (RAW == ';') {
2312 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2313 ctxt->input->col++;
2314 ctxt->nbChars ++;
2315 ctxt->input->cur++;
2316 }
2317 } else {
2318 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2319 }
2320
2321 /*
2322 * [ WFC: Legal Character ]
2323 * Characters referred to using character references must match the
2324 * production for Char.
2325 */
2326 if ((IS_CHAR(val) && (outofrange == 0))) {
2327 return(val);
2328 } else {
2329 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2330 "xmlParseCharRef: invalid xmlChar value %d\n",
2331 val);
2332 }
2333 return(0);
2334}
2335
2336/**
2337 * xmlParseStringCharRef:
2338 * @ctxt: an XML parser context
2339 * @str: a pointer to an index in the string
2340 *
2341 * parse Reference declarations, variant parsing from a string rather
2342 * than an an input flow.
2343 *
2344 * [66] CharRef ::= '&#' [0-9]+ ';' |
2345 * '&#x' [0-9a-fA-F]+ ';'
2346 *
2347 * [ WFC: Legal Character ]
2348 * Characters referred to using character references must match the
2349 * production for Char.
2350 *
2351 * Returns the value parsed (as an int), 0 in case of error, str will be
2352 * updated to the current value of the index
2353 */
2354static int
2355xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2356 const xmlChar *ptr;
2357 xmlChar cur;
2358 unsigned int val = 0;
2359 unsigned int outofrange = 0;
2360
2361 if ((str == NULL) || (*str == NULL)) return(0);
2362 ptr = *str;
2363 cur = *ptr;
2364 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2365 ptr += 3;
2366 cur = *ptr;
2367 while (cur != ';') { /* Non input consuming loop */
2368 if ((cur >= '0') && (cur <= '9'))
2369 val = val * 16 + (cur - '0');
2370 else if ((cur >= 'a') && (cur <= 'f'))
2371 val = val * 16 + (cur - 'a') + 10;
2372 else if ((cur >= 'A') && (cur <= 'F'))
2373 val = val * 16 + (cur - 'A') + 10;
2374 else {
2375 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2376 val = 0;
2377 break;
2378 }
2379 if (val > 0x10FFFF)
2380 outofrange = val;
2381
2382 ptr++;
2383 cur = *ptr;
2384 }
2385 if (cur == ';')
2386 ptr++;
2387 } else if ((cur == '&') && (ptr[1] == '#')){
2388 ptr += 2;
2389 cur = *ptr;
2390 while (cur != ';') { /* Non input consuming loops */
2391 if ((cur >= '0') && (cur <= '9'))
2392 val = val * 10 + (cur - '0');
2393 else {
2394 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2395 val = 0;
2396 break;
2397 }
2398 if (val > 0x10FFFF)
2399 outofrange = val;
2400
2401 ptr++;
2402 cur = *ptr;
2403 }
2404 if (cur == ';')
2405 ptr++;
2406 } else {
2407 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2408 return(0);
2409 }
2410 *str = ptr;
2411
2412 /*
2413 * [ WFC: Legal Character ]
2414 * Characters referred to using character references must match the
2415 * production for Char.
2416 */
2417 if ((IS_CHAR(val) && (outofrange == 0))) {
2418 return(val);
2419 } else {
2420 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2421 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2422 val);
2423 }
2424 return(0);
2425}
2426
2427/**
2428 * xmlNewBlanksWrapperInputStream:
2429 * @ctxt: an XML parser context
2430 * @entity: an Entity pointer
2431 *
2432 * Create a new input stream for wrapping
2433 * blanks around a PEReference
2434 *
2435 * Returns the new input stream or NULL
2436 */
2437
2438static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
2439
2440static xmlParserInputPtr
2441xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2442 xmlParserInputPtr input;
2443 xmlChar *buffer;
2444 size_t length;
2445 if (entity == NULL) {
2446 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2447 "xmlNewBlanksWrapperInputStream entity\n");
2448 return(NULL);
2449 }
2450 if (xmlParserDebugEntities)
2451 xmlGenericError(xmlGenericErrorContext,
2452 "new blanks wrapper for entity: %s\n", entity->name);
2453 input = xmlNewInputStream(ctxt);
2454 if (input == NULL) {
2455 return(NULL);
2456 }
2457 length = xmlStrlen(entity->name) + 5;
2458 buffer = xmlMallocAtomic(length);
2459 if (buffer == NULL) {
2460 xmlErrMemory(ctxt, NULL);
2461 xmlFree(input);
2462 return(NULL);
2463 }
2464 buffer [0] = ' ';
2465 buffer [1] = '%';
2466 buffer [length-3] = ';';
2467 buffer [length-2] = ' ';
2468 buffer [length-1] = 0;
2469 memcpy(buffer + 2, entity->name, length - 5);
2470 input->free = deallocblankswrapper;
2471 input->base = buffer;
2472 input->cur = buffer;
2473 input->length = length;
2474 input->end = &buffer[length];
2475 return(input);
2476}
2477
2478/**
2479 * xmlParserHandlePEReference:
2480 * @ctxt: the parser context
2481 *
2482 * [69] PEReference ::= '%' Name ';'
2483 *
2484 * [ WFC: No Recursion ]
2485 * A parsed entity must not contain a recursive
2486 * reference to itself, either directly or indirectly.
2487 *
2488 * [ WFC: Entity Declared ]
2489 * In a document without any DTD, a document with only an internal DTD
2490 * subset which contains no parameter entity references, or a document
2491 * with "standalone='yes'", ... ... The declaration of a parameter
2492 * entity must precede any reference to it...
2493 *
2494 * [ VC: Entity Declared ]
2495 * In a document with an external subset or external parameter entities
2496 * with "standalone='no'", ... ... The declaration of a parameter entity
2497 * must precede any reference to it...
2498 *
2499 * [ WFC: In DTD ]
2500 * Parameter-entity references may only appear in the DTD.
2501 * NOTE: misleading but this is handled.
2502 *
2503 * A PEReference may have been detected in the current input stream
2504 * the handling is done accordingly to
2505 * http://www.w3.org/TR/REC-xml#entproc
2506 * i.e.
2507 * - Included in literal in entity values
2508 * - Included as Parameter Entity reference within DTDs
2509 */
2510void
2511xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2512 const xmlChar *name;
2513 xmlEntityPtr entity = NULL;
2514 xmlParserInputPtr input;
2515
2516 if (RAW != '%') return;
2517 switch(ctxt->instate) {
2518 case XML_PARSER_CDATA_SECTION:
2519 return;
2520 case XML_PARSER_COMMENT:
2521 return;
2522 case XML_PARSER_START_TAG:
2523 return;
2524 case XML_PARSER_END_TAG:
2525 return;
2526 case XML_PARSER_EOF:
2527 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2528 return;
2529 case XML_PARSER_PROLOG:
2530 case XML_PARSER_START:
2531 case XML_PARSER_MISC:
2532 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2533 return;
2534 case XML_PARSER_ENTITY_DECL:
2535 case XML_PARSER_CONTENT:
2536 case XML_PARSER_ATTRIBUTE_VALUE:
2537 case XML_PARSER_PI:
2538 case XML_PARSER_SYSTEM_LITERAL:
2539 case XML_PARSER_PUBLIC_LITERAL:
2540 /* we just ignore it there */
2541 return;
2542 case XML_PARSER_EPILOG:
2543 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2544 return;
2545 case XML_PARSER_ENTITY_VALUE:
2546 /*
2547 * NOTE: in the case of entity values, we don't do the
2548 * substitution here since we need the literal
2549 * entity value to be able to save the internal
2550 * subset of the document.
2551 * This will be handled by xmlStringDecodeEntities
2552 */
2553 return;
2554 case XML_PARSER_DTD:
2555 /*
2556 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2557 * In the internal DTD subset, parameter-entity references
2558 * can occur only where markup declarations can occur, not
2559 * within markup declarations.
2560 * In that case this is handled in xmlParseMarkupDecl
2561 */
2562 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2563 return;
2564 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2565 return;
2566 break;
2567 case XML_PARSER_IGNORE:
2568 return;
2569 }
2570
2571 NEXT;
2572 name = xmlParseName(ctxt);
2573 if (xmlParserDebugEntities)
2574 xmlGenericError(xmlGenericErrorContext,
2575 "PEReference: %s\n", name);
2576 if (name == NULL) {
2577 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
2578 } else {
2579 if (RAW == ';') {
2580 NEXT;
2581 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2582 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2583 if (ctxt->instate == XML_PARSER_EOF)
2584 return;
2585 if (entity == NULL) {
2586
2587 /*
2588 * [ WFC: Entity Declared ]
2589 * In a document without any DTD, a document with only an
2590 * internal DTD subset which contains no parameter entity
2591 * references, or a document with "standalone='yes'", ...
2592 * ... The declaration of a parameter entity must precede
2593 * any reference to it...
2594 */
2595 if ((ctxt->standalone == 1) ||
2596 ((ctxt->hasExternalSubset == 0) &&
2597 (ctxt->hasPErefs == 0))) {
2598 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
2599 "PEReference: %%%s; not found\n", name);
2600 } else {
2601 /*
2602 * [ VC: Entity Declared ]
2603 * In a document with an external subset or external
2604 * parameter entities with "standalone='no'", ...
2605 * ... The declaration of a parameter entity must precede
2606 * any reference to it...
2607 */
2608 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2609 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2610 "PEReference: %%%s; not found\n",
2611 name, NULL);
2612 } else
2613 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2614 "PEReference: %%%s; not found\n",
2615 name, NULL);
2616 ctxt->valid = 0;
2617 }
2618 xmlParserEntityCheck(ctxt, 0, NULL, 0);
2619 } else if (ctxt->input->free != deallocblankswrapper) {
2620 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2621 if (xmlPushInput(ctxt, input) < 0)
2622 return;
2623 } else {
2624 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2625 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
2626 xmlChar start[4];
2627 xmlCharEncoding enc;
2628
2629 /*
2630 * Note: external parameter entities will not be loaded, it
2631 * is not required for a non-validating parser, unless the
2632 * option of validating, or substituting entities were
2633 * given. Doing so is far more secure as the parser will
2634 * only process data coming from the document entity by
2635 * default.
2636 */
2637 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2638 ((ctxt->options & XML_PARSE_NOENT) == 0) &&
2639 ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
2640 ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
2641 ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
2642 (ctxt->replaceEntities == 0) &&
2643 (ctxt->validate == 0))
2644 return;
2645
2646 /*
2647 * handle the extra spaces added before and after
2648 * c.f. http://www.w3.org/TR/REC-xml#as-PE
2649 * this is done independently.
2650 */
2651 input = xmlNewEntityInputStream(ctxt, entity);
2652 if (xmlPushInput(ctxt, input) < 0)
2653 return;
2654
2655 /*
2656 * Get the 4 first bytes and decode the charset
2657 * if enc != XML_CHAR_ENCODING_NONE
2658 * plug some encoding conversion routines.
2659 * Note that, since we may have some non-UTF8
2660 * encoding (like UTF16, bug 135229), the 'length'
2661 * is not known, but we can calculate based upon
2662 * the amount of data in the buffer.
2663 */
2664 GROW
2665 if (ctxt->instate == XML_PARSER_EOF)
2666 return;
2667 if ((ctxt->input->end - ctxt->input->cur)>=4) {
2668 start[0] = RAW;
2669 start[1] = NXT(1);
2670 start[2] = NXT(2);
2671 start[3] = NXT(3);
2672 enc = xmlDetectCharEncoding(start, 4);
2673 if (enc != XML_CHAR_ENCODING_NONE) {
2674 xmlSwitchEncoding(ctxt, enc);
2675 }
2676 }
2677
2678 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2679 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2680 (IS_BLANK_CH(NXT(5)))) {
2681 xmlParseTextDecl(ctxt);
2682 }
2683 } else {
2684 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2685 "PEReference: %s is not a parameter entity\n",
2686 name);
2687 }
2688 }
2689 } else {
2690 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
2691 }
2692 }
2693}
2694
/*
 * Macro used to grow the current buffer.
 * The new size is twice the current one plus @n.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures; it is
 *            jumped to when the new size wraps around or when the
 *            reallocation fails (the buffer is left untouched then).
 *
 * The body is wrapped in do { } while (0) so that an invocation followed
 * by ';' is a single statement, and @n is parenthesized so that any
 * expression can be passed.
 */
#define growBuffer(buffer, n) do {                                      \
    xmlChar *tmp;                                                       \
    size_t new_size = buffer##_size * 2 + (n);                          \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;                                    \
    buffer = tmp;                                                       \
    buffer##_size = new_size;                                           \
} while (0)
2709
2710/**
2711 * xmlStringLenDecodeEntities:
2712 * @ctxt: the parser context
2713 * @str: the input string
2714 * @len: the string length
2715 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2716 * @end: an end marker xmlChar, 0 if none
2717 * @end2: an end marker xmlChar, 0 if none
2718 * @end3: an end marker xmlChar, 0 if none
2719 *
2720 * Takes a entity string content and process to do the adequate substitutions.
2721 *
2722 * [67] Reference ::= EntityRef | CharRef
2723 *
2724 * [69] PEReference ::= '%' Name ';'
2725 *
2726 * Returns A newly allocated string with the substitution done. The caller
2727 * must deallocate it !
2728 */
2729xmlChar *
2730xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2731 int what, xmlChar end, xmlChar end2, xmlChar end3) {
2732 xmlChar *buffer = NULL;
2733 size_t buffer_size = 0;
2734 size_t nbchars = 0;
2735
2736 xmlChar *current = NULL;
2737 xmlChar *rep = NULL;
2738 const xmlChar *last;
2739 xmlEntityPtr ent;
2740 int c,l;
2741
2742 if ((ctxt == NULL) || (str == NULL) || (len < 0))
2743 return(NULL);
2744 last = str + len;
2745
2746 if (((ctxt->depth > 40) &&
2747 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2748 (ctxt->depth > 1024)) {
2749 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2750 return(NULL);
2751 }
2752
2753 /*
2754 * allocate a translation buffer.
2755 */
2756 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2757 buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2758 if (buffer == NULL) goto mem_error;
2759
2760 /*
2761 * OK loop until we reach one of the ending char or a size limit.
2762 * we are operating on already parsed values.
2763 */
2764 if (str < last)
2765 c = CUR_SCHAR(str, l);
2766 else
2767 c = 0;
2768 while ((c != 0) && (c != end) && /* non input consuming loop */
2769 (c != end2) && (c != end3)) {
2770
2771 if (c == 0) break;
2772 if ((c == '&') && (str[1] == '#')) {
2773 int val = xmlParseStringCharRef(ctxt, &str);
2774 if (val != 0) {
2775 COPY_BUF(0,buffer,nbchars,val);
2776 }
2777 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2778 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2779 }
2780 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2781 if (xmlParserDebugEntities)
2782 xmlGenericError(xmlGenericErrorContext,
2783 "String decoding Entity Reference: %.30s\n",
2784 str);
2785 ent = xmlParseStringEntityRef(ctxt, &str);
2786 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2787 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
2788 goto int_error;
2789 xmlParserEntityCheck(ctxt, 0, ent, 0);
2790 if (ent != NULL)
2791 ctxt->nbentities += ent->checked / 2;
2792 if ((ent != NULL) &&
2793 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2794 if (ent->content != NULL) {
2795 COPY_BUF(0,buffer,nbchars,ent->content[0]);
2796 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2797 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2798 }
2799 } else {
2800 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2801 "predefined entity has no content\n");
2802 }
2803 } else if ((ent != NULL) && (ent->content != NULL)) {
2804 ctxt->depth++;
2805 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2806 0, 0, 0);
2807 ctxt->depth--;
2808
2809 if (rep != NULL) {
2810 current = rep;
2811 while (*current != 0) { /* non input consuming loop */
2812 buffer[nbchars++] = *current++;
2813 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2814 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2815 goto int_error;
2816 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2817 }
2818 }
2819 xmlFree(rep);
2820 rep = NULL;
2821 }
2822 } else if (ent != NULL) {
2823 int i = xmlStrlen(ent->name);
2824 const xmlChar *cur = ent->name;
2825
2826 buffer[nbchars++] = '&';
2827 if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2828 growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2829 }
2830 for (;i > 0;i--)
2831 buffer[nbchars++] = *cur++;
2832 buffer[nbchars++] = ';';
2833 }
2834 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2835 if (xmlParserDebugEntities)
2836 xmlGenericError(xmlGenericErrorContext,
2837 "String decoding PE Reference: %.30s\n", str);
2838 ent = xmlParseStringPEReference(ctxt, &str);
2839 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2840 goto int_error;
2841 xmlParserEntityCheck(ctxt, 0, ent, 0);
2842 if (ent != NULL)
2843 ctxt->nbentities += ent->checked / 2;
2844 if (ent != NULL) {
2845 if (ent->content == NULL) {
2846 xmlLoadEntityContent(ctxt, ent);
2847 }
2848 ctxt->depth++;
2849 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2850 0, 0, 0);
2851 ctxt->depth--;
2852 if (rep != NULL) {
2853 current = rep;
2854 while (*current != 0) { /* non input consuming loop */
2855 buffer[nbchars++] = *current++;
2856 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2857 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2858 goto int_error;
2859 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2860 }
2861 }
2862 xmlFree(rep);
2863 rep = NULL;
2864 }
2865 }
2866 } else {
2867 COPY_BUF(l,buffer,nbchars,c);
2868 str += l;
2869 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2870 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2871 }
2872 }
2873 if (str < last)
2874 c = CUR_SCHAR(str, l);
2875 else
2876 c = 0;
2877 }
2878 buffer[nbchars] = 0;
2879 return(buffer);
2880
2881mem_error:
2882 xmlErrMemory(ctxt, NULL);
2883int_error:
2884 if (rep != NULL)
2885 xmlFree(rep);
2886 if (buffer != NULL)
2887 xmlFree(buffer);
2888 return(NULL);
2889}
2890
2891/**
2892 * xmlStringDecodeEntities:
2893 * @ctxt: the parser context
2894 * @str: the input string
2895 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2896 * @end: an end marker xmlChar, 0 if none
2897 * @end2: an end marker xmlChar, 0 if none
2898 * @end3: an end marker xmlChar, 0 if none
2899 *
2900 * Takes a entity string content and process to do the adequate substitutions.
2901 *
2902 * [67] Reference ::= EntityRef | CharRef
2903 *
2904 * [69] PEReference ::= '%' Name ';'
2905 *
2906 * Returns A newly allocated string with the substitution done. The caller
2907 * must deallocate it !
2908 */
2909xmlChar *
2910xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2911 xmlChar end, xmlChar end2, xmlChar end3) {
2912 if ((ctxt == NULL) || (str == NULL)) return(NULL);
2913 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2914 end, end2, end3));
2915}
2916
2917/************************************************************************
2918 * *
2919 * Commodity functions, cleanup needed ? *
2920 * *
2921 ************************************************************************/
2922
2923/**
2924 * areBlanks:
2925 * @ctxt: an XML parser context
2926 * @str: a xmlChar *
2927 * @len: the size of @str
2928 * @blank_chars: we know the chars are blanks
2929 *
2930 * Is this a sequence of blank chars that one can ignore ?
2931 *
2932 * Returns 1 if ignorable 0 otherwise.
2933 */
2934
2935static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2936 int blank_chars) {
2937 int i, ret;
2938 xmlNodePtr lastChild;
2939
2940 /*
2941 * Don't spend time trying to differentiate them, the same callback is
2942 * used !
2943 */
2944 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2945 return(0);
2946
2947 /*
2948 * Check for xml:space value.
2949 */
2950 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2951 (*(ctxt->space) == -2))
2952 return(0);
2953
2954 /*
2955 * Check that the string is made of blanks
2956 */
2957 if (blank_chars == 0) {
2958 for (i = 0;i < len;i++)
2959 if (!(IS_BLANK_CH(str[i]))) return(0);
2960 }
2961
2962 /*
2963 * Look if the element is mixed content in the DTD if available
2964 */
2965 if (ctxt->node == NULL) return(0);
2966 if (ctxt->myDoc != NULL) {
2967 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2968 if (ret == 0) return(1);
2969 if (ret == 1) return(0);
2970 }
2971
2972 /*
2973 * Otherwise, heuristic :-\
2974 */
2975 if ((RAW != '<') && (RAW != 0xD)) return(0);
2976 if ((ctxt->node->children == NULL) &&
2977 (RAW == '<') && (NXT(1) == '/')) return(0);
2978
2979 lastChild = xmlGetLastChild(ctxt->node);
2980 if (lastChild == NULL) {
2981 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2982 (ctxt->node->content != NULL)) return(0);
2983 } else if (xmlNodeIsText(lastChild))
2984 return(0);
2985 else if ((ctxt->node->children != NULL) &&
2986 (xmlNodeIsText(ctxt->node->children)))
2987 return(0);
2988 return(1);
2989}
2990
2991/************************************************************************
2992 * *
2993 * Extra stuff for namespace support *
2994 * Relates to http://www.w3.org/TR/WD-xml-names *
2995 * *
2996 ************************************************************************/
2997
2998/**
2999 * xmlSplitQName:
3000 * @ctxt: an XML parser context
3001 * @name: an XML parser context
3002 * @prefix: a xmlChar **
3003 *
3004 * parse an UTF8 encoded XML qualified name string
3005 *
3006 * [NS 5] QName ::= (Prefix ':')? LocalPart
3007 *
3008 * [NS 6] Prefix ::= NCName
3009 *
3010 * [NS 7] LocalPart ::= NCName
3011 *
3012 * Returns the local part, and prefix is updated
3013 * to get the Prefix if any.
3014 */
3015
3016xmlChar *
3017xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
3018 xmlChar buf[XML_MAX_NAMELEN + 5];
3019 xmlChar *buffer = NULL;
3020 int len = 0;
3021 int max = XML_MAX_NAMELEN;
3022 xmlChar *ret = NULL;
3023 const xmlChar *cur = name;
3024 int c;
3025
3026 if (prefix == NULL) return(NULL);
3027 *prefix = NULL;
3028
3029 if (cur == NULL) return(NULL);
3030
3031#ifndef XML_XML_NAMESPACE
3032 /* xml: prefix is not really a namespace */
3033 if ((cur[0] == 'x') && (cur[1] == 'm') &&
3034 (cur[2] == 'l') && (cur[3] == ':'))
3035 return(xmlStrdup(name));
3036#endif
3037
3038 /* nasty but well=formed */
3039 if (cur[0] == ':')
3040 return(xmlStrdup(name));
3041
3042 c = *cur++;
3043 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
3044 buf[len++] = c;
3045 c = *cur++;
3046 }
3047 if (len >= max) {
3048 /*
3049 * Okay someone managed to make a huge name, so he's ready to pay
3050 * for the processing speed.
3051 */
3052 max = len * 2;
3053
3054 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3055 if (buffer == NULL) {
3056 xmlErrMemory(ctxt, NULL);
3057 return(NULL);
3058 }
3059 memcpy(buffer, buf, len);
3060 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3061 if (len + 10 > max) {
3062 xmlChar *tmp;
3063
3064 max *= 2;
3065 tmp = (xmlChar *) xmlRealloc(buffer,
3066 max * sizeof(xmlChar));
3067 if (tmp == NULL) {
3068 xmlFree(buffer);
3069 xmlErrMemory(ctxt, NULL);
3070 return(NULL);
3071 }
3072 buffer = tmp;
3073 }
3074 buffer[len++] = c;
3075 c = *cur++;
3076 }
3077 buffer[len] = 0;
3078 }
3079
3080 if ((c == ':') && (*cur == 0)) {
3081 if (buffer != NULL)
3082 xmlFree(buffer);
3083 *prefix = NULL;
3084 return(xmlStrdup(name));
3085 }
3086
3087 if (buffer == NULL)
3088 ret = xmlStrndup(buf, len);
3089 else {
3090 ret = buffer;
3091 buffer = NULL;
3092 max = XML_MAX_NAMELEN;
3093 }
3094
3095
3096 if (c == ':') {
3097 c = *cur;
3098 *prefix = ret;
3099 if (c == 0) {
3100 return(xmlStrndup(BAD_CAST "", 0));
3101 }
3102 len = 0;
3103
3104 /*
3105 * Check that the first character is proper to start
3106 * a new name
3107 */
3108 if (!(((c >= 0x61) && (c <= 0x7A)) ||
3109 ((c >= 0x41) && (c <= 0x5A)) ||
3110 (c == '_') || (c == ':'))) {
3111 int l;
3112 int first = CUR_SCHAR(cur, l);
3113
3114 if (!IS_LETTER(first) && (first != '_')) {
3115 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3116 "Name %s is not XML Namespace compliant\n",
3117 name);
3118 }
3119 }
3120 cur++;
3121
3122 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3123 buf[len++] = c;
3124 c = *cur++;
3125 }
3126 if (len >= max) {
3127 /*
3128 * Okay someone managed to make a huge name, so he's ready to pay
3129 * for the processing speed.
3130 */
3131 max = len * 2;
3132
3133 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3134 if (buffer == NULL) {
3135 xmlErrMemory(ctxt, NULL);
3136 return(NULL);
3137 }
3138 memcpy(buffer, buf, len);
3139 while (c != 0) { /* tested bigname2.xml */
3140 if (len + 10 > max) {
3141 xmlChar *tmp;
3142
3143 max *= 2;
3144 tmp = (xmlChar *) xmlRealloc(buffer,
3145 max * sizeof(xmlChar));
3146 if (tmp == NULL) {
3147 xmlErrMemory(ctxt, NULL);
3148 xmlFree(buffer);
3149 return(NULL);
3150 }
3151 buffer = tmp;
3152 }
3153 buffer[len++] = c;
3154 c = *cur++;
3155 }
3156 buffer[len] = 0;
3157 }
3158
3159 if (buffer == NULL)
3160 ret = xmlStrndup(buf, len);
3161 else {
3162 ret = buffer;
3163 }
3164 }
3165
3166 return(ret);
3167}
3168
3169/************************************************************************
3170 * *
3171 * The parser itself *
3172 * Relates to http://www.w3.org/TR/REC-xml *
3173 * *
3174 ************************************************************************/
3175
3176/************************************************************************
3177 * *
3178 * Routines to parse Name, NCName and NmToken *
3179 * *
3180 ************************************************************************/
#ifdef DEBUG
/*
 * Statistics counters, only compiled in DEBUG builds.
 * nbParseNameComplex is incremented on each entry into
 * xmlParseNameComplex(); the other counters presumably track calls to the
 * similarly named parsing routines -- TODO confirm against their users.
 */
static unsigned long nbParseName = 0;
static unsigned long nbParseNmToken = 0;
static unsigned long nbParseNCName = 0;
static unsigned long nbParseNCNameComplex = 0;
static unsigned long nbParseNameComplex = 0;
static unsigned long nbParseStringName = 0;
#endif
3189
3190/*
3191 * The two following functions are related to the change of accepted
3192 * characters for Name and NmToken in the Revision 5 of XML-1.0
3193 * They correspond to the modified production [4] and the new production [4a]
3194 * changes in that revision. Also note that the macros used for the
3195 * productions Letter, Digit, CombiningChar and Extender are not needed
3196 * anymore.
3197 * We still keep compatibility to pre-revision5 parsing semantic if the
3198 * new XML_PARSE_OLD10 option is given to the parser.
3199 */
3200static int
3201xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3202 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3203 /*
3204 * Use the new checks of production [4] [4a] amd [5] of the
3205 * Update 5 of XML-1.0
3206 */
3207 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3208 (((c >= 'a') && (c <= 'z')) ||
3209 ((c >= 'A') && (c <= 'Z')) ||
3210 (c == '_') || (c == ':') ||
3211 ((c >= 0xC0) && (c <= 0xD6)) ||
3212 ((c >= 0xD8) && (c <= 0xF6)) ||
3213 ((c >= 0xF8) && (c <= 0x2FF)) ||
3214 ((c >= 0x370) && (c <= 0x37D)) ||
3215 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3216 ((c >= 0x200C) && (c <= 0x200D)) ||
3217 ((c >= 0x2070) && (c <= 0x218F)) ||
3218 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3219 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3220 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3221 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3222 ((c >= 0x10000) && (c <= 0xEFFFF))))
3223 return(1);
3224 } else {
3225 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3226 return(1);
3227 }
3228 return(0);
3229}
3230
3231static int
3232xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3233 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3234 /*
3235 * Use the new checks of production [4] [4a] amd [5] of the
3236 * Update 5 of XML-1.0
3237 */
3238 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3239 (((c >= 'a') && (c <= 'z')) ||
3240 ((c >= 'A') && (c <= 'Z')) ||
3241 ((c >= '0') && (c <= '9')) || /* !start */
3242 (c == '_') || (c == ':') ||
3243 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3244 ((c >= 0xC0) && (c <= 0xD6)) ||
3245 ((c >= 0xD8) && (c <= 0xF6)) ||
3246 ((c >= 0xF8) && (c <= 0x2FF)) ||
3247 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3248 ((c >= 0x370) && (c <= 0x37D)) ||
3249 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3250 ((c >= 0x200C) && (c <= 0x200D)) ||
3251 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3252 ((c >= 0x2070) && (c <= 0x218F)) ||
3253 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3254 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3255 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3256 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3257 ((c >= 0x10000) && (c <= 0xEFFFF))))
3258 return(1);
3259 } else {
3260 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3261 (c == '.') || (c == '-') ||
3262 (c == '_') || (c == ':') ||
3263 (IS_COMBINING(c)) ||
3264 (IS_EXTENDER(c)))
3265 return(1);
3266 }
3267 return(0);
3268}
3269
/*
 * Forward declaration: the definition is not part of this section of the
 * file (presumably further down -- TODO confirm).
 */
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
                          int *len, int *alloc, int normalize);
3272
3273static const xmlChar *
3274xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3275 int len = 0, l;
3276 int c;
3277 int count = 0;
3278
3279#ifdef DEBUG
3280 nbParseNameComplex++;
3281#endif
3282
3283 /*
3284 * Handler for more complex cases
3285 */
3286 GROW;
3287 if (ctxt->instate == XML_PARSER_EOF)
3288 return(NULL);
3289 c = CUR_CHAR(l);
3290 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3291 /*
3292 * Use the new checks of production [4] [4a] amd [5] of the
3293 * Update 5 of XML-1.0
3294 */
3295 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3296 (!(((c >= 'a') && (c <= 'z')) ||
3297 ((c >= 'A') && (c <= 'Z')) ||
3298 (c == '_') || (c == ':') ||
3299 ((c >= 0xC0) && (c <= 0xD6)) ||
3300 ((c >= 0xD8) && (c <= 0xF6)) ||
3301 ((c >= 0xF8) && (c <= 0x2FF)) ||
3302 ((c >= 0x370) && (c <= 0x37D)) ||
3303 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3304 ((c >= 0x200C) && (c <= 0x200D)) ||
3305 ((c >= 0x2070) && (c <= 0x218F)) ||
3306 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3307 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3308 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3309 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3310 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3311 return(NULL);
3312 }
3313 len += l;
3314 NEXTL(l);
3315 c = CUR_CHAR(l);
3316 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3317 (((c >= 'a') && (c <= 'z')) ||
3318 ((c >= 'A') && (c <= 'Z')) ||
3319 ((c >= '0') && (c <= '9')) || /* !start */
3320 (c == '_') || (c == ':') ||
3321 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3322 ((c >= 0xC0) && (c <= 0xD6)) ||
3323 ((c >= 0xD8) && (c <= 0xF6)) ||
3324 ((c >= 0xF8) && (c <= 0x2FF)) ||
3325 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3326 ((c >= 0x370) && (c <= 0x37D)) ||
3327 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3328 ((c >= 0x200C) && (c <= 0x200D)) ||
3329 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3330 ((c >= 0x2070) && (c <= 0x218F)) ||
3331 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3332 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3333 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3334 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3335 ((c >= 0x10000) && (c <= 0xEFFFF))
3336 )) {
3337 if (count++ > XML_PARSER_CHUNK_SIZE) {
3338 count = 0;
3339 GROW;
3340 if (ctxt->instate == XML_PARSER_EOF)
3341 return(NULL);
3342 }
3343 len += l;
3344 NEXTL(l);
3345 c = CUR_CHAR(l);
3346 }
3347 } else {
3348 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3349 (!IS_LETTER(c) && (c != '_') &&
3350 (c != ':'))) {
3351 return(NULL);
3352 }
3353 len += l;
3354 NEXTL(l);
3355 c = CUR_CHAR(l);
3356
3357 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3358 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3359 (c == '.') || (c == '-') ||
3360 (c == '_') || (c == ':') ||
3361 (IS_COMBINING(c)) ||
3362 (IS_EXTENDER(c)))) {
3363 if (count++ > XML_PARSER_CHUNK_SIZE) {
3364 count = 0;
3365 GROW;
3366 if (ctxt->instate == XML_PARSER_EOF)
3367 return(NULL);
3368 }
3369 len += l;
3370 NEXTL(l);
3371 c = CUR_CHAR(l);
3372 if (c == 0) {
3373 count = 0;
3374 GROW;
3375 if (ctxt->instate == XML_PARSER_EOF)
3376 return(NULL);
3377 c = CUR_CHAR(l);
3378 }
3379 }
3380 }
3381 if ((len > XML_MAX_NAME_LENGTH) &&
3382 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3383 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3384 return(NULL);
3385 }
3386 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3387 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3388 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3389}
3390
3391/**
3392 * xmlParseName:
3393 * @ctxt: an XML parser context
3394 *
3395 * parse an XML name.
3396 *
3397 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3398 * CombiningChar | Extender
3399 *
3400 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3401 *
3402 * [6] Names ::= Name (#x20 Name)*
3403 *
3404 * Returns the Name parsed or NULL
3405 */
3406
3407const xmlChar *
3408xmlParseName(xmlParserCtxtPtr ctxt) {
3409 const xmlChar *in;
3410 const xmlChar *ret;
3411 int count = 0;
3412
3413 GROW;
3414
3415#ifdef DEBUG
3416 nbParseName++;
3417#endif
3418
3419 /*
3420 * Accelerator for simple ASCII names
3421 */
3422 in = ctxt->input->cur;
3423 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3424 ((*in >= 0x41) && (*in <= 0x5A)) ||
3425 (*in == '_') || (*in == ':')) {
3426 in++;
3427 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3428 ((*in >= 0x41) && (*in <= 0x5A)) ||
3429 ((*in >= 0x30) && (*in <= 0x39)) ||
3430 (*in == '_') || (*in == '-') ||
3431 (*in == ':') || (*in == '.'))
3432 in++;
3433 if ((*in > 0) && (*in < 0x80)) {
3434 count = in - ctxt->input->cur;
3435 if ((count > XML_MAX_NAME_LENGTH) &&
3436 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3437 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3438 return(NULL);
3439 }
3440 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3441 ctxt->input->cur = in;
3442 ctxt->nbChars += count;
3443 ctxt->input->col += count;
3444 if (ret == NULL)
3445 xmlErrMemory(ctxt, NULL);
3446 return(ret);
3447 }
3448 }
3449 /* accelerator for special cases */
3450 return(xmlParseNameComplex(ctxt));
3451}
3452
3453static const xmlChar *
3454xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3455 int len = 0, l;
3456 int c;
3457 int count = 0;
3458 const xmlChar *end; /* needed because CUR_CHAR() can move cur on \r\n */
3459
3460#ifdef DEBUG
3461 nbParseNCNameComplex++;
3462#endif
3463
3464 /*
3465 * Handler for more complex cases
3466 */
3467 GROW;
3468 end = ctxt->input->cur;
3469 c = CUR_CHAR(l);
3470 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3471 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3472 return(NULL);
3473 }
3474
3475 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3476 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3477 if (count++ > XML_PARSER_CHUNK_SIZE) {
3478 if ((len > XML_MAX_NAME_LENGTH) &&
3479 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3480 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3481 return(NULL);
3482 }
3483 count = 0;
3484 GROW;
3485 if (ctxt->instate == XML_PARSER_EOF)
3486 return(NULL);
3487 }
3488 len += l;
3489 NEXTL(l);
3490 end = ctxt->input->cur;
3491 c = CUR_CHAR(l);
3492 if (c == 0) {
3493 count = 0;
3494 GROW;
3495 if (ctxt->instate == XML_PARSER_EOF)
3496 return(NULL);
3497 end = ctxt->input->cur;
3498 c = CUR_CHAR(l);
3499 }
3500 }
3501 if ((len > XML_MAX_NAME_LENGTH) &&
3502 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3503 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3504 return(NULL);
3505 }
3506 return(xmlDictLookup(ctxt->dict, end - len, len));
3507}
3508
3509/**
3510 * xmlParseNCName:
3511 * @ctxt: an XML parser context
3512 * @len: length of the string parsed
3513 *
3514 * parse an XML name.
3515 *
3516 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3517 * CombiningChar | Extender
3518 *
3519 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3520 *
3521 * Returns the Name parsed or NULL
3522 */
3523
3524static const xmlChar *
3525xmlParseNCName(xmlParserCtxtPtr ctxt) {
3526 const xmlChar *in;
3527 const xmlChar *ret;
3528 int count = 0;
3529
3530#ifdef DEBUG
3531 nbParseNCName++;
3532#endif
3533
3534 /*
3535 * Accelerator for simple ASCII names
3536 */
3537 in = ctxt->input->cur;
3538 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3539 ((*in >= 0x41) && (*in <= 0x5A)) ||
3540 (*in == '_')) {
3541 in++;
3542 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3543 ((*in >= 0x41) && (*in <= 0x5A)) ||
3544 ((*in >= 0x30) && (*in <= 0x39)) ||
3545 (*in == '_') || (*in == '-') ||
3546 (*in == '.'))
3547 in++;
3548 if ((*in > 0) && (*in < 0x80)) {
3549 count = in - ctxt->input->cur;
3550 if ((count > XML_MAX_NAME_LENGTH) &&
3551 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3552 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3553 return(NULL);
3554 }
3555 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3556 ctxt->input->cur = in;
3557 ctxt->nbChars += count;
3558 ctxt->input->col += count;
3559 if (ret == NULL) {
3560 xmlErrMemory(ctxt, NULL);
3561 }
3562 return(ret);
3563 }
3564 }
3565 return(xmlParseNCNameComplex(ctxt));
3566}
3567
3568/**
3569 * xmlParseNameAndCompare:
3570 * @ctxt: an XML parser context
3571 *
3572 * parse an XML name and compares for match
3573 * (specialized for endtag parsing)
3574 *
3575 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3576 * and the name for mismatch
3577 */
3578
3579static const xmlChar *
3580xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3581 register const xmlChar *cmp = other;
3582 register const xmlChar *in;
3583 const xmlChar *ret;
3584
3585 GROW;
3586 if (ctxt->instate == XML_PARSER_EOF)
3587 return(NULL);
3588
3589 in = ctxt->input->cur;
3590 while (*in != 0 && *in == *cmp) {
3591 ++in;
3592 ++cmp;
3593 ctxt->input->col++;
3594 }
3595 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3596 /* success */
3597 ctxt->input->cur = in;
3598 return (const xmlChar*) 1;
3599 }
3600 /* failure (or end of input buffer), check with full function */
3601 ret = xmlParseName (ctxt);
3602 /* strings coming from the dictionnary direct compare possible */
3603 if (ret == other) {
3604 return (const xmlChar*) 1;
3605 }
3606 return ret;
3607}
3608
3609/**
3610 * xmlParseStringName:
3611 * @ctxt: an XML parser context
3612 * @str: a pointer to the string pointer (IN/OUT)
3613 *
3614 * parse an XML name.
3615 *
3616 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3617 * CombiningChar | Extender
3618 *
3619 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3620 *
3621 * [6] Names ::= Name (#x20 Name)*
3622 *
3623 * Returns the Name parsed or NULL. The @str pointer
3624 * is updated to the current location in the string.
3625 */
3626
3627static xmlChar *
3628xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3629 xmlChar buf[XML_MAX_NAMELEN + 5];
3630 const xmlChar *cur = *str;
3631 int len = 0, l;
3632 int c;
3633
3634#ifdef DEBUG
3635 nbParseStringName++;
3636#endif
3637
3638 c = CUR_SCHAR(cur, l);
3639 if (!xmlIsNameStartChar(ctxt, c)) {
3640 return(NULL);
3641 }
3642
3643 COPY_BUF(l,buf,len,c);
3644 cur += l;
3645 c = CUR_SCHAR(cur, l);
3646 while (xmlIsNameChar(ctxt, c)) {
3647 COPY_BUF(l,buf,len,c);
3648 cur += l;
3649 c = CUR_SCHAR(cur, l);
3650 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3651 /*
3652 * Okay someone managed to make a huge name, so he's ready to pay
3653 * for the processing speed.
3654 */
3655 xmlChar *buffer;
3656 int max = len * 2;
3657
3658 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3659 if (buffer == NULL) {
3660 xmlErrMemory(ctxt, NULL);
3661 return(NULL);
3662 }
3663 memcpy(buffer, buf, len);
3664 while (xmlIsNameChar(ctxt, c)) {
3665 if (len + 10 > max) {
3666 xmlChar *tmp;
3667
3668 if ((len > XML_MAX_NAME_LENGTH) &&
3669 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3670 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3671 xmlFree(buffer);
3672 return(NULL);
3673 }
3674 max *= 2;
3675 tmp = (xmlChar *) xmlRealloc(buffer,
3676 max * sizeof(xmlChar));
3677 if (tmp == NULL) {
3678 xmlErrMemory(ctxt, NULL);
3679 xmlFree(buffer);
3680 return(NULL);
3681 }
3682 buffer = tmp;
3683 }
3684 COPY_BUF(l,buffer,len,c);
3685 cur += l;
3686 c = CUR_SCHAR(cur, l);
3687 }
3688 buffer[len] = 0;
3689 *str = cur;
3690 return(buffer);
3691 }
3692 }
3693 if ((len > XML_MAX_NAME_LENGTH) &&
3694 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3695 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3696 return(NULL);
3697 }
3698 *str = cur;
3699 return(xmlStrndup(buf, len));
3700}
3701
3702/**
3703 * xmlParseNmtoken:
3704 * @ctxt: an XML parser context
3705 *
3706 * parse an XML Nmtoken.
3707 *
3708 * [7] Nmtoken ::= (NameChar)+
3709 *
3710 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3711 *
3712 * Returns the Nmtoken parsed or NULL
3713 */
3714
3715xmlChar *
3716xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3717 xmlChar buf[XML_MAX_NAMELEN + 5];
3718 int len = 0, l;
3719 int c;
3720 int count = 0;
3721
3722#ifdef DEBUG
3723 nbParseNmToken++;
3724#endif
3725
3726 GROW;
3727 if (ctxt->instate == XML_PARSER_EOF)
3728 return(NULL);
3729 c = CUR_CHAR(l);
3730
3731 while (xmlIsNameChar(ctxt, c)) {
3732 if (count++ > XML_PARSER_CHUNK_SIZE) {
3733 count = 0;
3734 GROW;
3735 }
3736 COPY_BUF(l,buf,len,c);
3737 NEXTL(l);
3738 c = CUR_CHAR(l);
3739 if (c == 0) {
3740 count = 0;
3741 GROW;
3742 if (ctxt->instate == XML_PARSER_EOF)
3743 return(NULL);
3744 c = CUR_CHAR(l);
3745 }
3746 if (len >= XML_MAX_NAMELEN) {
3747 /*
3748 * Okay someone managed to make a huge token, so he's ready to pay
3749 * for the processing speed.
3750 */
3751 xmlChar *buffer;
3752 int max = len * 2;
3753
3754 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3755 if (buffer == NULL) {
3756 xmlErrMemory(ctxt, NULL);
3757 return(NULL);
3758 }
3759 memcpy(buffer, buf, len);
3760 while (xmlIsNameChar(ctxt, c)) {
3761 if (count++ > XML_PARSER_CHUNK_SIZE) {
3762 count = 0;
3763 GROW;
3764 if (ctxt->instate == XML_PARSER_EOF) {
3765 xmlFree(buffer);
3766 return(NULL);
3767 }
3768 }
3769 if (len + 10 > max) {
3770 xmlChar *tmp;
3771
3772 if ((max > XML_MAX_NAME_LENGTH) &&
3773 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3774 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3775 xmlFree(buffer);
3776 return(NULL);
3777 }
3778 max *= 2;
3779 tmp = (xmlChar *) xmlRealloc(buffer,
3780 max * sizeof(xmlChar));
3781 if (tmp == NULL) {
3782 xmlErrMemory(ctxt, NULL);
3783 xmlFree(buffer);
3784 return(NULL);
3785 }
3786 buffer = tmp;
3787 }
3788 COPY_BUF(l,buffer,len,c);
3789 NEXTL(l);
3790 c = CUR_CHAR(l);
3791 }
3792 buffer[len] = 0;
3793 return(buffer);
3794 }
3795 }
3796 if (len == 0)
3797 return(NULL);
3798 if ((len > XML_MAX_NAME_LENGTH) &&
3799 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3800 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3801 return(NULL);
3802 }
3803 return(xmlStrndup(buf, len));
3804}
3805
3806/**
3807 * xmlParseEntityValue:
3808 * @ctxt: an XML parser context
3809 * @orig: if non-NULL store a copy of the original entity value
3810 *
3811 * parse a value for ENTITY declarations
3812 *
3813 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3814 * "'" ([^%&'] | PEReference | Reference)* "'"
3815 *
3816 * Returns the EntityValue parsed with reference substituted or NULL
3817 */
3818
3819xmlChar *
3820xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3821 xmlChar *buf = NULL;
3822 int len = 0;
3823 int size = XML_PARSER_BUFFER_SIZE;
3824 int c, l;
3825 xmlChar stop;
3826 xmlChar *ret = NULL;
3827 const xmlChar *cur = NULL;
3828 xmlParserInputPtr input;
3829
3830 if (RAW == '"') stop = '"';
3831 else if (RAW == '\'') stop = '\'';
3832 else {
3833 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3834 return(NULL);
3835 }
3836 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3837 if (buf == NULL) {
3838 xmlErrMemory(ctxt, NULL);
3839 return(NULL);
3840 }
3841
3842 /*
3843 * The content of the entity definition is copied in a buffer.
3844 */
3845
3846 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3847 input = ctxt->input;
3848 GROW;
3849 if (ctxt->instate == XML_PARSER_EOF) {
3850 xmlFree(buf);
3851 return(NULL);
3852 }
3853 NEXT;
3854 c = CUR_CHAR(l);
3855 /*
3856 * NOTE: 4.4.5 Included in Literal
3857 * When a parameter entity reference appears in a literal entity
3858 * value, ... a single or double quote character in the replacement
3859 * text is always treated as a normal data character and will not
3860 * terminate the literal.
3861 * In practice it means we stop the loop only when back at parsing
3862 * the initial entity and the quote is found
3863 */
3864 while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3865 (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3866 if (len + 5 >= size) {
3867 xmlChar *tmp;
3868
3869 size *= 2;
3870 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3871 if (tmp == NULL) {
3872 xmlErrMemory(ctxt, NULL);
3873 xmlFree(buf);
3874 return(NULL);
3875 }
3876 buf = tmp;
3877 }
3878 COPY_BUF(l,buf,len,c);
3879 NEXTL(l);
3880 /*
3881 * Pop-up of finished entities.
3882 */
3883 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3884 xmlPopInput(ctxt);
3885
3886 GROW;
3887 c = CUR_CHAR(l);
3888 if (c == 0) {
3889 GROW;
3890 c = CUR_CHAR(l);
3891 }
3892 }
3893 buf[len] = 0;
3894 if (ctxt->instate == XML_PARSER_EOF) {
3895 xmlFree(buf);
3896 return(NULL);
3897 }
3898
3899 /*
3900 * Raise problem w.r.t. '&' and '%' being used in non-entities
3901 * reference constructs. Note Charref will be handled in
3902 * xmlStringDecodeEntities()
3903 */
3904 cur = buf;
3905 while (*cur != 0) { /* non input consuming */
3906 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3907 xmlChar *name;
3908 xmlChar tmp = *cur;
3909
3910 cur++;
3911 name = xmlParseStringName(ctxt, &cur);
3912 if ((name == NULL) || (*cur != ';')) {
3913 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3914 "EntityValue: '%c' forbidden except for entities references\n",
3915 tmp);
3916 }
3917 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3918 (ctxt->inputNr == 1)) {
3919 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3920 }
3921 if (name != NULL)
3922 xmlFree(name);
3923 if (*cur == 0)
3924 break;
3925 }
3926 cur++;
3927 }
3928
3929 /*
3930 * Then PEReference entities are substituted.
3931 */
3932 if (c != stop) {
3933 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3934 xmlFree(buf);
3935 } else {
3936 NEXT;
3937 /*
3938 * NOTE: 4.4.7 Bypassed
3939 * When a general entity reference appears in the EntityValue in
3940 * an entity declaration, it is bypassed and left as is.
3941 * so XML_SUBSTITUTE_REF is not set here.
3942 */
3943 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3944 0, 0, 0);
3945 if (orig != NULL)
3946 *orig = buf;
3947 else
3948 xmlFree(buf);
3949 }
3950
3951 return(ret);
3952}
3953
3954/**
3955 * xmlParseAttValueComplex:
3956 * @ctxt: an XML parser context
3957 * @len: the resulting attribute len
3958 * @normalize: wether to apply the inner normalization
3959 *
3960 * parse a value for an attribute, this is the fallback function
3961 * of xmlParseAttValue() when the attribute parsing requires handling
3962 * of non-ASCII characters, or normalization compaction.
3963 *
3964 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3965 */
3966static xmlChar *
3967xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3968 xmlChar limit = 0;
3969 xmlChar *buf = NULL;
3970 xmlChar *rep = NULL;
3971 size_t len = 0;
3972 size_t buf_size = 0;
3973 int c, l, in_space = 0;
3974 xmlChar *current = NULL;
3975 xmlEntityPtr ent;
3976
3977 if (NXT(0) == '"') {
3978 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3979 limit = '"';
3980 NEXT;
3981 } else if (NXT(0) == '\'') {
3982 limit = '\'';
3983 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3984 NEXT;
3985 } else {
3986 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3987 return(NULL);
3988 }
3989
3990 /*
3991 * allocate a translation buffer.
3992 */
3993 buf_size = XML_PARSER_BUFFER_SIZE;
3994 buf = (xmlChar *) xmlMallocAtomic(buf_size);
3995 if (buf == NULL) goto mem_error;
3996
3997 /*
3998 * OK loop until we reach one of the ending char or a size limit.
3999 */
4000 c = CUR_CHAR(l);
4001 while (((NXT(0) != limit) && /* checked */
4002 (IS_CHAR(c)) && (c != '<')) &&
4003 (ctxt->instate != XML_PARSER_EOF)) {
4004 /*
4005 * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
4006 * special option is given
4007 */
4008 if ((len > XML_MAX_TEXT_LENGTH) &&
4009 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4010 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4011 "AttValue length too long\n");
4012 goto mem_error;
4013 }
4014 if (c == 0) break;
4015 if (c == '&') {
4016 in_space = 0;
4017 if (NXT(1) == '#') {
4018 int val = xmlParseCharRef(ctxt);
4019
4020 if (val == '&') {
4021 if (ctxt->replaceEntities) {
4022 if (len + 10 > buf_size) {
4023 growBuffer(buf, 10);
4024 }
4025 buf[len++] = '&';
4026 } else {
4027 /*
4028 * The reparsing will be done in xmlStringGetNodeList()
4029 * called by the attribute() function in SAX.c
4030 */
4031 if (len + 10 > buf_size) {
4032 growBuffer(buf, 10);
4033 }
4034 buf[len++] = '&';
4035 buf[len++] = '#';
4036 buf[len++] = '3';
4037 buf[len++] = '8';
4038 buf[len++] = ';';
4039 }
4040 } else if (val != 0) {
4041 if (len + 10 > buf_size) {
4042 growBuffer(buf, 10);
4043 }
4044 len += xmlCopyChar(0, &buf[len], val);
4045 }
4046 } else {
4047 ent = xmlParseEntityRef(ctxt);
4048 ctxt->nbentities++;
4049 if (ent != NULL)
4050 ctxt->nbentities += ent->owner;
4051 if ((ent != NULL) &&
4052 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
4053 if (len + 10 > buf_size) {
4054 growBuffer(buf, 10);
4055 }
4056 if ((ctxt->replaceEntities == 0) &&
4057 (ent->content[0] == '&')) {
4058 buf[len++] = '&';
4059 buf[len++] = '#';
4060 buf[len++] = '3';
4061 buf[len++] = '8';
4062 buf[len++] = ';';
4063 } else {
4064 buf[len++] = ent->content[0];
4065 }
4066 } else if ((ent != NULL) &&
4067 (ctxt->replaceEntities != 0)) {
4068 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4069 rep = xmlStringDecodeEntities(ctxt, ent->content,
4070 XML_SUBSTITUTE_REF,
4071 0, 0, 0);
4072 if (rep != NULL) {
4073 current = rep;
4074 while (*current != 0) { /* non input consuming */
4075 if ((*current == 0xD) || (*current == 0xA) ||
4076 (*current == 0x9)) {
4077 buf[len++] = 0x20;
4078 current++;
4079 } else
4080 buf[len++] = *current++;
4081 if (len + 10 > buf_size) {
4082 growBuffer(buf, 10);
4083 }
4084 }
4085 xmlFree(rep);
4086 rep = NULL;
4087 }
4088 } else {
4089 if (len + 10 > buf_size) {
4090 growBuffer(buf, 10);
4091 }
4092 if (ent->content != NULL)
4093 buf[len++] = ent->content[0];
4094 }
4095 } else if (ent != NULL) {
4096 int i = xmlStrlen(ent->name);
4097 const xmlChar *cur = ent->name;
4098
4099 /*
4100 * This may look absurd but is needed to detect
4101 * entities problems
4102 */
4103 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4104 (ent->content != NULL) && (ent->checked == 0)) {
4105 unsigned long oldnbent = ctxt->nbentities;
4106
4107 rep = xmlStringDecodeEntities(ctxt, ent->content,
4108 XML_SUBSTITUTE_REF, 0, 0, 0);
4109
4110 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
4111 if (rep != NULL) {
4112 if (xmlStrchr(rep, '<'))
4113 ent->checked |= 1;
4114 xmlFree(rep);
4115 rep = NULL;
4116 }
4117 }
4118
4119 /*
4120 * Just output the reference
4121 */
4122 buf[len++] = '&';
4123 while (len + i + 10 > buf_size) {
4124 growBuffer(buf, i + 10);
4125 }
4126 for (;i > 0;i--)
4127 buf[len++] = *cur++;
4128 buf[len++] = ';';
4129 }
4130 }
4131 } else {
4132 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4133 if ((len != 0) || (!normalize)) {
4134 if ((!normalize) || (!in_space)) {
4135 COPY_BUF(l,buf,len,0x20);
4136 while (len + 10 > buf_size) {
4137 growBuffer(buf, 10);
4138 }
4139 }
4140 in_space = 1;
4141 }
4142 } else {
4143 in_space = 0;
4144 COPY_BUF(l,buf,len,c);
4145 if (len + 10 > buf_size) {
4146 growBuffer(buf, 10);
4147 }
4148 }
4149 NEXTL(l);
4150 }
4151 GROW;
4152 c = CUR_CHAR(l);
4153 }
4154 if (ctxt->instate == XML_PARSER_EOF)
4155 goto error;
4156
4157 if ((in_space) && (normalize)) {
4158 while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4159 }
4160 buf[len] = 0;
4161 if (RAW == '<') {
4162 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4163 } else if (RAW != limit) {
4164 if ((c != 0) && (!IS_CHAR(c))) {
4165 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4166 "invalid character in attribute value\n");
4167 } else {
4168 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4169 "AttValue: ' expected\n");
4170 }
4171 } else
4172 NEXT;
4173
4174 /*
4175 * There we potentially risk an overflow, don't allow attribute value of
4176 * length more than INT_MAX it is a very reasonnable assumption !
4177 */
4178 if (len >= INT_MAX) {
4179 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4180 "AttValue length too long\n");
4181 goto mem_error;
4182 }
4183
4184 if (attlen != NULL) *attlen = (int) len;
4185 return(buf);
4186
4187mem_error:
4188 xmlErrMemory(ctxt, NULL);
4189error:
4190 if (buf != NULL)
4191 xmlFree(buf);
4192 if (rep != NULL)
4193 xmlFree(rep);
4194 return(NULL);
4195}
4196
4197/**
4198 * xmlParseAttValue:
4199 * @ctxt: an XML parser context
4200 *
4201 * parse a value for an attribute
4202 * Note: the parser won't do substitution of entities here, this
4203 * will be handled later in xmlStringGetNodeList
4204 *
4205 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4206 * "'" ([^<&'] | Reference)* "'"
4207 *
4208 * 3.3.3 Attribute-Value Normalization:
4209 * Before the value of an attribute is passed to the application or
4210 * checked for validity, the XML processor must normalize it as follows:
4211 * - a character reference is processed by appending the referenced
4212 * character to the attribute value
4213 * - an entity reference is processed by recursively processing the
4214 * replacement text of the entity
4215 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4216 * appending #x20 to the normalized value, except that only a single
4217 * #x20 is appended for a "#xD#xA" sequence that is part of an external
4218 * parsed entity or the literal entity value of an internal parsed entity
4219 * - other characters are processed by appending them to the normalized value
4220 * If the declared value is not CDATA, then the XML processor must further
4221 * process the normalized attribute value by discarding any leading and
4222 * trailing space (#x20) characters, and by replacing sequences of space
4223 * (#x20) characters by a single space (#x20) character.
4224 * All attributes for which no declaration has been read should be treated
4225 * by a non-validating parser as if declared CDATA.
4226 *
4227 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4228 */
4229
4230
4231xmlChar *
4232xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4233 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4234 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4235}
4236
4237/**
4238 * xmlParseSystemLiteral:
4239 * @ctxt: an XML parser context
4240 *
4241 * parse an XML Literal
4242 *
4243 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4244 *
4245 * Returns the SystemLiteral parsed or NULL
4246 */
4247
4248xmlChar *
4249xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4250 xmlChar *buf = NULL;
4251 int len = 0;
4252 int size = XML_PARSER_BUFFER_SIZE;
4253 int cur, l;
4254 xmlChar stop;
4255 int state = ctxt->instate;
4256 int count = 0;
4257
4258 SHRINK;
4259 if (RAW == '"') {
4260 NEXT;
4261 stop = '"';
4262 } else if (RAW == '\'') {
4263 NEXT;
4264 stop = '\'';
4265 } else {
4266 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4267 return(NULL);
4268 }
4269
4270 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4271 if (buf == NULL) {
4272 xmlErrMemory(ctxt, NULL);
4273 return(NULL);
4274 }
4275 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4276 cur = CUR_CHAR(l);
4277 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4278 if (len + 5 >= size) {
4279 xmlChar *tmp;
4280
4281 if ((size > XML_MAX_NAME_LENGTH) &&
4282 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4283 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4284 xmlFree(buf);
4285 ctxt->instate = (xmlParserInputState) state;
4286 return(NULL);
4287 }
4288 size *= 2;
4289 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4290 if (tmp == NULL) {
4291 xmlFree(buf);
4292 xmlErrMemory(ctxt, NULL);
4293 ctxt->instate = (xmlParserInputState) state;
4294 return(NULL);
4295 }
4296 buf = tmp;
4297 }
4298 count++;
4299 if (count > 50) {
4300 GROW;
4301 count = 0;
4302 if (ctxt->instate == XML_PARSER_EOF) {
4303 xmlFree(buf);
4304 return(NULL);
4305 }
4306 }
4307 COPY_BUF(l,buf,len,cur);
4308 NEXTL(l);
4309 cur = CUR_CHAR(l);
4310 if (cur == 0) {
4311 GROW;
4312 SHRINK;
4313 cur = CUR_CHAR(l);
4314 }
4315 }
4316 buf[len] = 0;
4317 ctxt->instate = (xmlParserInputState) state;
4318 if (!IS_CHAR(cur)) {
4319 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4320 } else {
4321 NEXT;
4322 }
4323 return(buf);
4324}
4325
4326/**
4327 * xmlParsePubidLiteral:
4328 * @ctxt: an XML parser context
4329 *
4330 * parse an XML public literal
4331 *
4332 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4333 *
4334 * Returns the PubidLiteral parsed or NULL.
4335 */
4336
4337xmlChar *
4338xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4339 xmlChar *buf = NULL;
4340 int len = 0;
4341 int size = XML_PARSER_BUFFER_SIZE;
4342 xmlChar cur;
4343 xmlChar stop;
4344 int count = 0;
4345 xmlParserInputState oldstate = ctxt->instate;
4346
4347 SHRINK;
4348 if (RAW == '"') {
4349 NEXT;
4350 stop = '"';
4351 } else if (RAW == '\'') {
4352 NEXT;
4353 stop = '\'';
4354 } else {
4355 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4356 return(NULL);
4357 }
4358 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4359 if (buf == NULL) {
4360 xmlErrMemory(ctxt, NULL);
4361 return(NULL);
4362 }
4363 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4364 cur = CUR;
4365 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4366 if (len + 1 >= size) {
4367 xmlChar *tmp;
4368
4369 if ((size > XML_MAX_NAME_LENGTH) &&
4370 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4371 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4372 xmlFree(buf);
4373 return(NULL);
4374 }
4375 size *= 2;
4376 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4377 if (tmp == NULL) {
4378 xmlErrMemory(ctxt, NULL);
4379 xmlFree(buf);
4380 return(NULL);
4381 }
4382 buf = tmp;
4383 }
4384 buf[len++] = cur;
4385 count++;
4386 if (count > 50) {
4387 GROW;
4388 count = 0;
4389 if (ctxt->instate == XML_PARSER_EOF) {
4390 xmlFree(buf);
4391 return(NULL);
4392 }
4393 }
4394 NEXT;
4395 cur = CUR;
4396 if (cur == 0) {
4397 GROW;
4398 SHRINK;
4399 cur = CUR;
4400 }
4401 }
4402 buf[len] = 0;
4403 if (cur != stop) {
4404 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4405 } else {
4406 NEXT;
4407 }
4408 ctxt->instate = oldstate;
4409 return(buf);
4410}
4411
4412static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
4413
/*
 * Lookup table used for the test in the inner loop of the char data
 * testing.  Entry i holds i itself when byte i lets that loop keep
 * going, and 0x00 when the loop has to stop: control characters other
 * than 0x9 (CR and LF are handled separately), '&', '<', ']' and every
 * non-ASCII byte.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20: '&' (0x26) stops the scan */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30: '<' (0x3C) stops the scan */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50: ']' (0x5D) stops the scan */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 - 0xFF: non-ascii */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4451
4452/**
4453 * xmlParseCharData:
4454 * @ctxt: an XML parser context
4455 * @cdata: int indicating whether we are within a CDATA section
4456 *
4457 * parse a CharData section.
4458 * if we are within a CDATA section ']]>' marks an end of section.
4459 *
4460 * The right angle bracket (>) may be represented using the string "&gt;",
4461 * and must, for compatibility, be escaped using "&gt;" or a character
4462 * reference when it appears in the string "]]>" in content, when that
4463 * string is not marking the end of a CDATA section.
4464 *
4465 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4466 */
4467
4468void
4469xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
4470 const xmlChar *in;
4471 int nbchar = 0;
4472 int line = ctxt->input->line;
4473 int col = ctxt->input->col;
4474 int ccol;
4475
4476 SHRINK;
4477 GROW;
4478 /*
4479 * Accelerated common case where input don't need to be
4480 * modified before passing it to the handler.
4481 */
4482 if (!cdata) {
4483 in = ctxt->input->cur;
4484 do {
4485get_more_space:
4486 while (*in == 0x20) { in++; ctxt->input->col++; }
4487 if (*in == 0xA) {
4488 do {
4489 ctxt->input->line++; ctxt->input->col = 1;
4490 in++;
4491 } while (*in == 0xA);
4492 goto get_more_space;
4493 }
4494 if (*in == '<') {
4495 nbchar = in - ctxt->input->cur;
4496 if (nbchar > 0) {
4497 const xmlChar *tmp = ctxt->input->cur;
4498 ctxt->input->cur = in;
4499
4500 if ((ctxt->sax != NULL) &&
4501 (ctxt->sax->ignorableWhitespace !=
4502 ctxt->sax->characters)) {
4503 if (areBlanks(ctxt, tmp, nbchar, 1)) {
4504 if (ctxt->sax->ignorableWhitespace != NULL)
4505 ctxt->sax->ignorableWhitespace(ctxt->userData,
4506 tmp, nbchar);
4507 } else {
4508 if (ctxt->sax->characters != NULL)
4509 ctxt->sax->characters(ctxt->userData,
4510 tmp, nbchar);
4511 if (*ctxt->space == -1)
4512 *ctxt->space = -2;
4513 }
4514 } else if ((ctxt->sax != NULL) &&
4515 (ctxt->sax->characters != NULL)) {
4516 ctxt->sax->characters(ctxt->userData,
4517 tmp, nbchar);
4518 }
4519 }
4520 return;
4521 }
4522
4523get_more:
4524 ccol = ctxt->input->col;
4525 while (test_char_data[*in]) {
4526 in++;
4527 ccol++;
4528 }
4529 ctxt->input->col = ccol;
4530 if (*in == 0xA) {
4531 do {
4532 ctxt->input->line++; ctxt->input->col = 1;
4533 in++;
4534 } while (*in == 0xA);
4535 goto get_more;
4536 }
4537 if (*in == ']') {
4538 if ((in[1] == ']') && (in[2] == '>')) {
4539 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4540 ctxt->input->cur = in;
4541 return;
4542 }
4543 in++;
4544 ctxt->input->col++;
4545 goto get_more;
4546 }
4547 nbchar = in - ctxt->input->cur;
4548 if (nbchar > 0) {
4549 if ((ctxt->sax != NULL) &&
4550 (ctxt->sax->ignorableWhitespace !=
4551 ctxt->sax->characters) &&
4552 (IS_BLANK_CH(*ctxt->input->cur))) {
4553 const xmlChar *tmp = ctxt->input->cur;
4554 ctxt->input->cur = in;
4555
4556 if (areBlanks(ctxt, tmp, nbchar, 0)) {
4557 if (ctxt->sax->ignorableWhitespace != NULL)
4558 ctxt->sax->ignorableWhitespace(ctxt->userData,
4559 tmp, nbchar);
4560 } else {
4561 if (ctxt->sax->characters != NULL)
4562 ctxt->sax->characters(ctxt->userData,
4563 tmp, nbchar);
4564 if (*ctxt->space == -1)
4565 *ctxt->space = -2;
4566 }
4567 line = ctxt->input->line;
4568 col = ctxt->input->col;
4569 } else if (ctxt->sax != NULL) {
4570 if (ctxt->sax->characters != NULL)
4571 ctxt->sax->characters(ctxt->userData,
4572 ctxt->input->cur, nbchar);
4573 line = ctxt->input->line;
4574 col = ctxt->input->col;
4575 }
4576 /* something really bad happened in the SAX callback */
4577 if (ctxt->instate != XML_PARSER_CONTENT)
4578 return;
4579 }
4580 ctxt->input->cur = in;
4581 if (*in == 0xD) {
4582 in++;
4583 if (*in == 0xA) {
4584 ctxt->input->cur = in;
4585 in++;
4586 ctxt->input->line++; ctxt->input->col = 1;
4587 continue; /* while */
4588 }
4589 in--;
4590 }
4591 if (*in == '<') {
4592 return;
4593 }
4594 if (*in == '&') {
4595 return;
4596 }
4597 SHRINK;
4598 GROW;
4599 if (ctxt->instate == XML_PARSER_EOF)
4600 return;
4601 in = ctxt->input->cur;
4602 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4603 nbchar = 0;
4604 }
4605 ctxt->input->line = line;
4606 ctxt->input->col = col;
4607 xmlParseCharDataComplex(ctxt, cdata);
4608}
4609
4610/**
4611 * xmlParseCharDataComplex:
4612 * @ctxt: an XML parser context
4613 * @cdata: int indicating whether we are within a CDATA section
4614 *
4615 * parse a CharData section.this is the fallback function
4616 * of xmlParseCharData() when the parsing requires handling
4617 * of non-ASCII characters.
4618 */
4619static void
4620xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
4621 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4622 int nbchar = 0;
4623 int cur, l;
4624 int count = 0;
4625
4626 SHRINK;
4627 GROW;
4628 cur = CUR_CHAR(l);
4629 while ((cur != '<') && /* checked */
4630 (cur != '&') &&
4631 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4632 if ((cur == ']') && (NXT(1) == ']') &&
4633 (NXT(2) == '>')) {
4634 if (cdata) break;
4635 else {
4636 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4637 }
4638 }
4639 COPY_BUF(l,buf,nbchar,cur);
4640 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4641 buf[nbchar] = 0;
4642
4643 /*
4644 * OK the segment is to be consumed as chars.
4645 */
4646 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4647 if (areBlanks(ctxt, buf, nbchar, 0)) {
4648 if (ctxt->sax->ignorableWhitespace != NULL)
4649 ctxt->sax->ignorableWhitespace(ctxt->userData,
4650 buf, nbchar);
4651 } else {
4652 if (ctxt->sax->characters != NULL)
4653 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4654 if ((ctxt->sax->characters !=
4655 ctxt->sax->ignorableWhitespace) &&
4656 (*ctxt->space == -1))
4657 *ctxt->space = -2;
4658 }
4659 }
4660 nbchar = 0;
4661 /* something really bad happened in the SAX callback */
4662 if (ctxt->instate != XML_PARSER_CONTENT)
4663 return;
4664 }
4665 count++;
4666 if (count > 50) {
4667 GROW;
4668 count = 0;
4669 if (ctxt->instate == XML_PARSER_EOF)
4670 return;
4671 }
4672 NEXTL(l);
4673 cur = CUR_CHAR(l);
4674 }
4675 if (nbchar != 0) {
4676 buf[nbchar] = 0;
4677 /*
4678 * OK the segment is to be consumed as chars.
4679 */
4680 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4681 if (areBlanks(ctxt, buf, nbchar, 0)) {
4682 if (ctxt->sax->ignorableWhitespace != NULL)
4683 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4684 } else {
4685 if (ctxt->sax->characters != NULL)
4686 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4687 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4688 (*ctxt->space == -1))
4689 *ctxt->space = -2;
4690 }
4691 }
4692 }
4693 if ((cur != 0) && (!IS_CHAR(cur))) {
4694 /* Generate the error and skip the offending character */
4695 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4696 "PCDATA invalid Char value %d\n",
4697 cur);
4698 NEXTL(l);
4699 }
4700}
4701
4702/**
4703 * xmlParseExternalID:
4704 * @ctxt: an XML parser context
4705 * @publicID: a xmlChar** receiving PubidLiteral
4706 * @strict: indicate whether we should restrict parsing to only
4707 * production [75], see NOTE below
4708 *
4709 * Parse an External ID or a Public ID
4710 *
4711 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4712 * 'PUBLIC' S PubidLiteral S SystemLiteral
4713 *
4714 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4715 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4716 *
4717 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4718 *
4719 * Returns the function returns SystemLiteral and in the second
4720 * case publicID receives PubidLiteral, is strict is off
4721 * it is possible to return NULL and have publicID set.
4722 */
4723
4724xmlChar *
4725xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4726 xmlChar *URI = NULL;
4727
4728 SHRINK;
4729
4730 *publicID = NULL;
4731 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4732 SKIP(6);
4733 if (!IS_BLANK_CH(CUR)) {
4734 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4735 "Space required after 'SYSTEM'\n");
4736 }
4737 SKIP_BLANKS;
4738 URI = xmlParseSystemLiteral(ctxt);
4739 if (URI == NULL) {
4740 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4741 }
4742 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4743 SKIP(6);
4744 if (!IS_BLANK_CH(CUR)) {
4745 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4746 "Space required after 'PUBLIC'\n");
4747 }
4748 SKIP_BLANKS;
4749 *publicID = xmlParsePubidLiteral(ctxt);
4750 if (*publicID == NULL) {
4751 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4752 }
4753 if (strict) {
4754 /*
4755 * We don't handle [83] so "S SystemLiteral" is required.
4756 */
4757 if (!IS_BLANK_CH(CUR)) {
4758 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4759 "Space required after the Public Identifier\n");
4760 }
4761 } else {
4762 /*
4763 * We handle [83] so we return immediately, if
4764 * "S SystemLiteral" is not detected. From a purely parsing
4765 * point of view that's a nice mess.
4766 */
4767 const xmlChar *ptr;
4768 GROW;
4769
4770 ptr = CUR_PTR;
4771 if (!IS_BLANK_CH(*ptr)) return(NULL);
4772
4773 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
4774 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4775 }
4776 SKIP_BLANKS;
4777 URI = xmlParseSystemLiteral(ctxt);
4778 if (URI == NULL) {
4779 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4780 }
4781 }
4782 return(URI);
4783}
4784
4785/**
4786 * xmlParseCommentComplex:
4787 * @ctxt: an XML parser context
4788 * @buf: the already parsed part of the buffer
4789 * @len: number of bytes filles in the buffer
4790 * @size: allocated size of the buffer
4791 *
4792 * Skip an XML (SGML) comment <!-- .... -->
4793 * The spec says that "For compatibility, the string "--" (double-hyphen)
4794 * must not occur within comments. "
4795 * This is the slow routine in case the accelerator for ascii didn't work
4796 *
4797 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4798 */
4799static void
4800xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4801 size_t len, size_t size) {
4802 int q, ql;
4803 int r, rl;
4804 int cur, l;
4805 size_t count = 0;
4806 int inputid;
4807
4808 inputid = ctxt->input->id;
4809
4810 if (buf == NULL) {
4811 len = 0;
4812 size = XML_PARSER_BUFFER_SIZE;
4813 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4814 if (buf == NULL) {
4815 xmlErrMemory(ctxt, NULL);
4816 return;
4817 }
4818 }
4819 GROW; /* Assure there's enough input data */
4820 q = CUR_CHAR(ql);
4821 if (q == 0)
4822 goto not_terminated;
4823 if (!IS_CHAR(q)) {
4824 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4825 "xmlParseComment: invalid xmlChar value %d\n",
4826 q);
4827 xmlFree (buf);
4828 return;
4829 }
4830 NEXTL(ql);
4831 r = CUR_CHAR(rl);
4832 if (r == 0)
4833 goto not_terminated;
4834 if (!IS_CHAR(r)) {
4835 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4836 "xmlParseComment: invalid xmlChar value %d\n",
4837 q);
4838 xmlFree (buf);
4839 return;
4840 }
4841 NEXTL(rl);
4842 cur = CUR_CHAR(l);
4843 if (cur == 0)
4844 goto not_terminated;
4845 while (IS_CHAR(cur) && /* checked */
4846 ((cur != '>') ||
4847 (r != '-') || (q != '-'))) {
4848 if ((r == '-') && (q == '-')) {
4849 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4850 }
4851 if ((len > XML_MAX_TEXT_LENGTH) &&
4852 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4853 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4854 "Comment too big found", NULL);
4855 xmlFree (buf);
4856 return;
4857 }
4858 if (len + 5 >= size) {
4859 xmlChar *new_buf;
4860 size_t new_size;
4861
4862 new_size = size * 2;
4863 new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4864 if (new_buf == NULL) {
4865 xmlFree (buf);
4866 xmlErrMemory(ctxt, NULL);
4867 return;
4868 }
4869 buf = new_buf;
4870 size = new_size;
4871 }
4872 COPY_BUF(ql,buf,len,q);
4873 q = r;
4874 ql = rl;
4875 r = cur;
4876 rl = l;
4877
4878 count++;
4879 if (count > 50) {
4880 GROW;
4881 count = 0;
4882 if (ctxt->instate == XML_PARSER_EOF) {
4883 xmlFree(buf);
4884 return;
4885 }
4886 }
4887 NEXTL(l);
4888 cur = CUR_CHAR(l);
4889 if (cur == 0) {
4890 SHRINK;
4891 GROW;
4892 cur = CUR_CHAR(l);
4893 }
4894 }
4895 buf[len] = 0;
4896 if (cur == 0) {
4897 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4898 "Comment not terminated \n<!--%.50s\n", buf);
4899 } else if (!IS_CHAR(cur)) {
4900 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4901 "xmlParseComment: invalid xmlChar value %d\n",
4902 cur);
4903 } else {
4904 if (inputid != ctxt->input->id) {
4905 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4906 "Comment doesn't start and stop in the same entity\n");
4907 }
4908 NEXT;
4909 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4910 (!ctxt->disableSAX))
4911 ctxt->sax->comment(ctxt->userData, buf);
4912 }
4913 xmlFree(buf);
4914 return;
4915not_terminated:
4916 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4917 "Comment not terminated\n", NULL);
4918 xmlFree(buf);
4919 return;
4920}
4921
4922/**
4923 * xmlParseComment:
4924 * @ctxt: an XML parser context
4925 *
4926 * Skip an XML (SGML) comment <!-- .... -->
4927 * The spec says that "For compatibility, the string "--" (double-hyphen)
4928 * must not occur within comments. "
4929 *
4930 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4931 */
4932void
4933xmlParseComment(xmlParserCtxtPtr ctxt) {
4934 xmlChar *buf = NULL;
4935 size_t size = XML_PARSER_BUFFER_SIZE;
4936 size_t len = 0;
4937 xmlParserInputState state;
4938 const xmlChar *in;
4939 size_t nbchar = 0;
4940 int ccol;
4941 int inputid;
4942
4943 /*
4944 * Check that there is a comment right here.
4945 */
4946 if ((RAW != '<') || (NXT(1) != '!') ||
4947 (NXT(2) != '-') || (NXT(3) != '-')) return;
4948 state = ctxt->instate;
4949 ctxt->instate = XML_PARSER_COMMENT;
4950 inputid = ctxt->input->id;
4951 SKIP(4);
4952 SHRINK;
4953 GROW;
4954
4955 /*
4956 * Accelerated common case where input don't need to be
4957 * modified before passing it to the handler.
4958 */
4959 in = ctxt->input->cur;
4960 do {
4961 if (*in == 0xA) {
4962 do {
4963 ctxt->input->line++; ctxt->input->col = 1;
4964 in++;
4965 } while (*in == 0xA);
4966 }
4967get_more:
4968 ccol = ctxt->input->col;
4969 while (((*in > '-') && (*in <= 0x7F)) ||
4970 ((*in >= 0x20) && (*in < '-')) ||
4971 (*in == 0x09)) {
4972 in++;
4973 ccol++;
4974 }
4975 ctxt->input->col = ccol;
4976 if (*in == 0xA) {
4977 do {
4978 ctxt->input->line++; ctxt->input->col = 1;
4979 in++;
4980 } while (*in == 0xA);
4981 goto get_more;
4982 }
4983 nbchar = in - ctxt->input->cur;
4984 /*
4985 * save current set of data
4986 */
4987 if (nbchar > 0) {
4988 if ((ctxt->sax != NULL) &&
4989 (ctxt->sax->comment != NULL)) {
4990 if (buf == NULL) {
4991 if ((*in == '-') && (in[1] == '-'))
4992 size = nbchar + 1;
4993 else
4994 size = XML_PARSER_BUFFER_SIZE + nbchar;
4995 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4996 if (buf == NULL) {
4997 xmlErrMemory(ctxt, NULL);
4998 ctxt->instate = state;
4999 return;
5000 }
5001 len = 0;
5002 } else if (len + nbchar + 1 >= size) {
5003 xmlChar *new_buf;
5004 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
5005 new_buf = (xmlChar *) xmlRealloc(buf,
5006 size * sizeof(xmlChar));
5007 if (new_buf == NULL) {
5008 xmlFree (buf);
5009 xmlErrMemory(ctxt, NULL);
5010 ctxt->instate = state;
5011 return;
5012 }
5013 buf = new_buf;
5014 }
5015 memcpy(&buf[len], ctxt->input->cur, nbchar);
5016 len += nbchar;
5017 buf[len] = 0;
5018 }
5019 }
5020 if ((len > XML_MAX_TEXT_LENGTH) &&
5021 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5022 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5023 "Comment too big found", NULL);
5024 xmlFree (buf);
5025 return;
5026 }
5027 ctxt->input->cur = in;
5028 if (*in == 0xA) {
5029 in++;
5030 ctxt->input->line++; ctxt->input->col = 1;
5031 }
5032 if (*in == 0xD) {
5033 in++;
5034 if (*in == 0xA) {
5035 ctxt->input->cur = in;
5036 in++;
5037 ctxt->input->line++; ctxt->input->col = 1;
5038 continue; /* while */
5039 }
5040 in--;
5041 }
5042 SHRINK;
5043 GROW;
5044 if (ctxt->instate == XML_PARSER_EOF) {
5045 xmlFree(buf);
5046 return;
5047 }
5048 in = ctxt->input->cur;
5049 if (*in == '-') {
5050 if (in[1] == '-') {
5051 if (in[2] == '>') {
5052 if (ctxt->input->id != inputid) {
5053 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5054 "comment doesn't start and stop in the same entity\n");
5055 }
5056 SKIP(3);
5057 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5058 (!ctxt->disableSAX)) {
5059 if (buf != NULL)
5060 ctxt->sax->comment(ctxt->userData, buf);
5061 else
5062 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5063 }
5064 if (buf != NULL)
5065 xmlFree(buf);
5066 if (ctxt->instate != XML_PARSER_EOF)
5067 ctxt->instate = state;
5068 return;
5069 }
5070 if (buf != NULL) {
5071 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5072 "Double hyphen within comment: "
5073 "<!--%.50s\n",
5074 buf);
5075 } else
5076 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5077 "Double hyphen within comment\n", NULL);
5078 in++;
5079 ctxt->input->col++;
5080 }
5081 in++;
5082 ctxt->input->col++;
5083 goto get_more;
5084 }
5085 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
5086 xmlParseCommentComplex(ctxt, buf, len, size);
5087 ctxt->instate = state;
5088 return;
5089}
5090
5091
5092/**
5093 * xmlParsePITarget:
5094 * @ctxt: an XML parser context
5095 *
5096 * parse the name of a PI
5097 *
5098 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5099 *
5100 * Returns the PITarget name or NULL
5101 */
5102
5103const xmlChar *
5104xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5105 const xmlChar *name;
5106
5107 name = xmlParseName(ctxt);
5108 if ((name != NULL) &&
5109 ((name[0] == 'x') || (name[0] == 'X')) &&
5110 ((name[1] == 'm') || (name[1] == 'M')) &&
5111 ((name[2] == 'l') || (name[2] == 'L'))) {
5112 int i;
5113 if ((name[0] == 'x') && (name[1] == 'm') &&
5114 (name[2] == 'l') && (name[3] == 0)) {
5115 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5116 "XML declaration allowed only at the start of the document\n");
5117 return(name);
5118 } else if (name[3] == 0) {
5119 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5120 return(name);
5121 }
5122 for (i = 0;;i++) {
5123 if (xmlW3CPIs[i] == NULL) break;
5124 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5125 return(name);
5126 }
5127 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5128 "xmlParsePITarget: invalid name prefix 'xml'\n",
5129 NULL, NULL);
5130 }
5131 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5132 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5133 "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5134 }
5135 return(name);
5136}
5137
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * Syntax errors produce an XML_WAR_CATALOG_PI warning, except for a
 * missing '=' after the "catalog" keyword, which returns silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *url = NULL;
    const xmlChar *p = catalog;
    const xmlChar *valueStart;
    xmlChar quote;

    /* keyword */
    while (IS_BLANK_CH(*p))
        p++;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;

    /* '=' */
    while (IS_BLANK_CH(*p))
        p++;
    if (*p != '=')
        return;
    p++;

    /* quoted value */
    while (IS_BLANK_CH(*p))
        p++;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;
    valueStart = p;
    while ((*p != 0) && (*p != quote))
        p++;
    if (*p == 0)
        goto error;
    url = xmlStrndup(valueStart, p - valueStart);
    p++;

    /* only blanks may follow the closing quote */
    while (IS_BLANK_CH(*p))
        p++;
    if (*p != 0)
        goto error;

    if (url != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, url);
        xmlFree(url);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (url != NULL)
        xmlFree(url);
}
#endif
5199
5200/**
5201 * xmlParsePI:
5202 * @ctxt: an XML parser context
5203 *
5204 * parse an XML Processing Instruction.
5205 *
5206 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5207 *
5208 * The processing is transfered to SAX once parsed.
5209 */
5210
5211void
5212xmlParsePI(xmlParserCtxtPtr ctxt) {
5213 xmlChar *buf = NULL;
5214 size_t len = 0;
5215 size_t size = XML_PARSER_BUFFER_SIZE;
5216 int cur, l;
5217 const xmlChar *target;
5218 xmlParserInputState state;
5219 int count = 0;
5220
5221 if ((RAW == '<') && (NXT(1) == '?')) {
5222 xmlParserInputPtr input = ctxt->input;
5223 state = ctxt->instate;
5224 ctxt->instate = XML_PARSER_PI;
5225 /*
5226 * this is a Processing Instruction.
5227 */
5228 SKIP(2);
5229 SHRINK;
5230
5231 /*
5232 * Parse the target name and check for special support like
5233 * namespace.
5234 */
5235 target = xmlParsePITarget(ctxt);
5236 if (target != NULL) {
5237 if ((RAW == '?') && (NXT(1) == '>')) {
5238 if (input != ctxt->input) {
5239 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5240 "PI declaration doesn't start and stop in the same entity\n");
5241 }
5242 SKIP(2);
5243
5244 /*
5245 * SAX: PI detected.
5246 */
5247 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5248 (ctxt->sax->processingInstruction != NULL))
5249 ctxt->sax->processingInstruction(ctxt->userData,
5250 target, NULL);
5251 if (ctxt->instate != XML_PARSER_EOF)
5252 ctxt->instate = state;
5253 return;
5254 }
5255 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
5256 if (buf == NULL) {
5257 xmlErrMemory(ctxt, NULL);
5258 ctxt->instate = state;
5259 return;
5260 }
5261 cur = CUR;
5262 if (!IS_BLANK(cur)) {
5263 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5264 "ParsePI: PI %s space expected\n", target);
5265 }
5266 SKIP_BLANKS;
5267 cur = CUR_CHAR(l);
5268 while (IS_CHAR(cur) && /* checked */
5269 ((cur != '?') || (NXT(1) != '>'))) {
5270 if (len + 5 >= size) {
5271 xmlChar *tmp;
5272 size_t new_size = size * 2;
5273 tmp = (xmlChar *) xmlRealloc(buf, new_size);
5274 if (tmp == NULL) {
5275 xmlErrMemory(ctxt, NULL);
5276 xmlFree(buf);
5277 ctxt->instate = state;
5278 return;
5279 }
5280 buf = tmp;
5281 size = new_size;
5282 }
5283 count++;
5284 if (count > 50) {
5285 GROW;
5286 if (ctxt->instate == XML_PARSER_EOF) {
5287 xmlFree(buf);
5288 return;
5289 }
5290 count = 0;
5291 if ((len > XML_MAX_TEXT_LENGTH) &&
5292 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5293 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5294 "PI %s too big found", target);
5295 xmlFree(buf);
5296 ctxt->instate = state;
5297 return;
5298 }
5299 }
5300 COPY_BUF(l,buf,len,cur);
5301 NEXTL(l);
5302 cur = CUR_CHAR(l);
5303 if (cur == 0) {
5304 SHRINK;
5305 GROW;
5306 cur = CUR_CHAR(l);
5307 }
5308 }
5309 if ((len > XML_MAX_TEXT_LENGTH) &&
5310 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5311 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5312 "PI %s too big found", target);
5313 xmlFree(buf);
5314 ctxt->instate = state;
5315 return;
5316 }
5317 buf[len] = 0;
5318 if (cur != '?') {
5319 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5320 "ParsePI: PI %s never end ...\n", target);
5321 } else {
5322 if (input != ctxt->input) {
5323 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5324 "PI declaration doesn't start and stop in the same entity\n");
5325 }
5326 SKIP(2);
5327
5328#ifdef LIBXML_CATALOG_ENABLED
5329 if (((state == XML_PARSER_MISC) ||
5330 (state == XML_PARSER_START)) &&
5331 (xmlStrEqual(target, XML_CATALOG_PI))) {
5332 xmlCatalogAllow allow = xmlCatalogGetDefaults();
5333 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5334 (allow == XML_CATA_ALLOW_ALL))
5335 xmlParseCatalogPI(ctxt, buf);
5336 }
5337#endif
5338
5339
5340 /*
5341 * SAX: PI detected.
5342 */
5343 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5344 (ctxt->sax->processingInstruction != NULL))
5345 ctxt->sax->processingInstruction(ctxt->userData,
5346 target, buf);
5347 }
5348 xmlFree(buf);
5349 } else {
5350 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5351 }
5352 if (ctxt->instate != XML_PARSER_EOF)
5353 ctxt->instate = state;
5354 }
5355}
5356
5357/**
5358 * xmlParseNotationDecl:
5359 * @ctxt: an XML parser context
5360 *
5361 * parse a notation declaration
5362 *
5363 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5364 *
5365 * Hence there is actually 3 choices:
5366 * 'PUBLIC' S PubidLiteral
5367 * 'PUBLIC' S PubidLiteral S SystemLiteral
5368 * and 'SYSTEM' S SystemLiteral
5369 *
5370 * See the NOTE on xmlParseExternalID().
5371 */
5372
5373void
5374xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5375 const xmlChar *name;
5376 xmlChar *Pubid;
5377 xmlChar *Systemid;
5378
5379 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5380 xmlParserInputPtr input = ctxt->input;
5381 SHRINK;
5382 SKIP(10);
5383 if (!IS_BLANK_CH(CUR)) {
5384 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5385 "Space required after '<!NOTATION'\n");
5386 return;
5387 }
5388 SKIP_BLANKS;
5389
5390 name = xmlParseName(ctxt);
5391 if (name == NULL) {
5392 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5393 return;
5394 }
5395 if (!IS_BLANK_CH(CUR)) {
5396 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5397 "Space required after the NOTATION name'\n");
5398 return;
5399 }
5400 if (xmlStrchr(name, ':') != NULL) {
5401 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5402 "colons are forbidden from notation names '%s'\n",
5403 name, NULL, NULL);
5404 }
5405 SKIP_BLANKS;
5406
5407 /*
5408 * Parse the IDs.
5409 */
5410 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5411 SKIP_BLANKS;
5412
5413 if (RAW == '>') {
5414 if (input != ctxt->input) {
5415 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5416 "Notation declaration doesn't start and stop in the same entity\n");
5417 }
5418 NEXT;
5419 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5420 (ctxt->sax->notationDecl != NULL))
5421 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5422 } else {
5423 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5424 }
5425 if (Systemid != NULL) xmlFree(Systemid);
5426 if (Pubid != NULL) xmlFree(Pubid);
5427 }
5428}
5429
5430/**
5431 * xmlParseEntityDecl:
5432 * @ctxt: an XML parser context
5433 *
5434 * parse <!ENTITY declarations
5435 *
5436 * [70] EntityDecl ::= GEDecl | PEDecl
5437 *
5438 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5439 *
5440 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5441 *
5442 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5443 *
5444 * [74] PEDef ::= EntityValue | ExternalID
5445 *
5446 * [76] NDataDecl ::= S 'NDATA' S Name
5447 *
5448 * [ VC: Notation Declared ]
5449 * The Name must match the declared name of a notation.
5450 */
5451
5452void
5453xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5454 const xmlChar *name = NULL;
5455 xmlChar *value = NULL;
5456 xmlChar *URI = NULL, *literal = NULL;
5457 const xmlChar *ndata = NULL;
5458 int isParameter = 0;
5459 xmlChar *orig = NULL;
5460 int skipped;
5461
5462 /* GROW; done in the caller */
5463 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
5464 xmlParserInputPtr input = ctxt->input;
5465 SHRINK;
5466 SKIP(8);
5467 skipped = SKIP_BLANKS;
5468 if (skipped == 0) {
5469 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5470 "Space required after '<!ENTITY'\n");
5471 }
5472
5473 if (RAW == '%') {
5474 NEXT;
5475 skipped = SKIP_BLANKS;
5476 if (skipped == 0) {
5477 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5478 "Space required after '%'\n");
5479 }
5480 isParameter = 1;
5481 }
5482
5483 name = xmlParseName(ctxt);
5484 if (name == NULL) {
5485 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5486 "xmlParseEntityDecl: no name\n");
5487 return;
5488 }
5489 if (xmlStrchr(name, ':') != NULL) {
5490 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5491 "colons are forbidden from entities names '%s'\n",
5492 name, NULL, NULL);
5493 }
5494 skipped = SKIP_BLANKS;
5495 if (skipped == 0) {
5496 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5497 "Space required after the entity name\n");
5498 }
5499
5500 ctxt->instate = XML_PARSER_ENTITY_DECL;
5501 /*
5502 * handle the various case of definitions...
5503 */
5504 if (isParameter) {
5505 if ((RAW == '"') || (RAW == '\'')) {
5506 value = xmlParseEntityValue(ctxt, &orig);
5507 if (value) {
5508 if ((ctxt->sax != NULL) &&
5509 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5510 ctxt->sax->entityDecl(ctxt->userData, name,
5511 XML_INTERNAL_PARAMETER_ENTITY,
5512 NULL, NULL, value);
5513 }
5514 } else {
5515 URI = xmlParseExternalID(ctxt, &literal, 1);
5516 if ((URI == NULL) && (literal == NULL)) {
5517 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5518 }
5519 if (URI) {
5520 xmlURIPtr uri;
5521
5522 uri = xmlParseURI((const char *) URI);
5523 if (uri == NULL) {
5524 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5525 "Invalid URI: %s\n", URI);
5526 /*
5527 * This really ought to be a well formedness error
5528 * but the XML Core WG decided otherwise c.f. issue
5529 * E26 of the XML erratas.
5530 */
5531 } else {
5532 if (uri->fragment != NULL) {
5533 /*
5534 * Okay this is foolish to block those but not
5535 * invalid URIs.
5536 */
5537 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5538 } else {
5539 if ((ctxt->sax != NULL) &&
5540 (!ctxt->disableSAX) &&
5541 (ctxt->sax->entityDecl != NULL))
5542 ctxt->sax->entityDecl(ctxt->userData, name,
5543 XML_EXTERNAL_PARAMETER_ENTITY,
5544 literal, URI, NULL);
5545 }
5546 xmlFreeURI(uri);
5547 }
5548 }
5549 }
5550 } else {
5551 if ((RAW == '"') || (RAW == '\'')) {
5552 value = xmlParseEntityValue(ctxt, &orig);
5553 if ((ctxt->sax != NULL) &&
5554 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5555 ctxt->sax->entityDecl(ctxt->userData, name,
5556 XML_INTERNAL_GENERAL_ENTITY,
5557 NULL, NULL, value);
5558 /*
5559 * For expat compatibility in SAX mode.
5560 */
5561 if ((ctxt->myDoc == NULL) ||
5562 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5563 if (ctxt->myDoc == NULL) {
5564 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5565 if (ctxt->myDoc == NULL) {
5566 xmlErrMemory(ctxt, "New Doc failed");
5567 return;
5568 }
5569 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5570 }
5571 if (ctxt->myDoc->intSubset == NULL)
5572 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5573 BAD_CAST "fake", NULL, NULL);
5574
5575 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5576 NULL, NULL, value);
5577 }
5578 } else {
5579 URI = xmlParseExternalID(ctxt, &literal, 1);
5580 if ((URI == NULL) && (literal == NULL)) {
5581 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5582 }
5583 if (URI) {
5584 xmlURIPtr uri;
5585
5586 uri = xmlParseURI((const char *)URI);
5587 if (uri == NULL) {
5588 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5589 "Invalid URI: %s\n", URI);
5590 /*
5591 * This really ought to be a well formedness error
5592 * but the XML Core WG decided otherwise c.f. issue
5593 * E26 of the XML erratas.
5594 */
5595 } else {
5596 if (uri->fragment != NULL) {
5597 /*
5598 * Okay this is foolish to block those but not
5599 * invalid URIs.
5600 */
5601 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5602 }
5603 xmlFreeURI(uri);
5604 }
5605 }
5606 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
5607 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5608 "Space required before 'NDATA'\n");
5609 }
5610 SKIP_BLANKS;
5611 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5612 SKIP(5);
5613 if (!IS_BLANK_CH(CUR)) {
5614 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5615 "Space required after 'NDATA'\n");
5616 }
5617 SKIP_BLANKS;
5618 ndata = xmlParseName(ctxt);
5619 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5620 (ctxt->sax->unparsedEntityDecl != NULL))
5621 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5622 literal, URI, ndata);
5623 } else {
5624 if ((ctxt->sax != NULL) &&
5625 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5626 ctxt->sax->entityDecl(ctxt->userData, name,
5627 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5628 literal, URI, NULL);
5629 /*
5630 * For expat compatibility in SAX mode.
5631 * assuming the entity repalcement was asked for
5632 */
5633 if ((ctxt->replaceEntities != 0) &&
5634 ((ctxt->myDoc == NULL) ||
5635 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5636 if (ctxt->myDoc == NULL) {
5637 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5638 if (ctxt->myDoc == NULL) {
5639 xmlErrMemory(ctxt, "New Doc failed");
5640 return;
5641 }
5642 ctxt->myDoc->properties = XML_DOC_INTERNAL;
5643 }
5644
5645 if (ctxt->myDoc->intSubset == NULL)
5646 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5647 BAD_CAST "fake", NULL, NULL);
5648 xmlSAX2EntityDecl(ctxt, name,
5649 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5650 literal, URI, NULL);
5651 }
5652 }
5653 }
5654 }
5655 if (ctxt->instate == XML_PARSER_EOF)
5656 return;
5657 SKIP_BLANKS;
5658 if (RAW != '>') {
5659 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5660 "xmlParseEntityDecl: entity %s not terminated\n", name);
5661 xmlStopParser(ctxt);
5662 } else {
5663 if (input != ctxt->input) {
5664 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5665 "Entity declaration doesn't start and stop in the same entity\n");
5666 }
5667 NEXT;
5668 }
5669 if (orig != NULL) {
5670 /*
5671 * Ugly mechanism to save the raw entity value.
5672 */
5673 xmlEntityPtr cur = NULL;
5674
5675 if (isParameter) {
5676 if ((ctxt->sax != NULL) &&
5677 (ctxt->sax->getParameterEntity != NULL))
5678 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5679 } else {
5680 if ((ctxt->sax != NULL) &&
5681 (ctxt->sax->getEntity != NULL))
5682 cur = ctxt->sax->getEntity(ctxt->userData, name);
5683 if ((cur == NULL) && (ctxt->userData==ctxt)) {
5684 cur = xmlSAX2GetEntity(ctxt, name);
5685 }
5686 }
5687 if (cur != NULL) {
5688 if (cur->orig != NULL)
5689 xmlFree(orig);
5690 else
5691 cur->orig = orig;
5692 } else
5693 xmlFree(orig);
5694 }
5695 if (value != NULL) xmlFree(value);
5696 if (URI != NULL) xmlFree(URI);
5697 if (literal != NULL) xmlFree(literal);
5698 }
5699}
5700
5701/**
5702 * xmlParseDefaultDecl:
5703 * @ctxt: an XML parser context
5704 * @value: Receive a possible fixed default value for the attribute
5705 *
5706 * Parse an attribute default declaration
5707 *
5708 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5709 *
5710 * [ VC: Required Attribute ]
5711 * if the default declaration is the keyword #REQUIRED, then the
5712 * attribute must be specified for all elements of the type in the
5713 * attribute-list declaration.
5714 *
5715 * [ VC: Attribute Default Legal ]
5716 * The declared default value must meet the lexical constraints of
5717 * the declared attribute type c.f. xmlValidateAttributeDecl()
5718 *
5719 * [ VC: Fixed Attribute Default ]
5720 * if an attribute has a default value declared with the #FIXED
5721 * keyword, instances of that attribute must match the default value.
5722 *
5723 * [ WFC: No < in Attribute Values ]
5724 * handled in xmlParseAttValue()
5725 *
5726 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5727 * or XML_ATTRIBUTE_FIXED.
5728 */
5729
5730int
5731xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5732 int val;
5733 xmlChar *ret;
5734
5735 *value = NULL;
5736 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5737 SKIP(9);
5738 return(XML_ATTRIBUTE_REQUIRED);
5739 }
5740 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5741 SKIP(8);
5742 return(XML_ATTRIBUTE_IMPLIED);
5743 }
5744 val = XML_ATTRIBUTE_NONE;
5745 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5746 SKIP(6);
5747 val = XML_ATTRIBUTE_FIXED;
5748 if (!IS_BLANK_CH(CUR)) {
5749 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5750 "Space required after '#FIXED'\n");
5751 }
5752 SKIP_BLANKS;
5753 }
5754 ret = xmlParseAttValue(ctxt);
5755 ctxt->instate = XML_PARSER_DTD;
5756 if (ret == NULL) {
5757 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5758 "Attribute default value declaration error\n");
5759 } else
5760 *value = ret;
5761 return(val);
5762}
5763
5764/**
5765 * xmlParseNotationType:
5766 * @ctxt: an XML parser context
5767 *
5768 * parse an Notation attribute type.
5769 *
5770 * Note: the leading 'NOTATION' S part has already being parsed...
5771 *
5772 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5773 *
5774 * [ VC: Notation Attributes ]
5775 * Values of this type must match one of the notation names included
5776 * in the declaration; all notation names in the declaration must be declared.
5777 *
5778 * Returns: the notation attribute tree built while parsing
5779 */
5780
5781xmlEnumerationPtr
5782xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5783 const xmlChar *name;
5784 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5785
5786 if (RAW != '(') {
5787 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5788 return(NULL);
5789 }
5790 SHRINK;
5791 do {
5792 NEXT;
5793 SKIP_BLANKS;
5794 name = xmlParseName(ctxt);
5795 if (name == NULL) {
5796 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5797 "Name expected in NOTATION declaration\n");
5798 xmlFreeEnumeration(ret);
5799 return(NULL);
5800 }
5801 tmp = ret;
5802 while (tmp != NULL) {
5803 if (xmlStrEqual(name, tmp->name)) {
5804 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5805 "standalone: attribute notation value token %s duplicated\n",
5806 name, NULL);
5807 if (!xmlDictOwns(ctxt->dict, name))
5808 xmlFree((xmlChar *) name);
5809 break;
5810 }
5811 tmp = tmp->next;
5812 }
5813 if (tmp == NULL) {
5814 cur = xmlCreateEnumeration(name);
5815 if (cur == NULL) {
5816 xmlFreeEnumeration(ret);
5817 return(NULL);
5818 }
5819 if (last == NULL) ret = last = cur;
5820 else {
5821 last->next = cur;
5822 last = cur;
5823 }
5824 }
5825 SKIP_BLANKS;
5826 } while (RAW == '|');
5827 if (RAW != ')') {
5828 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5829 xmlFreeEnumeration(ret);
5830 return(NULL);
5831 }
5832 NEXT;
5833 return(ret);
5834}
5835
5836/**
5837 * xmlParseEnumerationType:
5838 * @ctxt: an XML parser context
5839 *
5840 * parse an Enumeration attribute type.
5841 *
5842 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5843 *
5844 * [ VC: Enumeration ]
5845 * Values of this type must match one of the Nmtoken tokens in
5846 * the declaration
5847 *
5848 * Returns: the enumeration attribute tree built while parsing
5849 */
5850
5851xmlEnumerationPtr
5852xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5853 xmlChar *name;
5854 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5855
5856 if (RAW != '(') {
5857 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5858 return(NULL);
5859 }
5860 SHRINK;
5861 do {
5862 NEXT;
5863 SKIP_BLANKS;
5864 name = xmlParseNmtoken(ctxt);
5865 if (name == NULL) {
5866 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5867 return(ret);
5868 }
5869 tmp = ret;
5870 while (tmp != NULL) {
5871 if (xmlStrEqual(name, tmp->name)) {
5872 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5873 "standalone: attribute enumeration value token %s duplicated\n",
5874 name, NULL);
5875 if (!xmlDictOwns(ctxt->dict, name))
5876 xmlFree(name);
5877 break;
5878 }
5879 tmp = tmp->next;
5880 }
5881 if (tmp == NULL) {
5882 cur = xmlCreateEnumeration(name);
5883 if (!xmlDictOwns(ctxt->dict, name))
5884 xmlFree(name);
5885 if (cur == NULL) {
5886 xmlFreeEnumeration(ret);
5887 return(NULL);
5888 }
5889 if (last == NULL) ret = last = cur;
5890 else {
5891 last->next = cur;
5892 last = cur;
5893 }
5894 }
5895 SKIP_BLANKS;
5896 } while (RAW == '|');
5897 if (RAW != ')') {
5898 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5899 return(ret);
5900 }
5901 NEXT;
5902 return(ret);
5903}
5904
5905/**
5906 * xmlParseEnumeratedType:
5907 * @ctxt: an XML parser context
5908 * @tree: the enumeration tree built while parsing
5909 *
5910 * parse an Enumerated attribute type.
5911 *
5912 * [57] EnumeratedType ::= NotationType | Enumeration
5913 *
5914 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5915 *
5916 *
5917 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5918 */
5919
5920int
5921xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5922 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5923 SKIP(8);
5924 if (!IS_BLANK_CH(CUR)) {
5925 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5926 "Space required after 'NOTATION'\n");
5927 return(0);
5928 }
5929 SKIP_BLANKS;
5930 *tree = xmlParseNotationType(ctxt);
5931 if (*tree == NULL) return(0);
5932 return(XML_ATTRIBUTE_NOTATION);
5933 }
5934 *tree = xmlParseEnumerationType(ctxt);
5935 if (*tree == NULL) return(0);
5936 return(XML_ATTRIBUTE_ENUMERATION);
5937}
5938
5939/**
5940 * xmlParseAttributeType:
5941 * @ctxt: an XML parser context
5942 * @tree: the enumeration tree built while parsing
5943 *
5944 * parse the Attribute list def for an element
5945 *
5946 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5947 *
5948 * [55] StringType ::= 'CDATA'
5949 *
5950 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5951 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5952 *
5953 * Validity constraints for attribute values syntax are checked in
5954 * xmlValidateAttributeValue()
5955 *
5956 * [ VC: ID ]
5957 * Values of type ID must match the Name production. A name must not
5958 * appear more than once in an XML document as a value of this type;
5959 * i.e., ID values must uniquely identify the elements which bear them.
5960 *
5961 * [ VC: One ID per Element Type ]
5962 * No element type may have more than one ID attribute specified.
5963 *
5964 * [ VC: ID Attribute Default ]
5965 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5966 *
5967 * [ VC: IDREF ]
5968 * Values of type IDREF must match the Name production, and values
5969 * of type IDREFS must match Names; each IDREF Name must match the value
5970 * of an ID attribute on some element in the XML document; i.e. IDREF
5971 * values must match the value of some ID attribute.
5972 *
5973 * [ VC: Entity Name ]
5974 * Values of type ENTITY must match the Name production, values
5975 * of type ENTITIES must match Names; each Entity Name must match the
5976 * name of an unparsed entity declared in the DTD.
5977 *
5978 * [ VC: Name Token ]
5979 * Values of type NMTOKEN must match the Nmtoken production; values
5980 * of type NMTOKENS must match Nmtokens.
5981 *
5982 * Returns the attribute type
5983 */
5984int
5985xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5986 SHRINK;
5987 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5988 SKIP(5);
5989 return(XML_ATTRIBUTE_CDATA);
5990 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5991 SKIP(6);
5992 return(XML_ATTRIBUTE_IDREFS);
5993 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5994 SKIP(5);
5995 return(XML_ATTRIBUTE_IDREF);
5996 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5997 SKIP(2);
5998 return(XML_ATTRIBUTE_ID);
5999 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
6000 SKIP(6);
6001 return(XML_ATTRIBUTE_ENTITY);
6002 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
6003 SKIP(8);
6004 return(XML_ATTRIBUTE_ENTITIES);
6005 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
6006 SKIP(8);
6007 return(XML_ATTRIBUTE_NMTOKENS);
6008 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
6009 SKIP(7);
6010 return(XML_ATTRIBUTE_NMTOKEN);
6011 }
6012 return(xmlParseEnumeratedType(ctxt, tree));
6013}
6014
6015/**
6016 * xmlParseAttributeListDecl:
6017 * @ctxt: an XML parser context
6018 *
6019 * : parse the Attribute list def for an element
6020 *
6021 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6022 *
6023 * [53] AttDef ::= S Name S AttType S DefaultDecl
6024 *
6025 */
6026void
6027xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
6028 const xmlChar *elemName;
6029 const xmlChar *attrName;
6030 xmlEnumerationPtr tree;
6031
6032 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
6033 xmlParserInputPtr input = ctxt->input;
6034
6035 SKIP(9);
6036 if (!IS_BLANK_CH(CUR)) {
6037 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6038 "Space required after '<!ATTLIST'\n");
6039 }
6040 SKIP_BLANKS;
6041 elemName = xmlParseName(ctxt);
6042 if (elemName == NULL) {
6043 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6044 "ATTLIST: no name for Element\n");
6045 return;
6046 }
6047 SKIP_BLANKS;
6048 GROW;
6049 while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
6050 const xmlChar *check = CUR_PTR;
6051 int type;
6052 int def;
6053 xmlChar *defaultValue = NULL;
6054
6055 GROW;
6056 tree = NULL;
6057 attrName = xmlParseName(ctxt);
6058 if (attrName == NULL) {
6059 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6060 "ATTLIST: no name for Attribute\n");
6061 break;
6062 }
6063 GROW;
6064 if (!IS_BLANK_CH(CUR)) {
6065 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6066 "Space required after the attribute name\n");
6067 break;
6068 }
6069 SKIP_BLANKS;
6070
6071 type = xmlParseAttributeType(ctxt, &tree);
6072 if (type <= 0) {
6073 break;
6074 }
6075
6076 GROW;
6077 if (!IS_BLANK_CH(CUR)) {
6078 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6079 "Space required after the attribute type\n");
6080 if (tree != NULL)
6081 xmlFreeEnumeration(tree);
6082 break;
6083 }
6084 SKIP_BLANKS;
6085
6086 def = xmlParseDefaultDecl(ctxt, &defaultValue);
6087 if (def <= 0) {
6088 if (defaultValue != NULL)
6089 xmlFree(defaultValue);
6090 if (tree != NULL)
6091 xmlFreeEnumeration(tree);
6092 break;
6093 }
6094 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6095 xmlAttrNormalizeSpace(defaultValue, defaultValue);
6096
6097 GROW;
6098 if (RAW != '>') {
6099 if (!IS_BLANK_CH(CUR)) {
6100 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6101 "Space required after the attribute default value\n");
6102 if (defaultValue != NULL)
6103 xmlFree(defaultValue);
6104 if (tree != NULL)
6105 xmlFreeEnumeration(tree);
6106 break;
6107 }
6108 SKIP_BLANKS;
6109 }
6110 if (check == CUR_PTR) {
6111 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
6112 "in xmlParseAttributeListDecl\n");
6113 if (defaultValue != NULL)
6114 xmlFree(defaultValue);
6115 if (tree != NULL)
6116 xmlFreeEnumeration(tree);
6117 break;
6118 }
6119 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6120 (ctxt->sax->attributeDecl != NULL))
6121 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6122 type, def, defaultValue, tree);
6123 else if (tree != NULL)
6124 xmlFreeEnumeration(tree);
6125
6126 if ((ctxt->sax2) && (defaultValue != NULL) &&
6127 (def != XML_ATTRIBUTE_IMPLIED) &&
6128 (def != XML_ATTRIBUTE_REQUIRED)) {
6129 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6130 }
6131 if (ctxt->sax2) {
6132 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6133 }
6134 if (defaultValue != NULL)
6135 xmlFree(defaultValue);
6136 GROW;
6137 }
6138 if (RAW == '>') {
6139 if (input != ctxt->input) {
6140 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6141 "Attribute list declaration doesn't start and stop in the same entity\n",
6142 NULL, NULL);
6143 }
6144 NEXT;
6145 }
6146 }
6147}
6148
6149/**
6150 * xmlParseElementMixedContentDecl:
6151 * @ctxt: an XML parser context
6152 * @inputchk: the input used for the current entity, needed for boundary checks
6153 *
6154 * parse the declaration for a Mixed Element content
6155 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6156 *
6157 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6158 * '(' S? '#PCDATA' S? ')'
6159 *
6160 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6161 *
6162 * [ VC: No Duplicate Types ]
6163 * The same name must not appear more than once in a single
6164 * mixed-content declaration.
6165 *
6166 * returns: the list of the xmlElementContentPtr describing the element choices
6167 */
6168xmlElementContentPtr
6169xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6170 xmlElementContentPtr ret = NULL, cur = NULL, n;
6171 const xmlChar *elem = NULL;
6172
6173 GROW;
6174 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6175 SKIP(7);
6176 SKIP_BLANKS;
6177 SHRINK;
6178 if (RAW == ')') {
6179 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6180 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6181"Element content declaration doesn't start and stop in the same entity\n",
6182 NULL, NULL);
6183 }
6184 NEXT;
6185 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6186 if (ret == NULL)
6187 return(NULL);
6188 if (RAW == '*') {
6189 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6190 NEXT;
6191 }
6192 return(ret);
6193 }
6194 if ((RAW == '(') || (RAW == '|')) {
6195 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6196 if (ret == NULL) return(NULL);
6197 }
6198 while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6199 NEXT;
6200 if (elem == NULL) {
6201 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6202 if (ret == NULL) return(NULL);
6203 ret->c1 = cur;
6204 if (cur != NULL)
6205 cur->parent = ret;
6206 cur = ret;
6207 } else {
6208 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6209 if (n == NULL) return(NULL);
6210 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6211 if (n->c1 != NULL)
6212 n->c1->parent = n;
6213 cur->c2 = n;
6214 if (n != NULL)
6215 n->parent = cur;
6216 cur = n;
6217 }
6218 SKIP_BLANKS;
6219 elem = xmlParseName(ctxt);
6220 if (elem == NULL) {
6221 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6222 "xmlParseElementMixedContentDecl : Name expected\n");
6223 xmlFreeDocElementContent(ctxt->myDoc, cur);
6224 return(NULL);
6225 }
6226 SKIP_BLANKS;
6227 GROW;
6228 }
6229 if ((RAW == ')') && (NXT(1) == '*')) {
6230 if (elem != NULL) {
6231 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6232 XML_ELEMENT_CONTENT_ELEMENT);
6233 if (cur->c2 != NULL)
6234 cur->c2->parent = cur;
6235 }
6236 if (ret != NULL)
6237 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6238 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6239 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6240"Element content declaration doesn't start and stop in the same entity\n",
6241 NULL, NULL);
6242 }
6243 SKIP(2);
6244 } else {
6245 xmlFreeDocElementContent(ctxt->myDoc, ret);
6246 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6247 return(NULL);
6248 }
6249
6250 } else {
6251 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6252 }
6253 return(ret);
6254}
6255
6256/**
6257 * xmlParseElementChildrenContentDeclPriv:
6258 * @ctxt: an XML parser context
6259 * @inputchk: the input used for the current entity, needed for boundary checks
6260 * @depth: the level of recursion
6261 *
6262 * parse the declaration for a Mixed Element content
6263 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6264 *
6265 *
6266 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6267 *
6268 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6269 *
6270 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6271 *
6272 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6273 *
6274 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6275 * TODO Parameter-entity replacement text must be properly nested
6276 * with parenthesized groups. That is to say, if either of the
6277 * opening or closing parentheses in a choice, seq, or Mixed
6278 * construct is contained in the replacement text for a parameter
6279 * entity, both must be contained in the same replacement text. For
6280 * interoperability, if a parameter-entity reference appears in a
6281 * choice, seq, or Mixed construct, its replacement text should not
6282 * be empty, and neither the first nor last non-blank character of
6283 * the replacement text should be a connector (| or ,).
6284 *
6285 * Returns the tree of xmlElementContentPtr describing the element
6286 * hierarchy.
6287 */
6288static xmlElementContentPtr
6289xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6290 int depth) {
6291 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6292 const xmlChar *elem;
6293 xmlChar type = 0;
6294
6295 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6296 (depth > 2048)) {
6297 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6298"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6299 depth);
6300 return(NULL);
6301 }
6302 SKIP_BLANKS;
6303 GROW;
6304 if (RAW == '(') {
6305 int inputid = ctxt->input->id;
6306
6307 /* Recurse on first child */
6308 NEXT;
6309 SKIP_BLANKS;
6310 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6311 depth + 1);
6312 SKIP_BLANKS;
6313 GROW;
6314 } else {
6315 elem = xmlParseName(ctxt);
6316 if (elem == NULL) {
6317 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6318 return(NULL);
6319 }
6320 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6321 if (cur == NULL) {
6322 xmlErrMemory(ctxt, NULL);
6323 return(NULL);
6324 }
6325 GROW;
6326 if (RAW == '?') {
6327 cur->ocur = XML_ELEMENT_CONTENT_OPT;
6328 NEXT;
6329 } else if (RAW == '*') {
6330 cur->ocur = XML_ELEMENT_CONTENT_MULT;
6331 NEXT;
6332 } else if (RAW == '+') {
6333 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6334 NEXT;
6335 } else {
6336 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6337 }
6338 GROW;
6339 }
6340 SKIP_BLANKS;
6341 SHRINK;
6342 while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6343 /*
6344 * Each loop we parse one separator and one element.
6345 */
6346 if (RAW == ',') {
6347 if (type == 0) type = CUR;
6348
6349 /*
6350 * Detect "Name | Name , Name" error
6351 */
6352 else if (type != CUR) {
6353 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6354 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6355 type);
6356 if ((last != NULL) && (last != ret))
6357 xmlFreeDocElementContent(ctxt->myDoc, last);
6358 if (ret != NULL)
6359 xmlFreeDocElementContent(ctxt->myDoc, ret);
6360 return(NULL);
6361 }
6362 NEXT;
6363
6364 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6365 if (op == NULL) {
6366 if ((last != NULL) && (last != ret))
6367 xmlFreeDocElementContent(ctxt->myDoc, last);
6368 xmlFreeDocElementContent(ctxt->myDoc, ret);
6369 return(NULL);
6370 }
6371 if (last == NULL) {
6372 op->c1 = ret;
6373 if (ret != NULL)
6374 ret->parent = op;
6375 ret = cur = op;
6376 } else {
6377 cur->c2 = op;
6378 if (op != NULL)
6379 op->parent = cur;
6380 op->c1 = last;
6381 if (last != NULL)
6382 last->parent = op;
6383 cur =op;
6384 last = NULL;
6385 }
6386 } else if (RAW == '|') {
6387 if (type == 0) type = CUR;
6388
6389 /*
6390 * Detect "Name , Name | Name" error
6391 */
6392 else if (type != CUR) {
6393 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6394 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6395 type);
6396 if ((last != NULL) && (last != ret))
6397 xmlFreeDocElementContent(ctxt->myDoc, last);
6398 if (ret != NULL)
6399 xmlFreeDocElementContent(ctxt->myDoc, ret);
6400 return(NULL);
6401 }
6402 NEXT;
6403
6404 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6405 if (op == NULL) {
6406 if ((last != NULL) && (last != ret))
6407 xmlFreeDocElementContent(ctxt->myDoc, last);
6408 if (ret != NULL)
6409 xmlFreeDocElementContent(ctxt->myDoc, ret);
6410 return(NULL);
6411 }
6412 if (last == NULL) {
6413 op->c1 = ret;
6414 if (ret != NULL)
6415 ret->parent = op;
6416 ret = cur = op;
6417 } else {
6418 cur->c2 = op;
6419 if (op != NULL)
6420 op->parent = cur;
6421 op->c1 = last;
6422 if (last != NULL)
6423 last->parent = op;
6424 cur =op;
6425 last = NULL;
6426 }
6427 } else {
6428 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6429 if ((last != NULL) && (last != ret))
6430 xmlFreeDocElementContent(ctxt->myDoc, last);
6431 if (ret != NULL)
6432 xmlFreeDocElementContent(ctxt->myDoc, ret);
6433 return(NULL);
6434 }
6435 GROW;
6436 SKIP_BLANKS;
6437 GROW;
6438 if (RAW == '(') {
6439 int inputid = ctxt->input->id;
6440 /* Recurse on second child */
6441 NEXT;
6442 SKIP_BLANKS;
6443 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6444 depth + 1);
6445 SKIP_BLANKS;
6446 } else {
6447 elem = xmlParseName(ctxt);
6448 if (elem == NULL) {
6449 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6450 if (ret != NULL)
6451 xmlFreeDocElementContent(ctxt->myDoc, ret);
6452 return(NULL);
6453 }
6454 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6455 if (last == NULL) {
6456 if (ret != NULL)
6457 xmlFreeDocElementContent(ctxt->myDoc, ret);
6458 return(NULL);
6459 }
6460 if (RAW == '?') {
6461 last->ocur = XML_ELEMENT_CONTENT_OPT;
6462 NEXT;
6463 } else if (RAW == '*') {
6464 last->ocur = XML_ELEMENT_CONTENT_MULT;
6465 NEXT;
6466 } else if (RAW == '+') {
6467 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6468 NEXT;
6469 } else {
6470 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6471 }
6472 }
6473 SKIP_BLANKS;
6474 GROW;
6475 }
6476 if ((cur != NULL) && (last != NULL)) {
6477 cur->c2 = last;
6478 if (last != NULL)
6479 last->parent = cur;
6480 }
6481 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
6482 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6483"Element content declaration doesn't start and stop in the same entity\n",
6484 NULL, NULL);
6485 }
6486 NEXT;
6487 if (RAW == '?') {
6488 if (ret != NULL) {
6489 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6490 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6491 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6492 else
6493 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6494 }
6495 NEXT;
6496 } else if (RAW == '*') {
6497 if (ret != NULL) {
6498 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6499 cur = ret;
6500 /*
6501 * Some normalization:
6502 * (a | b* | c?)* == (a | b | c)*
6503 */
6504 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6505 if ((cur->c1 != NULL) &&
6506 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6507 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6508 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6509 if ((cur->c2 != NULL) &&
6510 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6511 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6512 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6513 cur = cur->c2;
6514 }
6515 }
6516 NEXT;
6517 } else if (RAW == '+') {
6518 if (ret != NULL) {
6519 int found = 0;
6520
6521 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6522 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6523 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6524 else
6525 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6526 /*
6527 * Some normalization:
6528 * (a | b*)+ == (a | b)*
6529 * (a | b?)+ == (a | b)*
6530 */
6531 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6532 if ((cur->c1 != NULL) &&
6533 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6534 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6535 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6536 found = 1;
6537 }
6538 if ((cur->c2 != NULL) &&
6539 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6540 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6541 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6542 found = 1;
6543 }
6544 cur = cur->c2;
6545 }
6546 if (found)
6547 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6548 }
6549 NEXT;
6550 }
6551 return(ret);
6552}
6553
6554/**
6555 * xmlParseElementChildrenContentDecl:
6556 * @ctxt: an XML parser context
6557 * @inputchk: the input used for the current entity, needed for boundary checks
6558 *
6559 * parse the declaration for a Mixed Element content
6560 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6561 *
6562 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6563 *
6564 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6565 *
6566 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6567 *
6568 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6569 *
6570 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6571 * TODO Parameter-entity replacement text must be properly nested
6572 * with parenthesized groups. That is to say, if either of the
6573 * opening or closing parentheses in a choice, seq, or Mixed
6574 * construct is contained in the replacement text for a parameter
6575 * entity, both must be contained in the same replacement text. For
6576 * interoperability, if a parameter-entity reference appears in a
6577 * choice, seq, or Mixed construct, its replacement text should not
6578 * be empty, and neither the first nor last non-blank character of
6579 * the replacement text should be a connector (| or ,).
6580 *
6581 * Returns the tree of xmlElementContentPtr describing the element
6582 * hierarchy.
6583 */
6584xmlElementContentPtr
6585xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6586 /* stub left for API/ABI compat */
6587 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6588}
6589
6590/**
6591 * xmlParseElementContentDecl:
6592 * @ctxt: an XML parser context
6593 * @name: the name of the element being defined.
6594 * @result: the Element Content pointer will be stored here if any
6595 *
6596 * parse the declaration for an Element content either Mixed or Children,
6597 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6598 *
6599 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6600 *
6601 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6602 */
6603
6604int
6605xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6606 xmlElementContentPtr *result) {
6607
6608 xmlElementContentPtr tree = NULL;
6609 int inputid = ctxt->input->id;
6610 int res;
6611
6612 *result = NULL;
6613
6614 if (RAW != '(') {
6615 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6616 "xmlParseElementContentDecl : %s '(' expected\n", name);
6617 return(-1);
6618 }
6619 NEXT;
6620 GROW;
6621 if (ctxt->instate == XML_PARSER_EOF)
6622 return(-1);
6623 SKIP_BLANKS;
6624 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6625 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6626 res = XML_ELEMENT_TYPE_MIXED;
6627 } else {
6628 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6629 res = XML_ELEMENT_TYPE_ELEMENT;
6630 }
6631 SKIP_BLANKS;
6632 *result = tree;
6633 return(res);
6634}
6635
6636/**
6637 * xmlParseElementDecl:
6638 * @ctxt: an XML parser context
6639 *
6640 * parse an Element declaration.
6641 *
6642 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6643 *
6644 * [ VC: Unique Element Type Declaration ]
6645 * No element type may be declared more than once
6646 *
6647 * Returns the type of the element, or -1 in case of error
6648 */
6649int
6650xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6651 const xmlChar *name;
6652 int ret = -1;
6653 xmlElementContentPtr content = NULL;
6654
6655 /* GROW; done in the caller */
6656 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6657 xmlParserInputPtr input = ctxt->input;
6658
6659 SKIP(9);
6660 if (!IS_BLANK_CH(CUR)) {
6661 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6662 "Space required after 'ELEMENT'\n");
6663 }
6664 SKIP_BLANKS;
6665 name = xmlParseName(ctxt);
6666 if (name == NULL) {
6667 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6668 "xmlParseElementDecl: no name for Element\n");
6669 return(-1);
6670 }
6671 while ((RAW == 0) && (ctxt->inputNr > 1))
6672 xmlPopInput(ctxt);
6673 if (!IS_BLANK_CH(CUR)) {
6674 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6675 "Space required after the element name\n");
6676 }
6677 SKIP_BLANKS;
6678 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6679 SKIP(5);
6680 /*
6681 * Element must always be empty.
6682 */
6683 ret = XML_ELEMENT_TYPE_EMPTY;
6684 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6685 (NXT(2) == 'Y')) {
6686 SKIP(3);
6687 /*
6688 * Element is a generic container.
6689 */
6690 ret = XML_ELEMENT_TYPE_ANY;
6691 } else if (RAW == '(') {
6692 ret = xmlParseElementContentDecl(ctxt, name, &content);
6693 } else {
6694 /*
6695 * [ WFC: PEs in Internal Subset ] error handling.
6696 */
6697 if ((RAW == '%') && (ctxt->external == 0) &&
6698 (ctxt->inputNr == 1)) {
6699 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6700 "PEReference: forbidden within markup decl in internal subset\n");
6701 } else {
6702 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6703 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6704 }
6705 return(-1);
6706 }
6707
6708 SKIP_BLANKS;
6709 /*
6710 * Pop-up of finished entities.
6711 */
6712 while ((RAW == 0) && (ctxt->inputNr > 1))
6713 xmlPopInput(ctxt);
6714 SKIP_BLANKS;
6715
6716 if (RAW != '>') {
6717 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6718 if (content != NULL) {
6719 xmlFreeDocElementContent(ctxt->myDoc, content);
6720 }
6721 } else {
6722 if (input != ctxt->input) {
6723 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6724 "Element declaration doesn't start and stop in the same entity\n");
6725 }
6726
6727 NEXT;
6728 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6729 (ctxt->sax->elementDecl != NULL)) {
6730 if (content != NULL)
6731 content->parent = NULL;
6732 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6733 content);
6734 if ((content != NULL) && (content->parent == NULL)) {
6735 /*
6736 * this is a trick: if xmlAddElementDecl is called,
6737 * instead of copying the full tree it is plugged directly
6738 * if called from the parser. Avoid duplicating the
6739 * interfaces or change the API/ABI
6740 */
6741 xmlFreeDocElementContent(ctxt->myDoc, content);
6742 }
6743 } else if (content != NULL) {
6744 xmlFreeDocElementContent(ctxt->myDoc, content);
6745 }
6746 }
6747 }
6748 return(ret);
6749}
6750
6751/**
6752 * xmlParseConditionalSections
6753 * @ctxt: an XML parser context
6754 *
6755 * [61] conditionalSect ::= includeSect | ignoreSect
6756 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6757 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6758 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6759 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6760 */
6761
6762static void
6763xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6764 int id = ctxt->input->id;
6765
6766 SKIP(3);
6767 SKIP_BLANKS;
6768 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6769 SKIP(7);
6770 SKIP_BLANKS;
6771 if (RAW != '[') {
6772 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6773 xmlStopParser(ctxt);
6774 return;
6775 } else {
6776 if (ctxt->input->id != id) {
6777 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6778 "All markup of the conditional section is not in the same entity\n",
6779 NULL, NULL);
6780 }
6781 NEXT;
6782 }
6783 if (xmlParserDebugEntities) {
6784 if ((ctxt->input != NULL) && (ctxt->input->filename))
6785 xmlGenericError(xmlGenericErrorContext,
6786 "%s(%d): ", ctxt->input->filename,
6787 ctxt->input->line);
6788 xmlGenericError(xmlGenericErrorContext,
6789 "Entering INCLUDE Conditional Section\n");
6790 }
6791
6792 while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6793 (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) {
6794 const xmlChar *check = CUR_PTR;
6795 unsigned int cons = ctxt->input->consumed;
6796
6797 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6798 xmlParseConditionalSections(ctxt);
6799 } else if (IS_BLANK_CH(CUR)) {
6800 NEXT;
6801 } else if (RAW == '%') {
6802 xmlParsePEReference(ctxt);
6803 } else
6804 xmlParseMarkupDecl(ctxt);
6805
6806 /*
6807 * Pop-up of finished entities.
6808 */
6809 while ((RAW == 0) && (ctxt->inputNr > 1))
6810 xmlPopInput(ctxt);
6811
6812 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6813 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6814 break;
6815 }
6816 }
6817 if (xmlParserDebugEntities) {
6818 if ((ctxt->input != NULL) && (ctxt->input->filename))
6819 xmlGenericError(xmlGenericErrorContext,
6820 "%s(%d): ", ctxt->input->filename,
6821 ctxt->input->line);
6822 xmlGenericError(xmlGenericErrorContext,
6823 "Leaving INCLUDE Conditional Section\n");
6824 }
6825
6826 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6827 int state;
6828 xmlParserInputState instate;
6829 int depth = 0;
6830
6831 SKIP(6);
6832 SKIP_BLANKS;
6833 if (RAW != '[') {
6834 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6835 xmlStopParser(ctxt);
6836 return;
6837 } else {
6838 if (ctxt->input->id != id) {
6839 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6840 "All markup of the conditional section is not in the same entity\n",
6841 NULL, NULL);
6842 }
6843 NEXT;
6844 }
6845 if (xmlParserDebugEntities) {
6846 if ((ctxt->input != NULL) && (ctxt->input->filename))
6847 xmlGenericError(xmlGenericErrorContext,
6848 "%s(%d): ", ctxt->input->filename,
6849 ctxt->input->line);
6850 xmlGenericError(xmlGenericErrorContext,
6851 "Entering IGNORE Conditional Section\n");
6852 }
6853
6854 /*
6855 * Parse up to the end of the conditional section
6856 * But disable SAX event generating DTD building in the meantime
6857 */
6858 state = ctxt->disableSAX;
6859 instate = ctxt->instate;
6860 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6861 ctxt->instate = XML_PARSER_IGNORE;
6862
6863 while (((depth >= 0) && (RAW != 0)) &&
6864 (ctxt->instate != XML_PARSER_EOF)) {
6865 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6866 depth++;
6867 SKIP(3);
6868 continue;
6869 }
6870 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6871 if (--depth >= 0) SKIP(3);
6872 continue;
6873 }
6874 NEXT;
6875 continue;
6876 }
6877
6878 ctxt->disableSAX = state;
6879 ctxt->instate = instate;
6880
6881 if (xmlParserDebugEntities) {
6882 if ((ctxt->input != NULL) && (ctxt->input->filename))
6883 xmlGenericError(xmlGenericErrorContext,
6884 "%s(%d): ", ctxt->input->filename,
6885 ctxt->input->line);
6886 xmlGenericError(xmlGenericErrorContext,
6887 "Leaving IGNORE Conditional Section\n");
6888 }
6889
6890 } else {
6891 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6892 xmlStopParser(ctxt);
6893 return;
6894 }
6895
6896 if (RAW == 0)
6897 SHRINK;
6898
6899 if (RAW == 0) {
6900 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6901 } else {
6902 if (ctxt->input->id != id) {
6903 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6904 "All markup of the conditional section is not in the same entity\n",
6905 NULL, NULL);
6906 }
6907 SKIP(3);
6908 }
6909}
6910
6911/**
6912 * xmlParseMarkupDecl:
6913 * @ctxt: an XML parser context
6914 *
6915 * parse Markup declarations
6916 *
6917 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6918 * NotationDecl | PI | Comment
6919 *
6920 * [ VC: Proper Declaration/PE Nesting ]
6921 * Parameter-entity replacement text must be properly nested with
6922 * markup declarations. That is to say, if either the first character
6923 * or the last character of a markup declaration (markupdecl above) is
6924 * contained in the replacement text for a parameter-entity reference,
6925 * both must be contained in the same replacement text.
6926 *
6927 * [ WFC: PEs in Internal Subset ]
6928 * In the internal DTD subset, parameter-entity references can occur
6929 * only where markup declarations can occur, not within markup declarations.
6930 * (This does not apply to references that occur in external parameter
6931 * entities or to the external subset.)
6932 */
6933void
6934xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6935 GROW;
6936 if (CUR == '<') {
6937 if (NXT(1) == '!') {
6938 switch (NXT(2)) {
6939 case 'E':
6940 if (NXT(3) == 'L')
6941 xmlParseElementDecl(ctxt);
6942 else if (NXT(3) == 'N')
6943 xmlParseEntityDecl(ctxt);
6944 break;
6945 case 'A':
6946 xmlParseAttributeListDecl(ctxt);
6947 break;
6948 case 'N':
6949 xmlParseNotationDecl(ctxt);
6950 break;
6951 case '-':
6952 xmlParseComment(ctxt);
6953 break;
6954 default:
6955 /* there is an error but it will be detected later */
6956 break;
6957 }
6958 } else if (NXT(1) == '?') {
6959 xmlParsePI(ctxt);
6960 }
6961 }
6962 /*
6963 * This is only for internal subset. On external entities,
6964 * the replacement is done before parsing stage
6965 */
6966 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6967 xmlParsePEReference(ctxt);
6968
6969 /*
6970 * Conditional sections are allowed from entities included
6971 * by PE References in the internal subset.
6972 */
6973 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6974 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6975 xmlParseConditionalSections(ctxt);
6976 }
6977 }
6978
6979 ctxt->instate = XML_PARSER_DTD;
6980}
6981
6982/**
6983 * xmlParseTextDecl:
6984 * @ctxt: an XML parser context
6985 *
6986 * parse an XML declaration header for external entities
6987 *
6988 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6989 */
6990
6991void
6992xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6993 xmlChar *version;
6994 const xmlChar *encoding;
6995
6996 /*
6997 * We know that '<?xml' is here.
6998 */
6999 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7000 SKIP(5);
7001 } else {
7002 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
7003 return;
7004 }
7005
7006 if (!IS_BLANK_CH(CUR)) {
7007 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7008 "Space needed after '<?xml'\n");
7009 }
7010 SKIP_BLANKS;
7011
7012 /*
7013 * We may have the VersionInfo here.
7014 */
7015 version = xmlParseVersionInfo(ctxt);
7016 if (version == NULL)
7017 version = xmlCharStrdup(XML_DEFAULT_VERSION);
7018 else {
7019 if (!IS_BLANK_CH(CUR)) {
7020 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7021 "Space needed here\n");
7022 }
7023 }
7024 ctxt->input->version = version;
7025
7026 /*
7027 * We must have the encoding declaration
7028 */
7029 encoding = xmlParseEncodingDecl(ctxt);
7030 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7031 /*
7032 * The XML REC instructs us to stop parsing right here
7033 */
7034 return;
7035 }
7036 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
7037 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
7038 "Missing encoding in text declaration\n");
7039 }
7040
7041 SKIP_BLANKS;
7042 if ((RAW == '?') && (NXT(1) == '>')) {
7043 SKIP(2);
7044 } else if (RAW == '>') {
7045 /* Deprecated old WD ... */
7046 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7047 NEXT;
7048 } else {
7049 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7050 MOVETO_ENDTAG(CUR_PTR);
7051 NEXT;
7052 }
7053}
7054
7055/**
7056 * xmlParseExternalSubset:
7057 * @ctxt: an XML parser context
7058 * @ExternalID: the external identifier
7059 * @SystemID: the system identifier (or URL)
7060 *
7061 * parse Markup declarations from an external subset
7062 *
7063 * [30] extSubset ::= textDecl? extSubsetDecl
7064 *
7065 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7066 */
7067void
7068xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7069 const xmlChar *SystemID) {
7070 xmlDetectSAX2(ctxt);
7071 GROW;
7072
7073 if ((ctxt->encoding == NULL) &&
7074 (ctxt->input->end - ctxt->input->cur >= 4)) {
7075 xmlChar start[4];
7076 xmlCharEncoding enc;
7077
7078 start[0] = RAW;
7079 start[1] = NXT(1);
7080 start[2] = NXT(2);
7081 start[3] = NXT(3);
7082 enc = xmlDetectCharEncoding(start, 4);
7083 if (enc != XML_CHAR_ENCODING_NONE)
7084 xmlSwitchEncoding(ctxt, enc);
7085 }
7086
7087 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7088 xmlParseTextDecl(ctxt);
7089 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7090 /*
7091 * The XML REC instructs us to stop parsing right here
7092 */
7093 ctxt->instate = XML_PARSER_EOF;
7094 return;
7095 }
7096 }
7097 if (ctxt->myDoc == NULL) {
7098 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7099 if (ctxt->myDoc == NULL) {
7100 xmlErrMemory(ctxt, "New Doc failed");
7101 return;
7102 }
7103 ctxt->myDoc->properties = XML_DOC_INTERNAL;
7104 }
7105 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7106 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7107
7108 ctxt->instate = XML_PARSER_DTD;
7109 ctxt->external = 1;
7110 while (((RAW == '<') && (NXT(1) == '?')) ||
7111 ((RAW == '<') && (NXT(1) == '!')) ||
7112 (RAW == '%') || IS_BLANK_CH(CUR)) {
7113 const xmlChar *check = CUR_PTR;
7114 unsigned int cons = ctxt->input->consumed;
7115
7116 GROW;
7117 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7118 xmlParseConditionalSections(ctxt);
7119 } else if (IS_BLANK_CH(CUR)) {
7120 NEXT;
7121 } else if (RAW == '%') {
7122 xmlParsePEReference(ctxt);
7123 } else
7124 xmlParseMarkupDecl(ctxt);
7125
7126 /*
7127 * Pop-up of finished entities.
7128 */
7129 while ((RAW == 0) && (ctxt->inputNr > 1))
7130 xmlPopInput(ctxt);
7131
7132 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
7133 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7134 break;
7135 }
7136 }
7137
7138 if (RAW != 0) {
7139 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7140 }
7141
7142}
7143
7144/**
7145 * xmlParseReference:
7146 * @ctxt: an XML parser context
7147 *
7148 * parse and handle entity references in content, depending on the SAX
7149 * interface, this may end-up in a call to character() if this is a
7150 * CharRef, a predefined entity, if there is no reference() callback.
7151 * or if the parser was asked to switch to that mode.
7152 *
7153 * [67] Reference ::= EntityRef | CharRef
 *
 * Summary of what the code below does:
 *  - it returns immediately unless the current character is '&';
 *  - for a CharRef, the value is handed to the characters() callback,
 *    or, when ctxt->charset is not UTF-8 and the value is above 0xFF,
 *    to the reference() callback as "#x..." / "#..." text;
 *  - for an entity reference, nothing more is done when
 *    xmlParseEntityRef() returns NULL or when ctxt->wellFormed is 0;
 *    the content of a predefined entity is sent to characters();
 *  - otherwise, subject to the tests on ent->checked, ent->etype and
 *    ctxt->options, the entity content is parsed on first reference and
 *    the reference is then either reported through reference() or
 *    replaced by nodes added under ctxt->node.
7154 */
7155void
7156xmlParseReference(xmlParserCtxtPtr ctxt) {
7157 xmlEntityPtr ent;
7158 xmlChar *val;
7159 int was_checked;
7160 xmlNodePtr list = NULL;
7161 xmlParserErrors ret = XML_ERR_OK;
7162
7163
7164 if (RAW != '&')
7165 return;
7166
7167 /*
7168 * Simple case of a CharRef
7169 */
7170 if (NXT(1) == '#') {
7171 int i = 0;
     /* receives the character itself or the "#x..." / "#..." text */
7172 xmlChar out[10];
     /* character following "&#", read before xmlParseCharRef() runs;
      * it selects hexadecimal or decimal output further down */
7173 int hex = NXT(2);
7174 int value = xmlParseCharRef(ctxt);
7175
7176 if (value == 0)
7177 return;
7178 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7179 /*
7180 * So we are using non-UTF-8 buffers
7181 * Check that the char fit on 8bits, if not
7182 * generate a CharRef.
7183 */
7184 if (value <= 0xFF) {
7185 out[0] = value;
7186 out[1] = 0;
7187 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7188 (!ctxt->disableSAX))
7189 ctxt->sax->characters(ctxt->userData, out, 1);
7190 } else {
7191 if ((hex == 'x') || (hex == 'X'))
7192 snprintf((char *)out, sizeof(out), "#x%X", value);
7193 else
7194 snprintf((char *)out, sizeof(out), "#%d", value);
7195 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7196 (!ctxt->disableSAX))
7197 ctxt->sax->reference(ctxt->userData, out);
7198 }
7199 } else {
7200 /*
7201 * Just encode the value in UTF-8
7202 */
7203 COPY_BUF(0 ,out, i, value);
7204 out[i] = 0;
7205 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7206 (!ctxt->disableSAX))
7207 ctxt->sax->characters(ctxt->userData, out, i);
7208 }
7209 return;
7210 }
7211
7212 /*
7213 * We are seeing an entity reference
7214 */
7215 ent = xmlParseEntityRef(ctxt);
7216 if (ent == NULL) return;
7217 if (!ctxt->wellFormed)
7218 return;
     /* snapshot taken before the first-reference parsing below assigns
      * a new value to ent->checked */
7219 was_checked = ent->checked;
7220
7221 /* special case of predefined entities */
7222 if ((ent->name == NULL) ||
7223 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7224 val = ent->content;
7225 if (val == NULL) return;
7226 /*
7227 * inline the entity.
7228 */
7229 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7230 (!ctxt->disableSAX))
7231 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7232 return;
7233 }
7234
7235 /*
7236 * The first reference to the entity trigger a parsing phase
7237 * where the ent->children is filled with the result from
7238 * the parsing.
7239 * Note: external parsed entities will not be loaded, it is not
7240 * required for a non-validating parser, unless the parsing option
7241 * of validating, or substituting entities were given. Doing so is
7242 * far more secure as the parser will only process data coming from
7243 * the document entity by default.
7244 */
7245 if (((ent->checked == 0) ||
7246 ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) &&
7247 ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7248 (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
     /* entity counter before parsing, used to compute how many
      * entity references the content of this entity adds */
7249 unsigned long oldnbent = ctxt->nbentities;
7250
7251 /*
7252 * This is a bit hackish but this seems the best
7253 * way to make sure both SAX and DOM entity support
7254 * behaves okay.
7255 */
7256 void *user_data;
7257 if (ctxt->userData == ctxt)
7258 user_data = NULL;
7259 else
7260 user_data = ctxt->userData;
7261
7262 /*
7263 * Check that this entity is well formed
7264 * 4.3.2: An internal general parsed entity is well-formed
7265 * if its replacement text matches the production labeled
7266 * content.
7267 */
7268 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7269 ctxt->depth++;
7270 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7271 user_data, &list);
7272 ctxt->depth--;
7273
7274 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7275 ctxt->depth++;
7276 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7277 user_data, ctxt->depth, ent->URI,
7278 ent->ExternalID, &list);
7279 ctxt->depth--;
7280 } else {
7281 ret = XML_ERR_ENTITY_PE_INTERNAL;
7282 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7283 "invalid entity type found\n", NULL);
7284 }
7285
7286 /*
7287 * Store the number of entities needing parsing for this entity
7288 * content and do checkings
7289 */
     /* layout of ent->checked as written here: the count is stored
      * doubled, and bit 0 is set when the content contains '<' */
7290 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
7291 if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7292 ent->checked |= 1;
7293 if (ret == XML_ERR_ENTITY_LOOP) {
7294 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7295 xmlFreeNodeList(list);
7296 return;
7297 }
7298 if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
7299 xmlFreeNodeList(list);
7300 return;
7301 }
7302
7303 if ((ret == XML_ERR_OK) && (list != NULL)) {
7304 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7305 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7306 (ent->children == NULL)) {
     /* the parsed node list becomes the entity content */
7307 ent->children = list;
7308 if (ctxt->replaceEntities) {
7309 /*
7310 * Prune it directly in the generated document
7311 * except for single text nodes.
7312 */
7313 if (((list->type == XML_TEXT_NODE) &&
7314 (list->next == NULL)) ||
7315 (ctxt->parseMode == XML_PARSE_READER)) {
7316 list->parent = (xmlNodePtr) ent;
7317 list = NULL;
7318 ent->owner = 1;
7319 } else {
7320 ent->owner = 0;
7321 while (list != NULL) {
7322 list->parent = (xmlNodePtr) ctxt->node;
7323 list->doc = ctxt->myDoc;
7324 if (list->next == NULL)
7325 ent->last = list;
7326 list = list->next;
7327 }
     /* the walk above left list NULL; point it back at the first
      * node so the code at the end of the function can tell that
      * this is the first occurrence */
7328 list = ent->children;
7329#ifdef LIBXML_LEGACY_ENABLED
7330 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7331 xmlAddEntityReference(ent, list, NULL);
7332#endif /* LIBXML_LEGACY_ENABLED */
7333 }
7334 } else {
7335 ent->owner = 1;
7336 while (list != NULL) {
7337 list->parent = (xmlNodePtr) ent;
7338 xmlSetTreeDoc(list, ent->doc);
7339 if (list->next == NULL)
7340 ent->last = list;
7341 list = list->next;
7342 }
7343 }
7344 } else {
7345 xmlFreeNodeList(list);
7346 list = NULL;
7347 }
7348 } else if ((ret != XML_ERR_OK) &&
7349 (ret != XML_WAR_UNDECLARED_ENTITY)) {
7350 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7351 "Entity '%s' failed to parse\n", ent->name);
7352 xmlParserEntityCheck(ctxt, 0, ent, 0);
7353 } else if (list != NULL) {
7354 xmlFreeNodeList(list);
7355 list = NULL;
7356 }
7357 if (ent->checked == 0)
7358 ent->checked = 2;
7359 } else if (ent->checked != 1) {
     /* entity not parsed again: add the count recorded in
      * ent->checked to the running entity counter */
7360 ctxt->nbentities += ent->checked / 2;
7361 }
7362
7363 /*
7364 * Now that the entity content has been gathered
7365 * provide it to the application, this can take different forms based
7366 * on the parsing modes.
7367 */
7368 if (ent->children == NULL) {
7369 /*
7370 * Probably running in SAX mode and the callbacks don't
7371 * build the entity content. So unless we already went
7372 * though parsing for first checking go though the entity
7373 * content to generate callbacks associated to the entity
7374 */
7375 if (was_checked != 0) {
7376 void *user_data;
7377 /*
7378 * This is a bit hackish but this seems the best
7379 * way to make sure both SAX and DOM entity support
7380 * behaves okay.
7381 */
7382 if (ctxt->userData == ctxt)
7383 user_data = NULL;
7384 else
7385 user_data = ctxt->userData;
7386
     /* same parsing calls as above, but with a NULL list pointer:
      * no node list is collected here */
7387 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7388 ctxt->depth++;
7389 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7390 ent->content, user_data, NULL);
7391 ctxt->depth--;
7392 } else if (ent->etype ==
7393 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7394 ctxt->depth++;
7395 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7396 ctxt->sax, user_data, ctxt->depth,
7397 ent->URI, ent->ExternalID, NULL);
7398 ctxt->depth--;
7399 } else {
7400 ret = XML_ERR_ENTITY_PE_INTERNAL;
7401 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7402 "invalid entity type found\n", NULL);
7403 }
7404 if (ret == XML_ERR_ENTITY_LOOP) {
7405 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7406 return;
7407 }
7408 }
7409 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7410 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7411 /*
7412 * Entity reference callback comes second, it's somewhat
7413 * superfluous but a compatibility to historical behaviour
7414 */
7415 ctxt->sax->reference(ctxt->userData, ent->name);
7416 }
7417 return;
7418 }
7419
7420 /*
7421 * If we didn't get any children for the entity being built
7422 */
     /* NOTE(review): ent->children is not NULL when this point is
      * reached (the NULL case returned above), so the comment just
      * above looks out of date; this test reports the reference
      * through reference() when entities are not being replaced */
7423 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7424 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7425 /*
7426 * Create a node.
7427 */
7428 ctxt->sax->reference(ctxt->userData, ent->name);
7429 return;
7430 }
7431
7432 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
7433 /*
7434 * There is a problem on the handling of _private for entities
7435 * (bug 155816): Should we copy the content of the field from
7436 * the entity (possibly overwriting some value set by the user
7437 * when a copy is created), should we leave it alone, or should
7438 * we try to take care of different situations? The problem
7439 * is exacerbated by the usage of this field by the xmlReader.
7440 * To fix this bug, we look at _private on the created node
7441 * and, if it's NULL, we copy in whatever was in the entity.
7442 * If it's not NULL we leave it alone. This is somewhat of a
7443 * hack - maybe we should have further tests to determine
7444 * what to do.
7445 */
7446 if ((ctxt->node != NULL) && (ent->children != NULL)) {
7447 /*
7448 * Seems we are generating the DOM content, do
7449 * a simple tree copy for all references except the first
7450 * In the first occurrence list contains the replacement.
7451 */
7452 if (((list == NULL) && (ent->owner == 0)) ||
7453 (ctxt->parseMode == XML_PARSE_READER)) {
7454 xmlNodePtr nw = NULL, cur, firstChild = NULL;
7455
7456 /*
7457 * We are copying here, make sure there is no abuse
7458 */
7459 ctxt->sizeentcopy += ent->length + 5;
7460 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7461 return;
7462
7463 /*
7464 * when operating on a reader, the entities definitions
7465 * are always owning the entities subtree.
7466 if (ctxt->parseMode == XML_PARSE_READER)
7467 ent->owner = 1;
7468 */
7469
     /* the copies are added under ctxt->node; the nodes held by
      * the entity are left where they are */
7470 cur = ent->children;
7471 while (cur != NULL) {
7472 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7473 if (nw != NULL) {
7474 if (nw->_private == NULL)
7475 nw->_private = cur->_private;
7476 if (firstChild == NULL){
7477 firstChild = nw;
7478 }
7479 nw = xmlAddChild(ctxt->node, nw);
7480 }
7481 if (cur == ent->last) {
7482 /*
7483 * needed to detect some strange empty
7484 * node cases in the reader tests
7485 */
7486 if ((ctxt->parseMode == XML_PARSE_READER) &&
7487 (nw != NULL) &&
7488 (nw->type == XML_ELEMENT_NODE) &&
7489 (nw->children == NULL))
7490 nw->extra = 1;
7491
7492 break;
7493 }
7494 cur = cur->next;
7495 }
7496#ifdef LIBXML_LEGACY_ENABLED
7497 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7498 xmlAddEntityReference(ent, firstChild, nw);
7499#endif /* LIBXML_LEGACY_ENABLED */
7500 } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7501 xmlNodePtr nw = NULL, cur, next, last,
7502 firstChild = NULL;
7503
7504 /*
7505 * We are copying here, make sure there is no abuse
7506 */
7507 ctxt->sizeentcopy += ent->length + 5;
7508 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7509 return;
7510
7511 /*
7512 * Copy the entity child list and make it the new
7513 * entity child list. The goal is to make sure any
7514 * ID or REF referenced will be the one from the
7515 * document content and not the entity copy.
7516 */
7517 cur = ent->children;
7518 ent->children = NULL;
7519 last = ent->last;
7520 ent->last = NULL;
7521 while (cur != NULL) {
7522 next = cur->next;
7523 cur->next = NULL;
7524 cur->parent = NULL;
7525 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7526 if (nw != NULL) {
7527 if (nw->_private == NULL)
7528 nw->_private = cur->_private;
7529 if (firstChild == NULL){
7530 firstChild = cur;
7531 }
     /* the copy goes back to the entity, the original node is
      * moved under ctxt->node */
7532 xmlAddChild((xmlNodePtr) ent, nw);
7533 xmlAddChild(ctxt->node, cur);
7534 }
7535 if (cur == last)
7536 break;
7537 cur = next;
7538 }
7539 if (ent->owner == 0)
7540 ent->owner = 1;
7541#ifdef LIBXML_LEGACY_ENABLED
7542 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7543 xmlAddEntityReference(ent, firstChild, nw);
7544#endif /* LIBXML_LEGACY_ENABLED */
7545 } else {
7546 const xmlChar *nbktext;
7547
7548 /*
7549 * the name change is to avoid coalescing of the
7550 * node with a possible previous text one which
7551 * would make ent->children a dangling pointer
7552 */
7553 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7554 -1);
7555 if (ent->children->type == XML_TEXT_NODE)
7556 ent->children->name = nbktext;
7557 if ((ent->last != ent->children) &&
7558 (ent->last->type == XML_TEXT_NODE))
7559 ent->last->name = nbktext;
7560 xmlAddChildList(ctxt->node, ent->children);
7561 }
7562
7563 /*
7564 * This is to avoid a nasty side effect, see
7565 * characters() in SAX.c
7566 */
7567 ctxt->nodemem = 0;
7568 ctxt->nodelen = 0;
7569 return;
7570 }
7571 }
7572}
7573
7574/**
7575 * xmlParseEntityRef:
7576 * @ctxt: an XML parser context
7577 *
7578 * parse ENTITY references declarations
7579 *
7580 * [68] EntityRef ::= '&' Name ';'
7581 *
7582 * [ WFC: Entity Declared ]
7583 * In a document without any DTD, a document with only an internal DTD
7584 * subset which contains no parameter entity references, or a document
7585 * with "standalone='yes'", the Name given in the entity reference
7586 * must match that in an entity declaration, except that well-formed
7587 * documents need not declare any of the following entities: amp, lt,
7588 * gt, apos, quot. The declaration of a parameter entity must precede
7589 * any reference to it. Similarly, the declaration of a general entity
7590 * must precede any reference to it which appears in a default value in an
7591 * attribute-list declaration. Note that if entities are declared in the
7592 * external subset or in external parameter entities, a non-validating
7593 * processor is not obligated to read and process their declarations;
7594 * for such documents, the rule that an entity must be declared is a
7595 * well-formedness constraint only if standalone='yes'.
7596 *
7597 * [ WFC: Parsed Entity ]
7598 * An entity reference must not contain the name of an unparsed entity
7599 *
7600 * Returns the xmlEntityPtr if found, or NULL otherwise.
7601 */
7602xmlEntityPtr
7603xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7604 const xmlChar *name;
7605 xmlEntityPtr ent = NULL;
7606
7607 GROW;
7608 if (ctxt->instate == XML_PARSER_EOF)
7609 return(NULL);
7610
7611 if (RAW != '&')
7612 return(NULL);
7613 NEXT;
7614 name = xmlParseName(ctxt);
7615 if (name == NULL) {
7616 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7617 "xmlParseEntityRef: no name\n");
7618 return(NULL);
7619 }
7620 if (RAW != ';') {
7621 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7622 return(NULL);
7623 }
7624 NEXT;
7625
7626 /*
7627 * Predefined entities override any extra definition
7628 */
7629 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7630 ent = xmlGetPredefinedEntity(name);
7631 if (ent != NULL)
7632 return(ent);
7633 }
7634
7635 /*
7636 * Increase the number of entity references parsed
7637 */
7638 ctxt->nbentities++;
7639
7640 /*
7641 * Ask first SAX for entity resolution, otherwise try the
7642 * entities which may have stored in the parser context.
7643 */
7644 if (ctxt->sax != NULL) {
7645 if (ctxt->sax->getEntity != NULL)
7646 ent = ctxt->sax->getEntity(ctxt->userData, name);
7647 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7648 (ctxt->options & XML_PARSE_OLDSAX))
7649 ent = xmlGetPredefinedEntity(name);
7650 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7651 (ctxt->userData==ctxt)) {
7652 ent = xmlSAX2GetEntity(ctxt, name);
7653 }
7654 }
7655 if (ctxt->instate == XML_PARSER_EOF)
7656 return(NULL);
7657 /*
7658 * [ WFC: Entity Declared ]
7659 * In a document without any DTD, a document with only an
7660 * internal DTD subset which contains no parameter entity
7661 * references, or a document with "standalone='yes'", the
7662 * Name given in the entity reference must match that in an
7663 * entity declaration, except that well-formed documents
7664 * need not declare any of the following entities: amp, lt,
7665 * gt, apos, quot.
7666 * The declaration of a parameter entity must precede any
7667 * reference to it.
7668 * Similarly, the declaration of a general entity must
7669 * precede any reference to it which appears in a default
7670 * value in an attribute-list declaration. Note that if
7671 * entities are declared in the external subset or in
7672 * external parameter entities, a non-validating processor
7673 * is not obligated to read and process their declarations;
7674 * for such documents, the rule that an entity must be
7675 * declared is a well-formedness constraint only if
7676 * standalone='yes'.
7677 */
7678 if (ent == NULL) {
7679 if ((ctxt->standalone == 1) ||
7680 ((ctxt->hasExternalSubset == 0) &&
7681 (ctxt->hasPErefs == 0))) {
7682 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7683 "Entity '%s' not defined\n", name);
7684 } else {
7685 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7686 "Entity '%s' not defined\n", name);
7687 if ((ctxt->inSubset == 0) &&
7688 (ctxt->sax != NULL) &&
7689 (ctxt->sax->reference != NULL)) {
7690 ctxt->sax->reference(ctxt->userData, name);
7691 }
7692 }
7693 xmlParserEntityCheck(ctxt, 0, ent, 0);
7694 ctxt->valid = 0;
7695 }
7696
7697 /*
7698 * [ WFC: Parsed Entity ]
7699 * An entity reference must not contain the name of an
7700 * unparsed entity
7701 */
7702 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7703 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7704 "Entity reference to unparsed entity %s\n", name);
7705 }
7706
7707 /*
7708 * [ WFC: No External Entity References ]
7709 * Attribute values cannot contain direct or indirect
7710 * entity references to external entities.
7711 */
7712 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7713 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7714 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7715 "Attribute references external entity '%s'\n", name);
7716 }
7717 /*
7718 * [ WFC: No < in Attribute Values ]
7719 * The replacement text of any entity referred to directly or
7720 * indirectly in an attribute value (other than "&lt;") must
7721 * not contain a <.
7722 */
7723 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7724 (ent != NULL) &&
7725 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7726 if (((ent->checked & 1) || (ent->checked == 0)) &&
7727 (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) {
7728 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7729 "'<' in entity '%s' is not allowed in attributes values\n", name);
7730 }
7731 }
7732
7733 /*
7734 * Internal check, no parameter entities here ...
7735 */
7736 else {
7737 switch (ent->etype) {
7738 case XML_INTERNAL_PARAMETER_ENTITY:
7739 case XML_EXTERNAL_PARAMETER_ENTITY:
7740 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7741 "Attempt to reference the parameter entity '%s'\n",
7742 name);
7743 break;
7744 default:
7745 break;
7746 }
7747 }
7748
7749 /*
7750 * [ WFC: No Recursion ]
7751 * A parsed entity must not contain a recursive reference
7752 * to itself, either directly or indirectly.
7753 * Done somewhere else
7754 */
7755 return(ent);
7756}
7757
7758/**
7759 * xmlParseStringEntityRef:
7760 * @ctxt: an XML parser context
7761 * @str: a pointer to an index in the string
7762 *
7763 * parse ENTITY references declarations, but this version parses it from
7764 * a string value.
7765 *
7766 * [68] EntityRef ::= '&' Name ';'
7767 *
7768 * [ WFC: Entity Declared ]
7769 * In a document without any DTD, a document with only an internal DTD
7770 * subset which contains no parameter entity references, or a document
7771 * with "standalone='yes'", the Name given in the entity reference
7772 * must match that in an entity declaration, except that well-formed
7773 * documents need not declare any of the following entities: amp, lt,
7774 * gt, apos, quot. The declaration of a parameter entity must precede
7775 * any reference to it. Similarly, the declaration of a general entity
7776 * must precede any reference to it which appears in a default value in an
7777 * attribute-list declaration. Note that if entities are declared in the
7778 * external subset or in external parameter entities, a non-validating
7779 * processor is not obligated to read and process their declarations;
7780 * for such documents, the rule that an entity must be declared is a
7781 * well-formedness constraint only if standalone='yes'.
7782 *
7783 * [ WFC: Parsed Entity ]
7784 * An entity reference must not contain the name of an unparsed entity
7785 *
7786 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7787 * is updated to the current location in the string.
7788 */
7789static xmlEntityPtr
7790xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7791 xmlChar *name;
7792 const xmlChar *ptr;
7793 xmlChar cur;
7794 xmlEntityPtr ent = NULL;
7795
7796 if ((str == NULL) || (*str == NULL))
7797 return(NULL);
7798 ptr = *str;
7799 cur = *ptr;
7800 if (cur != '&')
7801 return(NULL);
7802
7803 ptr++;
7804 name = xmlParseStringName(ctxt, &ptr);
7805 if (name == NULL) {
7806 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7807 "xmlParseStringEntityRef: no name\n");
7808 *str = ptr;
7809 return(NULL);
7810 }
7811 if (*ptr != ';') {
7812 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7813 xmlFree(name);
7814 *str = ptr;
7815 return(NULL);
7816 }
7817 ptr++;
7818
7819
7820 /*
7821 * Predefined entities override any extra definition
7822 */
7823 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7824 ent = xmlGetPredefinedEntity(name);
7825 if (ent != NULL) {
7826 xmlFree(name);
7827 *str = ptr;
7828 return(ent);
7829 }
7830 }
7831
7832 /*
7833 * Increate the number of entity references parsed
7834 */
7835 ctxt->nbentities++;
7836
7837 /*
7838 * Ask first SAX for entity resolution, otherwise try the
7839 * entities which may have stored in the parser context.
7840 */
7841 if (ctxt->sax != NULL) {
7842 if (ctxt->sax->getEntity != NULL)
7843 ent = ctxt->sax->getEntity(ctxt->userData, name);
7844 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7845 ent = xmlGetPredefinedEntity(name);
7846 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7847 ent = xmlSAX2GetEntity(ctxt, name);
7848 }
7849 }
7850 if (ctxt->instate == XML_PARSER_EOF) {
7851 xmlFree(name);
7852 return(NULL);
7853 }
7854
7855 /*
7856 * [ WFC: Entity Declared ]
7857 * In a document without any DTD, a document with only an
7858 * internal DTD subset which contains no parameter entity
7859 * references, or a document with "standalone='yes'", the
7860 * Name given in the entity reference must match that in an
7861 * entity declaration, except that well-formed documents
7862 * need not declare any of the following entities: amp, lt,
7863 * gt, apos, quot.
7864 * The declaration of a parameter entity must precede any
7865 * reference to it.
7866 * Similarly, the declaration of a general entity must
7867 * precede any reference to it which appears in a default
7868 * value in an attribute-list declaration. Note that if
7869 * entities are declared in the external subset or in
7870 * external parameter entities, a non-validating processor
7871 * is not obligated to read and process their declarations;
7872 * for such documents, the rule that an entity must be
7873 * declared is a well-formedness constraint only if
7874 * standalone='yes'.
7875 */
7876 if (ent == NULL) {
7877 if ((ctxt->standalone == 1) ||
7878 ((ctxt->hasExternalSubset == 0) &&
7879 (ctxt->hasPErefs == 0))) {
7880 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7881 "Entity '%s' not defined\n", name);
7882 } else {
7883 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7884 "Entity '%s' not defined\n",
7885 name);
7886 }
7887 xmlParserEntityCheck(ctxt, 0, ent, 0);
7888 /* TODO ? check regressions ctxt->valid = 0; */
7889 }
7890
7891 /*
7892 * [ WFC: Parsed Entity ]
7893 * An entity reference must not contain the name of an
7894 * unparsed entity
7895 */
7896 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7897 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7898 "Entity reference to unparsed entity %s\n", name);
7899 }
7900
7901 /*
7902 * [ WFC: No External Entity References ]
7903 * Attribute values cannot contain direct or indirect
7904 * entity references to external entities.
7905 */
7906 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7907 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7908 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7909 "Attribute references external entity '%s'\n", name);
7910 }
7911 /*
7912 * [ WFC: No < in Attribute Values ]
7913 * The replacement text of any entity referred to directly or
7914 * indirectly in an attribute value (other than "&lt;") must
7915 * not contain a <.
7916 */
7917 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7918 (ent != NULL) && (ent->content != NULL) &&
7919 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7920 (xmlStrchr(ent->content, '<'))) {
7921 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7922 "'<' in entity '%s' is not allowed in attributes values\n",
7923 name);
7924 }
7925
7926 /*
7927 * Internal check, no parameter entities here ...
7928 */
7929 else {
7930 switch (ent->etype) {
7931 case XML_INTERNAL_PARAMETER_ENTITY:
7932 case XML_EXTERNAL_PARAMETER_ENTITY:
7933 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7934 "Attempt to reference the parameter entity '%s'\n",
7935 name);
7936 break;
7937 default:
7938 break;
7939 }
7940 }
7941
7942 /*
7943 * [ WFC: No Recursion ]
7944 * A parsed entity must not contain a recursive reference
7945 * to itself, either directly or indirectly.
7946 * Done somewhere else
7947 */
7948
7949 xmlFree(name);
7950 *str = ptr;
7951 return(ent);
7952}
7953
7954/**
7955 * xmlParsePEReference:
7956 * @ctxt: an XML parser context
7957 *
7958 * parse PEReference declarations
7959 * The entity content is handled directly by pushing it's content as
7960 * a new input stream.
7961 *
7962 * [69] PEReference ::= '%' Name ';'
7963 *
7964 * [ WFC: No Recursion ]
7965 * A parsed entity must not contain a recursive
7966 * reference to itself, either directly or indirectly.
7967 *
7968 * [ WFC: Entity Declared ]
7969 * In a document without any DTD, a document with only an internal DTD
7970 * subset which contains no parameter entity references, or a document
7971 * with "standalone='yes'", ... ... The declaration of a parameter
7972 * entity must precede any reference to it...
7973 *
7974 * [ VC: Entity Declared ]
7975 * In a document with an external subset or external parameter entities
7976 * with "standalone='no'", ... ... The declaration of a parameter entity
7977 * must precede any reference to it...
7978 *
7979 * [ WFC: In DTD ]
7980 * Parameter-entity references may only appear in the DTD.
7981 * NOTE: misleading but this is handled.
7982 */
7983void
7984xmlParsePEReference(xmlParserCtxtPtr ctxt)
7985{
7986 const xmlChar *name;
7987 xmlEntityPtr entity = NULL;
7988 xmlParserInputPtr input;
7989
7990 if (RAW != '%')
7991 return;
7992 NEXT;
7993 name = xmlParseName(ctxt);
7994 if (name == NULL) {
7995 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7996 "xmlParsePEReference: no name\n");
7997 return;
7998 }
7999 if (RAW != ';') {
8000 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8001 return;
8002 }
8003
8004 NEXT;
8005
8006 /*
8007 * Increate the number of entity references parsed
8008 */
8009 ctxt->nbentities++;
8010
8011 /*
8012 * Request the entity from SAX
8013 */
8014 if ((ctxt->sax != NULL) &&
8015 (ctxt->sax->getParameterEntity != NULL))
8016 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8017 if (ctxt->instate == XML_PARSER_EOF)
8018 return;
8019 if (entity == NULL) {
8020 /*
8021 * [ WFC: Entity Declared ]
8022 * In a document without any DTD, a document with only an
8023 * internal DTD subset which contains no parameter entity
8024 * references, or a document with "standalone='yes'", ...
8025 * ... The declaration of a parameter entity must precede
8026 * any reference to it...
8027 */
8028 if ((ctxt->standalone == 1) ||
8029 ((ctxt->hasExternalSubset == 0) &&
8030 (ctxt->hasPErefs == 0))) {
8031 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8032 "PEReference: %%%s; not found\n",
8033 name);
8034 } else {
8035 /*
8036 * [ VC: Entity Declared ]
8037 * In a document with an external subset or external
8038 * parameter entities with "standalone='no'", ...
8039 * ... The declaration of a parameter entity must
8040 * precede any reference to it...
8041 */
8042 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8043 "PEReference: %%%s; not found\n",
8044 name, NULL);
8045 ctxt->valid = 0;
8046 }
8047 xmlParserEntityCheck(ctxt, 0, NULL, 0);
8048 } else {
8049 /*
8050 * Internal checking in case the entity quest barfed
8051 */
8052 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8053 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8054 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8055 "Internal: %%%s; is not a parameter entity\n",
8056 name, NULL);
8057 } else if (ctxt->input->free != deallocblankswrapper) {
8058 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
8059 if (xmlPushInput(ctxt, input) < 0)
8060 return;
8061 } else {
8062 /*
8063 * TODO !!!
8064 * handle the extra spaces added before and after
8065 * c.f. http://www.w3.org/TR/REC-xml#as-PE
8066 */
8067 input = xmlNewEntityInputStream(ctxt, entity);
8068 if (xmlPushInput(ctxt, input) < 0)
8069 return;
8070 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8071 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8072 (IS_BLANK_CH(NXT(5)))) {
8073 xmlParseTextDecl(ctxt);
8074 if (ctxt->errNo ==
8075 XML_ERR_UNSUPPORTED_ENCODING) {
8076 /*
8077 * The XML REC instructs us to stop parsing
8078 * right here
8079 */
8080 ctxt->instate = XML_PARSER_EOF;
8081 return;
8082 }
8083 }
8084 }
8085 }
8086 ctxt->hasPErefs = 1;
8087}
8088
8089/**
8090 * xmlLoadEntityContent:
8091 * @ctxt: an XML parser context
8092 * @entity: an unloaded system entity
8093 *
8094 * Load the original content of the given system entity from the
8095 * ExternalID/SystemID given. This is to be used for Included in Literal
8096 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8097 *
8098 * Returns 0 in case of success and -1 in case of failure
8099 */
8100static int
8101xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8102 xmlParserInputPtr input;
8103 xmlBufferPtr buf;
8104 int l, c;
8105 int count = 0;
8106
8107 if ((ctxt == NULL) || (entity == NULL) ||
8108 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8109 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8110 (entity->content != NULL)) {
8111 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8112 "xmlLoadEntityContent parameter error");
8113 return(-1);
8114 }
8115
8116 if (xmlParserDebugEntities)
8117 xmlGenericError(xmlGenericErrorContext,
8118 "Reading %s entity content input\n", entity->name);
8119
8120 buf = xmlBufferCreate();
8121 if (buf == NULL) {
8122 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8123 "xmlLoadEntityContent parameter error");
8124 return(-1);
8125 }
8126
8127 input = xmlNewEntityInputStream(ctxt, entity);
8128 if (input == NULL) {
8129 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8130 "xmlLoadEntityContent input error");
8131 xmlBufferFree(buf);
8132 return(-1);
8133 }
8134
8135 /*
8136 * Push the entity as the current input, read char by char
8137 * saving to the buffer until the end of the entity or an error
8138 */
8139 if (xmlPushInput(ctxt, input) < 0) {
8140 xmlBufferFree(buf);
8141 return(-1);
8142 }
8143
8144 GROW;
8145 c = CUR_CHAR(l);
8146 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8147 (IS_CHAR(c))) {
8148 xmlBufferAdd(buf, ctxt->input->cur, l);
8149 if (count++ > XML_PARSER_CHUNK_SIZE) {
8150 count = 0;
8151 GROW;
8152 if (ctxt->instate == XML_PARSER_EOF) {
8153 xmlBufferFree(buf);
8154 return(-1);
8155 }
8156 }
8157 NEXTL(l);
8158 c = CUR_CHAR(l);
8159 if (c == 0) {
8160 count = 0;
8161 GROW;
8162 if (ctxt->instate == XML_PARSER_EOF) {
8163 xmlBufferFree(buf);
8164 return(-1);
8165 }
8166 c = CUR_CHAR(l);
8167 }
8168 }
8169
8170 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8171 xmlPopInput(ctxt);
8172 } else if (!IS_CHAR(c)) {
8173 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8174 "xmlLoadEntityContent: invalid char value %d\n",
8175 c);
8176 xmlBufferFree(buf);
8177 return(-1);
8178 }
8179 entity->content = buf->content;
8180 buf->content = NULL;
8181 xmlBufferFree(buf);
8182
8183 return(0);
8184}
8185
8186/**
8187 * xmlParseStringPEReference:
8188 * @ctxt: an XML parser context
8189 * @str: a pointer to an index in the string
8190 *
8191 * parse PEReference declarations
8192 *
8193 * [69] PEReference ::= '%' Name ';'
8194 *
8195 * [ WFC: No Recursion ]
8196 * A parsed entity must not contain a recursive
8197 * reference to itself, either directly or indirectly.
8198 *
8199 * [ WFC: Entity Declared ]
8200 * In a document without any DTD, a document with only an internal DTD
8201 * subset which contains no parameter entity references, or a document
8202 * with "standalone='yes'", ... ... The declaration of a parameter
8203 * entity must precede any reference to it...
8204 *
8205 * [ VC: Entity Declared ]
8206 * In a document with an external subset or external parameter entities
8207 * with "standalone='no'", ... ... The declaration of a parameter entity
8208 * must precede any reference to it...
8209 *
8210 * [ WFC: In DTD ]
8211 * Parameter-entity references may only appear in the DTD.
8212 * NOTE: misleading but this is handled.
8213 *
8214 * Returns the string of the entity content.
8215 * str is updated to the current value of the index
8216 */
8217static xmlEntityPtr
8218xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8219 const xmlChar *ptr;
8220 xmlChar cur;
8221 xmlChar *name;
8222 xmlEntityPtr entity = NULL;
8223
8224 if ((str == NULL) || (*str == NULL)) return(NULL);
8225 ptr = *str;
8226 cur = *ptr;
8227 if (cur != '%')
8228 return(NULL);
8229 ptr++;
8230 name = xmlParseStringName(ctxt, &ptr);
8231 if (name == NULL) {
8232 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8233 "xmlParseStringPEReference: no name\n");
8234 *str = ptr;
8235 return(NULL);
8236 }
8237 cur = *ptr;
8238 if (cur != ';') {
8239 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8240 xmlFree(name);
8241 *str = ptr;
8242 return(NULL);
8243 }
8244 ptr++;
8245
8246 /*
8247 * Increate the number of entity references parsed
8248 */
8249 ctxt->nbentities++;
8250
8251 /*
8252 * Request the entity from SAX
8253 */
8254 if ((ctxt->sax != NULL) &&
8255 (ctxt->sax->getParameterEntity != NULL))
8256 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8257 if (ctxt->instate == XML_PARSER_EOF) {
8258 xmlFree(name);
8259 return(NULL);
8260 }
8261 if (entity == NULL) {
8262 /*
8263 * [ WFC: Entity Declared ]
8264 * In a document without any DTD, a document with only an
8265 * internal DTD subset which contains no parameter entity
8266 * references, or a document with "standalone='yes'", ...
8267 * ... The declaration of a parameter entity must precede
8268 * any reference to it...
8269 */
8270 if ((ctxt->standalone == 1) ||
8271 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8272 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8273 "PEReference: %%%s; not found\n", name);
8274 } else {
8275 /*
8276 * [ VC: Entity Declared ]
8277 * In a document with an external subset or external
8278 * parameter entities with "standalone='no'", ...
8279 * ... The declaration of a parameter entity must
8280 * precede any reference to it...
8281 */
8282 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8283 "PEReference: %%%s; not found\n",
8284 name, NULL);
8285 ctxt->valid = 0;
8286 }
8287 xmlParserEntityCheck(ctxt, 0, NULL, 0);
8288 } else {
8289 /*
8290 * Internal checking in case the entity quest barfed
8291 */
8292 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8293 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8294 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8295 "%%%s; is not a parameter entity\n",
8296 name, NULL);
8297 }
8298 }
8299 ctxt->hasPErefs = 1;
8300 xmlFree(name);
8301 *str = ptr;
8302 return(entity);
8303}
8304
8305/**
8306 * xmlParseDocTypeDecl:
8307 * @ctxt: an XML parser context
8308 *
8309 * parse a DOCTYPE declaration
8310 *
8311 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8312 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8313 *
8314 * [ VC: Root Element Type ]
8315 * The Name in the document type declaration must match the element
8316 * type of the root element.
8317 */
8318
8319void
8320xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8321 const xmlChar *name = NULL;
8322 xmlChar *ExternalID = NULL;
8323 xmlChar *URI = NULL;
8324
8325 /*
8326 * We know that '<!DOCTYPE' has been detected.
8327 */
8328 SKIP(9);
8329
8330 SKIP_BLANKS;
8331
8332 /*
8333 * Parse the DOCTYPE name.
8334 */
8335 name = xmlParseName(ctxt);
8336 if (name == NULL) {
8337 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8338 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8339 }
8340 ctxt->intSubName = name;
8341
8342 SKIP_BLANKS;
8343
8344 /*
8345 * Check for SystemID and ExternalID
8346 */
8347 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8348
8349 if ((URI != NULL) || (ExternalID != NULL)) {
8350 ctxt->hasExternalSubset = 1;
8351 }
8352 ctxt->extSubURI = URI;
8353 ctxt->extSubSystem = ExternalID;
8354
8355 SKIP_BLANKS;
8356
8357 /*
8358 * Create and update the internal subset.
8359 */
8360 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8361 (!ctxt->disableSAX))
8362 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8363 if (ctxt->instate == XML_PARSER_EOF)
8364 return;
8365
8366 /*
8367 * Is there any internal subset declarations ?
8368 * they are handled separately in xmlParseInternalSubset()
8369 */
8370 if (RAW == '[')
8371 return;
8372
8373 /*
8374 * We should be at the end of the DOCTYPE declaration.
8375 */
8376 if (RAW != '>') {
8377 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8378 }
8379 NEXT;
8380}
8381
8382/**
8383 * xmlParseInternalSubset:
8384 * @ctxt: an XML parser context
8385 *
8386 * parse the internal subset declaration
8387 *
8388 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8389 */
8390
8391static void
8392xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8393 /*
8394 * Is there any DTD definition ?
8395 */
8396 if (RAW == '[') {
8397 ctxt->instate = XML_PARSER_DTD;
8398 NEXT;
8399 /*
8400 * Parse the succession of Markup declarations and
8401 * PEReferences.
8402 * Subsequence (markupdecl | PEReference | S)*
8403 */
8404 while ((RAW != ']') && (ctxt->instate != XML_PARSER_EOF)) {
8405 const xmlChar *check = CUR_PTR;
8406 unsigned int cons = ctxt->input->consumed;
8407
8408 SKIP_BLANKS;
8409 xmlParseMarkupDecl(ctxt);
8410 xmlParsePEReference(ctxt);
8411
8412 /*
8413 * Pop-up of finished entities.
8414 */
8415 while ((RAW == 0) && (ctxt->inputNr > 1))
8416 xmlPopInput(ctxt);
8417
8418 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
8419 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8420 "xmlParseInternalSubset: error detected in Markup declaration\n");
8421 break;
8422 }
8423 }
8424 if (RAW == ']') {
8425 NEXT;
8426 SKIP_BLANKS;
8427 }
8428 }
8429
8430 /*
8431 * We should be at the end of the DOCTYPE declaration.
8432 */
8433 if (RAW != '>') {
8434 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8435 }
8436 NEXT;
8437}
8438
8439#ifdef LIBXML_SAX1_ENABLED
8440/**
8441 * xmlParseAttribute:
8442 * @ctxt: an XML parser context
8443 * @value: a xmlChar ** used to store the value of the attribute
8444 *
8445 * parse an attribute
8446 *
8447 * [41] Attribute ::= Name Eq AttValue
8448 *
8449 * [ WFC: No External Entity References ]
8450 * Attribute values cannot contain direct or indirect entity references
8451 * to external entities.
8452 *
8453 * [ WFC: No < in Attribute Values ]
8454 * The replacement text of any entity referred to directly or indirectly in
8455 * an attribute value (other than "&lt;") must not contain a <.
8456 *
8457 * [ VC: Attribute Value Type ]
8458 * The attribute must have been declared; the value must be of the type
8459 * declared for it.
8460 *
8461 * [25] Eq ::= S? '=' S?
8462 *
8463 * With namespace:
8464 *
8465 * [NS 11] Attribute ::= QName Eq AttValue
8466 *
8467 * Also the case QName == xmlns:??? is handled independently as a namespace
8468 * definition.
8469 *
8470 * Returns the attribute name, and the value in *value.
8471 */
8472
8473const xmlChar *
8474xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8475 const xmlChar *name;
8476 xmlChar *val;
8477
8478 *value = NULL;
8479 GROW;
8480 name = xmlParseName(ctxt);
8481 if (name == NULL) {
8482 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8483 "error parsing attribute name\n");
8484 return(NULL);
8485 }
8486
8487 /*
8488 * read the value
8489 */
8490 SKIP_BLANKS;
8491 if (RAW == '=') {
8492 NEXT;
8493 SKIP_BLANKS;
8494 val = xmlParseAttValue(ctxt);
8495 ctxt->instate = XML_PARSER_CONTENT;
8496 } else {
8497 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8498 "Specification mandate value for attribute %s\n", name);
8499 return(NULL);
8500 }
8501
8502 /*
8503 * Check that xml:lang conforms to the specification
8504 * No more registered as an error, just generate a warning now
8505 * since this was deprecated in XML second edition
8506 */
8507 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8508 if (!xmlCheckLanguageID(val)) {
8509 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8510 "Malformed value for xml:lang : %s\n",
8511 val, NULL);
8512 }
8513 }
8514
8515 /*
8516 * Check that xml:space conforms to the specification
8517 */
8518 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8519 if (xmlStrEqual(val, BAD_CAST "default"))
8520 *(ctxt->space) = 0;
8521 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8522 *(ctxt->space) = 1;
8523 else {
8524 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8525"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8526 val, NULL);
8527 }
8528 }
8529
8530 *value = val;
8531 return(name);
8532}
8533
8534/**
8535 * xmlParseStartTag:
8536 * @ctxt: an XML parser context
8537 *
8538 * parse a start of tag either for rule element or
8539 * EmptyElement. In both case we don't parse the tag closing chars.
8540 *
8541 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8542 *
8543 * [ WFC: Unique Att Spec ]
8544 * No attribute name may appear more than once in the same start-tag or
8545 * empty-element tag.
8546 *
8547 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8548 *
8549 * [ WFC: Unique Att Spec ]
8550 * No attribute name may appear more than once in the same start-tag or
8551 * empty-element tag.
8552 *
8553 * With namespace:
8554 *
8555 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8556 *
8557 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8558 *
8559 * Returns the element name parsed
8560 */
8561
8562const xmlChar *
8563xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8564 const xmlChar *name;
8565 const xmlChar *attname;
8566 xmlChar *attvalue;
8567 const xmlChar **atts = ctxt->atts;
8568 int nbatts = 0;
8569 int maxatts = ctxt->maxatts;
8570 int i;
8571
8572 if (RAW != '<') return(NULL);
8573 NEXT1;
8574
8575 name = xmlParseName(ctxt);
8576 if (name == NULL) {
8577 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8578 "xmlParseStartTag: invalid element name\n");
8579 return(NULL);
8580 }
8581
8582 /*
8583 * Now parse the attributes, it ends up with the ending
8584 *
8585 * (S Attribute)* S?
8586 */
8587 SKIP_BLANKS;
8588 GROW;
8589
8590 while (((RAW != '>') &&
8591 ((RAW != '/') || (NXT(1) != '>')) &&
8592 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8593 const xmlChar *q = CUR_PTR;
8594 unsigned int cons = ctxt->input->consumed;
8595
8596 attname = xmlParseAttribute(ctxt, &attvalue);
8597 if ((attname != NULL) && (attvalue != NULL)) {
8598 /*
8599 * [ WFC: Unique Att Spec ]
8600 * No attribute name may appear more than once in the same
8601 * start-tag or empty-element tag.
8602 */
8603 for (i = 0; i < nbatts;i += 2) {
8604 if (xmlStrEqual(atts[i], attname)) {
8605 xmlErrAttributeDup(ctxt, NULL, attname);
8606 xmlFree(attvalue);
8607 goto failed;
8608 }
8609 }
8610 /*
8611 * Add the pair to atts
8612 */
8613 if (atts == NULL) {
8614 maxatts = 22; /* allow for 10 attrs by default */
8615 atts = (const xmlChar **)
8616 xmlMalloc(maxatts * sizeof(xmlChar *));
8617 if (atts == NULL) {
8618 xmlErrMemory(ctxt, NULL);
8619 if (attvalue != NULL)
8620 xmlFree(attvalue);
8621 goto failed;
8622 }
8623 ctxt->atts = atts;
8624 ctxt->maxatts = maxatts;
8625 } else if (nbatts + 4 > maxatts) {
8626 const xmlChar **n;
8627
8628 maxatts *= 2;
8629 n = (const xmlChar **) xmlRealloc((void *) atts,
8630 maxatts * sizeof(const xmlChar *));
8631 if (n == NULL) {
8632 xmlErrMemory(ctxt, NULL);
8633 if (attvalue != NULL)
8634 xmlFree(attvalue);
8635 goto failed;
8636 }
8637 atts = n;
8638 ctxt->atts = atts;
8639 ctxt->maxatts = maxatts;
8640 }
8641 atts[nbatts++] = attname;
8642 atts[nbatts++] = attvalue;
8643 atts[nbatts] = NULL;
8644 atts[nbatts + 1] = NULL;
8645 } else {
8646 if (attvalue != NULL)
8647 xmlFree(attvalue);
8648 }
8649
8650failed:
8651
8652 GROW
8653 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8654 break;
8655 if (!IS_BLANK_CH(RAW)) {
8656 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8657 "attributes construct error\n");
8658 }
8659 SKIP_BLANKS;
8660 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8661 (attname == NULL) && (attvalue == NULL)) {
8662 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8663 "xmlParseStartTag: problem parsing attributes\n");
8664 break;
8665 }
8666 SHRINK;
8667 GROW;
8668 }
8669
8670 /*
8671 * SAX: Start of Element !
8672 */
8673 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8674 (!ctxt->disableSAX)) {
8675 if (nbatts > 0)
8676 ctxt->sax->startElement(ctxt->userData, name, atts);
8677 else
8678 ctxt->sax->startElement(ctxt->userData, name, NULL);
8679 }
8680
8681 if (atts != NULL) {
8682 /* Free only the content strings */
8683 for (i = 1;i < nbatts;i+=2)
8684 if (atts[i] != NULL)
8685 xmlFree((xmlChar *) atts[i]);
8686 }
8687 return(name);
8688}
8689
8690/**
8691 * xmlParseEndTag1:
8692 * @ctxt: an XML parser context
8693 * @line: line of the start tag
8694 * @nsNr: number of namespaces on the start tag
8695 *
8696 * parse an end of tag
8697 *
8698 * [42] ETag ::= '</' Name S? '>'
8699 *
8700 * With namespace
8701 *
8702 * [NS 9] ETag ::= '</' QName S? '>'
8703 */
8704
8705static void
8706xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8707 const xmlChar *name;
8708
8709 GROW;
8710 if ((RAW != '<') || (NXT(1) != '/')) {
8711 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8712 "xmlParseEndTag: '</' not found\n");
8713 return;
8714 }
8715 SKIP(2);
8716
8717 name = xmlParseNameAndCompare(ctxt,ctxt->name);
8718
8719 /*
8720 * We should definitely be at the ending "S? '>'" part
8721 */
8722 GROW;
8723 SKIP_BLANKS;
8724 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8725 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8726 } else
8727 NEXT1;
8728
8729 /*
8730 * [ WFC: Element Type Match ]
8731 * The Name in an element's end-tag must match the element type in the
8732 * start-tag.
8733 *
8734 */
8735 if (name != (xmlChar*)1) {
8736 if (name == NULL) name = BAD_CAST "unparseable";
8737 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8738 "Opening and ending tag mismatch: %s line %d and %s\n",
8739 ctxt->name, line, name);
8740 }
8741
8742 /*
8743 * SAX: End of Tag
8744 */
8745 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8746 (!ctxt->disableSAX))
8747 ctxt->sax->endElement(ctxt->userData, ctxt->name);
8748
8749 namePop(ctxt);
8750 spacePop(ctxt);
8751 return;
8752}
8753
8754/**
8755 * xmlParseEndTag:
8756 * @ctxt: an XML parser context
8757 *
8758 * parse an end of tag
8759 *
8760 * [42] ETag ::= '</' Name S? '>'
8761 *
8762 * With namespace
8763 *
8764 * [NS 9] ETag ::= '</' QName S? '>'
8765 */
8766
8767void
8768xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8769 xmlParseEndTag1(ctxt, 0);
8770}
8771#endif /* LIBXML_SAX1_ENABLED */
8772
8773/************************************************************************
8774 * *
8775 * SAX 2 specific operations *
8776 * *
8777 ************************************************************************/
8778
8779/*
8780 * xmlGetNamespace:
8781 * @ctxt: an XML parser context
8782 * @prefix: the prefix to lookup
8783 *
8784 * Lookup the namespace name for the @prefix (which ca be NULL)
8785 * The prefix must come from the @ctxt->dict dictionnary
8786 *
8787 * Returns the namespace name or NULL if not bound
8788 */
8789static const xmlChar *
8790xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8791 int i;
8792
8793 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8794 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8795 if (ctxt->nsTab[i] == prefix) {
8796 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8797 return(NULL);
8798 return(ctxt->nsTab[i + 1]);
8799 }
8800 return(NULL);
8801}
8802
8803/**
8804 * xmlParseQName:
8805 * @ctxt: an XML parser context
8806 * @prefix: pointer to store the prefix part
8807 *
8808 * parse an XML Namespace QName
8809 *
8810 * [6] QName ::= (Prefix ':')? LocalPart
8811 * [7] Prefix ::= NCName
8812 * [8] LocalPart ::= NCName
8813 *
8814 * Returns the Name parsed or NULL
8815 */
8816
8817static const xmlChar *
8818xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8819 const xmlChar *l, *p;
8820
8821 GROW;
8822
8823 l = xmlParseNCName(ctxt);
8824 if (l == NULL) {
8825 if (CUR == ':') {
8826 l = xmlParseName(ctxt);
8827 if (l != NULL) {
8828 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8829 "Failed to parse QName '%s'\n", l, NULL, NULL);
8830 *prefix = NULL;
8831 return(l);
8832 }
8833 }
8834 return(NULL);
8835 }
8836 if (CUR == ':') {
8837 NEXT;
8838 p = l;
8839 l = xmlParseNCName(ctxt);
8840 if (l == NULL) {
8841 xmlChar *tmp;
8842
8843 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8844 "Failed to parse QName '%s:'\n", p, NULL, NULL);
8845 l = xmlParseNmtoken(ctxt);
8846 if (l == NULL)
8847 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8848 else {
8849 tmp = xmlBuildQName(l, p, NULL, 0);
8850 xmlFree((char *)l);
8851 }
8852 p = xmlDictLookup(ctxt->dict, tmp, -1);
8853 if (tmp != NULL) xmlFree(tmp);
8854 *prefix = NULL;
8855 return(p);
8856 }
8857 if (CUR == ':') {
8858 xmlChar *tmp;
8859
8860 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8861 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8862 NEXT;
8863 tmp = (xmlChar *) xmlParseName(ctxt);
8864 if (tmp != NULL) {
8865 tmp = xmlBuildQName(tmp, l, NULL, 0);
8866 l = xmlDictLookup(ctxt->dict, tmp, -1);
8867 if (tmp != NULL) xmlFree(tmp);
8868 *prefix = p;
8869 return(l);
8870 }
8871 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8872 l = xmlDictLookup(ctxt->dict, tmp, -1);
8873 if (tmp != NULL) xmlFree(tmp);
8874 *prefix = p;
8875 return(l);
8876 }
8877 *prefix = p;
8878 } else
8879 *prefix = NULL;
8880 return(l);
8881}
8882
8883/**
8884 * xmlParseQNameAndCompare:
8885 * @ctxt: an XML parser context
8886 * @name: the localname
8887 * @prefix: the prefix, if any.
8888 *
8889 * parse an XML name and compares for match
8890 * (specialized for endtag parsing)
8891 *
8892 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8893 * and the name for mismatch
8894 */
8895
8896static const xmlChar *
8897xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8898 xmlChar const *prefix) {
8899 const xmlChar *cmp;
8900 const xmlChar *in;
8901 const xmlChar *ret;
8902 const xmlChar *prefix2;
8903
8904 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8905
8906 GROW;
8907 in = ctxt->input->cur;
8908
8909 cmp = prefix;
8910 while (*in != 0 && *in == *cmp) {
8911 ++in;
8912 ++cmp;
8913 }
8914 if ((*cmp == 0) && (*in == ':')) {
8915 in++;
8916 cmp = name;
8917 while (*in != 0 && *in == *cmp) {
8918 ++in;
8919 ++cmp;
8920 }
8921 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8922 /* success */
8923 ctxt->input->cur = in;
8924 return((const xmlChar*) 1);
8925 }
8926 }
8927 /*
8928 * all strings coms from the dictionary, equality can be done directly
8929 */
8930 ret = xmlParseQName (ctxt, &prefix2);
8931 if ((ret == name) && (prefix == prefix2))
8932 return((const xmlChar*) 1);
8933 return ret;
8934}
8935
8936/**
8937 * xmlParseAttValueInternal:
8938 * @ctxt: an XML parser context
8939 * @len: attribute len result
8940 * @alloc: whether the attribute was reallocated as a new string
8941 * @normalize: if 1 then further non-CDATA normalization must be done
8942 *
8943 * parse a value for an attribute.
8944 * NOTE: if no normalization is needed, the routine will return pointers
8945 * directly from the data buffer.
8946 *
8947 * 3.3.3 Attribute-Value Normalization:
8948 * Before the value of an attribute is passed to the application or
8949 * checked for validity, the XML processor must normalize it as follows:
8950 * - a character reference is processed by appending the referenced
8951 * character to the attribute value
8952 * - an entity reference is processed by recursively processing the
8953 * replacement text of the entity
8954 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8955 * appending #x20 to the normalized value, except that only a single
8956 * #x20 is appended for a "#xD#xA" sequence that is part of an external
8957 * parsed entity or the literal entity value of an internal parsed entity
8958 * - other characters are processed by appending them to the normalized value
8959 * If the declared value is not CDATA, then the XML processor must further
8960 * process the normalized attribute value by discarding any leading and
8961 * trailing space (#x20) characters, and by replacing sequences of space
8962 * (#x20) characters by a single space (#x20) character.
8963 * All attributes for which no declaration has been read should be treated
8964 * by a non-validating parser as if declared CDATA.
8965 *
8966 * Returns the AttValue parsed or NULL. The value has to be freed by the
8967 * caller if it was copied, this can be detected by val[*len] == 0.
8968 */
8969
8970static xmlChar *
8971xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8972 int normalize)
8973{
8974 xmlChar limit = 0;
8975 const xmlChar *in = NULL, *start, *end, *last;
8976 xmlChar *ret = NULL;
8977 int line, col;
8978
8979 GROW;
8980 in = (xmlChar *) CUR_PTR;
8981 line = ctxt->input->line;
8982 col = ctxt->input->col;
8983 if (*in != '"' && *in != '\'') {
8984 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
8985 return (NULL);
8986 }
8987 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
8988
8989 /*
8990 * try to handle in this routine the most common case where no
8991 * allocation of a new string is required and where content is
8992 * pure ASCII.
8993 */
8994 limit = *in++;
8995 col++;
8996 end = ctxt->input->end;
8997 start = in;
8998 if (in >= end) {
8999 const xmlChar *oldbase = ctxt->input->base;
9000 GROW;
9001 if (oldbase != ctxt->input->base) {
9002 long delta = ctxt->input->base - oldbase;
9003 start = start + delta;
9004 in = in + delta;
9005 }
9006 end = ctxt->input->end;
9007 }
9008 if (normalize) {
9009 /*
9010 * Skip any leading spaces
9011 */
9012 while ((in < end) && (*in != limit) &&
9013 ((*in == 0x20) || (*in == 0x9) ||
9014 (*in == 0xA) || (*in == 0xD))) {
9015 if (*in == 0xA) {
9016 line++; col = 1;
9017 } else {
9018 col++;
9019 }
9020 in++;
9021 start = in;
9022 if (in >= end) {
9023 const xmlChar *oldbase = ctxt->input->base;
9024 GROW;
9025 if (ctxt->instate == XML_PARSER_EOF)
9026 return(NULL);
9027 if (oldbase != ctxt->input->base) {
9028 long delta = ctxt->input->base - oldbase;
9029 start = start + delta;
9030 in = in + delta;
9031 }
9032 end = ctxt->input->end;
9033 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9034 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9035 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9036 "AttValue length too long\n");
9037 return(NULL);
9038 }
9039 }
9040 }
9041 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9042 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9043 col++;
9044 if ((*in++ == 0x20) && (*in == 0x20)) break;
9045 if (in >= end) {
9046 const xmlChar *oldbase = ctxt->input->base;
9047 GROW;
9048 if (ctxt->instate == XML_PARSER_EOF)
9049 return(NULL);
9050 if (oldbase != ctxt->input->base) {
9051 long delta = ctxt->input->base - oldbase;
9052 start = start + delta;
9053 in = in + delta;
9054 }
9055 end = ctxt->input->end;
9056 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9057 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9058 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9059 "AttValue length too long\n");
9060 return(NULL);
9061 }
9062 }
9063 }
9064 last = in;
9065 /*
9066 * skip the trailing blanks
9067 */
9068 while ((last[-1] == 0x20) && (last > start)) last--;
9069 while ((in < end) && (*in != limit) &&
9070 ((*in == 0x20) || (*in == 0x9) ||
9071 (*in == 0xA) || (*in == 0xD))) {
9072 if (*in == 0xA) {
9073 line++, col = 1;
9074 } else {
9075 col++;
9076 }
9077 in++;
9078 if (in >= end) {
9079 const xmlChar *oldbase = ctxt->input->base;
9080 GROW;
9081 if (ctxt->instate == XML_PARSER_EOF)
9082 return(NULL);
9083 if (oldbase != ctxt->input->base) {
9084 long delta = ctxt->input->base - oldbase;
9085 start = start + delta;
9086 in = in + delta;
9087 last = last + delta;
9088 }
9089 end = ctxt->input->end;
9090 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9091 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9092 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9093 "AttValue length too long\n");
9094 return(NULL);
9095 }
9096 }
9097 }
9098 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9099 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9100 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9101 "AttValue length too long\n");
9102 return(NULL);
9103 }
9104 if (*in != limit) goto need_complex;
9105 } else {
9106 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9107 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9108 in++;
9109 col++;
9110 if (in >= end) {
9111 const xmlChar *oldbase = ctxt->input->base;
9112 GROW;
9113 if (ctxt->instate == XML_PARSER_EOF)
9114 return(NULL);
9115 if (oldbase != ctxt->input->base) {
9116 long delta = ctxt->input->base - oldbase;
9117 start = start + delta;
9118 in = in + delta;
9119 }
9120 end = ctxt->input->end;
9121 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9122 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9123 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9124 "AttValue length too long\n");
9125 return(NULL);
9126 }
9127 }
9128 }
9129 last = in;
9130 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9131 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9132 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9133 "AttValue length too long\n");
9134 return(NULL);
9135 }
9136 if (*in != limit) goto need_complex;
9137 }
9138 in++;
9139 col++;
9140 if (len != NULL) {
9141 *len = last - start;
9142 ret = (xmlChar *) start;
9143 } else {
9144 if (alloc) *alloc = 1;
9145 ret = xmlStrndup(start, last - start);
9146 }
9147 CUR_PTR = in;
9148 ctxt->input->line = line;
9149 ctxt->input->col = col;
9150 if (alloc) *alloc = 0;
9151 return ret;
9152need_complex:
9153 if (alloc) *alloc = 1;
9154 return xmlParseAttValueComplex(ctxt, len, normalize);
9155}
9156
9157/**
9158 * xmlParseAttribute2:
9159 * @ctxt: an XML parser context
9160 * @pref: the element prefix
9161 * @elem: the element name
9162 * @prefix: a xmlChar ** used to store the value of the attribute prefix
9163 * @value: a xmlChar ** used to store the value of the attribute
9164 * @len: an int * to save the length of the attribute
9165 * @alloc: an int * to indicate if the attribute was allocated
9166 *
9167 * parse an attribute in the new SAX2 framework.
9168 *
9169 * Returns the attribute name, and the value in *value, .
9170 */
9171
9172static const xmlChar *
9173xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9174 const xmlChar * pref, const xmlChar * elem,
9175 const xmlChar ** prefix, xmlChar ** value,
9176 int *len, int *alloc)
9177{
9178 const xmlChar *name;
9179 xmlChar *val, *internal_val = NULL;
9180 int normalize = 0;
9181
9182 *value = NULL;
9183 GROW;
9184 name = xmlParseQName(ctxt, prefix);
9185 if (name == NULL) {
9186 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9187 "error parsing attribute name\n");
9188 return (NULL);
9189 }
9190
9191 /*
9192 * get the type if needed
9193 */
9194 if (ctxt->attsSpecial != NULL) {
9195 int type;
9196
9197 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
9198 pref, elem, *prefix, name);
9199 if (type != 0)
9200 normalize = 1;
9201 }
9202
9203 /*
9204 * read the value
9205 */
9206 SKIP_BLANKS;
9207 if (RAW == '=') {
9208 NEXT;
9209 SKIP_BLANKS;
9210 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9211 if (normalize) {
9212 /*
9213 * Sometimes a second normalisation pass for spaces is needed
9214 * but that only happens if charrefs or entities refernces
9215 * have been used in the attribute value, i.e. the attribute
9216 * value have been extracted in an allocated string already.
9217 */
9218 if (*alloc) {
9219 const xmlChar *val2;
9220
9221 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9222 if ((val2 != NULL) && (val2 != val)) {
9223 xmlFree(val);
9224 val = (xmlChar *) val2;
9225 }
9226 }
9227 }
9228 ctxt->instate = XML_PARSER_CONTENT;
9229 } else {
9230 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9231 "Specification mandate value for attribute %s\n",
9232 name);
9233 return (NULL);
9234 }
9235
9236 if (*prefix == ctxt->str_xml) {
9237 /*
9238 * Check that xml:lang conforms to the specification
9239 * No more registered as an error, just generate a warning now
9240 * since this was deprecated in XML second edition
9241 */
9242 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9243 internal_val = xmlStrndup(val, *len);
9244 if (!xmlCheckLanguageID(internal_val)) {
9245 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9246 "Malformed value for xml:lang : %s\n",
9247 internal_val, NULL);
9248 }
9249 }
9250
9251 /*
9252 * Check that xml:space conforms to the specification
9253 */
9254 if (xmlStrEqual(name, BAD_CAST "space")) {
9255 internal_val = xmlStrndup(val, *len);
9256 if (xmlStrEqual(internal_val, BAD_CAST "default"))
9257 *(ctxt->space) = 0;
9258 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9259 *(ctxt->space) = 1;
9260 else {
9261 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9262 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9263 internal_val, NULL);
9264 }
9265 }
9266 if (internal_val) {
9267 xmlFree(internal_val);
9268 }
9269 }
9270
9271 *value = val;
9272 return (name);
9273}
9274/**
9275 * xmlParseStartTag2:
9276 * @ctxt: an XML parser context
9277 *
9278 * parse a start of tag either for rule element or
9279 * EmptyElement. In both case we don't parse the tag closing chars.
9280 * This routine is called when running SAX2 parsing
9281 *
9282 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9283 *
9284 * [ WFC: Unique Att Spec ]
9285 * No attribute name may appear more than once in the same start-tag or
9286 * empty-element tag.
9287 *
9288 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9289 *
9290 * [ WFC: Unique Att Spec ]
9291 * No attribute name may appear more than once in the same start-tag or
9292 * empty-element tag.
9293 *
9294 * With namespace:
9295 *
9296 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9297 *
9298 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9299 *
9300 * Returns the element name parsed
9301 */
9302
9303static const xmlChar *
9304xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9305 const xmlChar **URI, int *tlen) {
9306 const xmlChar *localname;
9307 const xmlChar *prefix;
9308 const xmlChar *attname;
9309 const xmlChar *aprefix;
9310 const xmlChar *nsname;
9311 xmlChar *attvalue;
9312 const xmlChar **atts = ctxt->atts;
9313 int maxatts = ctxt->maxatts;
9314 int nratts, nbatts, nbdef;
9315 int i, j, nbNs, attval, oldline, oldcol;
9316 const xmlChar *base;
9317 unsigned long cur;
9318 int nsNr = ctxt->nsNr;
9319
9320 if (RAW != '<') return(NULL);
9321 NEXT1;
9322
9323 /*
9324 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9325 * point since the attribute values may be stored as pointers to
9326 * the buffer and calling SHRINK would destroy them !
9327 * The Shrinking is only possible once the full set of attribute
9328 * callbacks have been done.
9329 */
9330reparse:
9331 SHRINK;
9332 base = ctxt->input->base;
9333 cur = ctxt->input->cur - ctxt->input->base;
9334 oldline = ctxt->input->line;
9335 oldcol = ctxt->input->col;
9336 nbatts = 0;
9337 nratts = 0;
9338 nbdef = 0;
9339 nbNs = 0;
9340 attval = 0;
9341 /* Forget any namespaces added during an earlier parse of this element. */
9342 ctxt->nsNr = nsNr;
9343
9344 localname = xmlParseQName(ctxt, &prefix);
9345 if (localname == NULL) {
9346 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9347 "StartTag: invalid element name\n");
9348 return(NULL);
9349 }
9350 *tlen = ctxt->input->cur - ctxt->input->base - cur;
9351
9352 /*
9353 * Now parse the attributes, it ends up with the ending
9354 *
9355 * (S Attribute)* S?
9356 */
9357 SKIP_BLANKS;
9358 GROW;
9359 if (ctxt->input->base != base) goto base_changed;
9360
9361 while (((RAW != '>') &&
9362 ((RAW != '/') || (NXT(1) != '>')) &&
9363 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9364 const xmlChar *q = CUR_PTR;
9365 unsigned int cons = ctxt->input->consumed;
9366 int len = -1, alloc = 0;
9367
9368 attname = xmlParseAttribute2(ctxt, prefix, localname,
9369 &aprefix, &attvalue, &len, &alloc);
9370 if (ctxt->input->base != base) {
9371 if ((attvalue != NULL) && (alloc != 0))
9372 xmlFree(attvalue);
9373 attvalue = NULL;
9374 goto base_changed;
9375 }
9376 if ((attname != NULL) && (attvalue != NULL)) {
9377 if (len < 0) len = xmlStrlen(attvalue);
9378 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9379 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9380 xmlURIPtr uri;
9381
9382 if (URL == NULL) {
9383 xmlErrMemory(ctxt, "dictionary allocation failure");
9384 if ((attvalue != NULL) && (alloc != 0))
9385 xmlFree(attvalue);
9386 return(NULL);
9387 }
9388 if (*URL != 0) {
9389 uri = xmlParseURI((const char *) URL);
9390 if (uri == NULL) {
9391 xmlNsErr(ctxt, XML_WAR_NS_URI,
9392 "xmlns: '%s' is not a valid URI\n",
9393 URL, NULL, NULL);
9394 } else {
9395 if (uri->scheme == NULL) {
9396 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9397 "xmlns: URI %s is not absolute\n",
9398 URL, NULL, NULL);
9399 }
9400 xmlFreeURI(uri);
9401 }
9402 if (URL == ctxt->str_xml_ns) {
9403 if (attname != ctxt->str_xml) {
9404 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9405 "xml namespace URI cannot be the default namespace\n",
9406 NULL, NULL, NULL);
9407 }
9408 goto skip_default_ns;
9409 }
9410 if ((len == 29) &&
9411 (xmlStrEqual(URL,
9412 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9413 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9414 "reuse of the xmlns namespace name is forbidden\n",
9415 NULL, NULL, NULL);
9416 goto skip_default_ns;
9417 }
9418 }
9419 /*
9420 * check that it's not a defined namespace
9421 */
9422 for (j = 1;j <= nbNs;j++)
9423 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9424 break;
9425 if (j <= nbNs)
9426 xmlErrAttributeDup(ctxt, NULL, attname);
9427 else
9428 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9429skip_default_ns:
9430 if (alloc != 0) xmlFree(attvalue);
9431 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9432 break;
9433 if (!IS_BLANK_CH(RAW)) {
9434 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9435 "attributes construct error\n");
9436 break;
9437 }
9438 SKIP_BLANKS;
9439 continue;
9440 }
9441 if (aprefix == ctxt->str_xmlns) {
9442 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9443 xmlURIPtr uri;
9444
9445 if (attname == ctxt->str_xml) {
9446 if (URL != ctxt->str_xml_ns) {
9447 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9448 "xml namespace prefix mapped to wrong URI\n",
9449 NULL, NULL, NULL);
9450 }
9451 /*
9452 * Do not keep a namespace definition node
9453 */
9454 goto skip_ns;
9455 }
9456 if (URL == ctxt->str_xml_ns) {
9457 if (attname != ctxt->str_xml) {
9458 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9459 "xml namespace URI mapped to wrong prefix\n",
9460 NULL, NULL, NULL);
9461 }
9462 goto skip_ns;
9463 }
9464 if (attname == ctxt->str_xmlns) {
9465 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9466 "redefinition of the xmlns prefix is forbidden\n",
9467 NULL, NULL, NULL);
9468 goto skip_ns;
9469 }
9470 if ((len == 29) &&
9471 (xmlStrEqual(URL,
9472 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9473 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9474 "reuse of the xmlns namespace name is forbidden\n",
9475 NULL, NULL, NULL);
9476 goto skip_ns;
9477 }
9478 if ((URL == NULL) || (URL[0] == 0)) {
9479 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9480 "xmlns:%s: Empty XML namespace is not allowed\n",
9481 attname, NULL, NULL);
9482 goto skip_ns;
9483 } else {
9484 uri = xmlParseURI((const char *) URL);
9485 if (uri == NULL) {
9486 xmlNsErr(ctxt, XML_WAR_NS_URI,
9487 "xmlns:%s: '%s' is not a valid URI\n",
9488 attname, URL, NULL);
9489 } else {
9490 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9491 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9492 "xmlns:%s: URI %s is not absolute\n",
9493 attname, URL, NULL);
9494 }
9495 xmlFreeURI(uri);
9496 }
9497 }
9498
9499 /*
9500 * check that it's not a defined namespace
9501 */
9502 for (j = 1;j <= nbNs;j++)
9503 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9504 break;
9505 if (j <= nbNs)
9506 xmlErrAttributeDup(ctxt, aprefix, attname);
9507 else
9508 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9509skip_ns:
9510 if (alloc != 0) xmlFree(attvalue);
9511 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9512 break;
9513 if (!IS_BLANK_CH(RAW)) {
9514 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9515 "attributes construct error\n");
9516 break;
9517 }
9518 SKIP_BLANKS;
9519 if (ctxt->input->base != base) goto base_changed;
9520 continue;
9521 }
9522
9523 /*
9524 * Add the pair to atts
9525 */
9526 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9527 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9528 if (attvalue[len] == 0)
9529 xmlFree(attvalue);
9530 goto failed;
9531 }
9532 maxatts = ctxt->maxatts;
9533 atts = ctxt->atts;
9534 }
9535 ctxt->attallocs[nratts++] = alloc;
9536 atts[nbatts++] = attname;
9537 atts[nbatts++] = aprefix;
9538 atts[nbatts++] = NULL; /* the URI will be fetched later */
9539 atts[nbatts++] = attvalue;
9540 attvalue += len;
9541 atts[nbatts++] = attvalue;
9542 /*
9543 * tag if some deallocation is needed
9544 */
9545 if (alloc != 0) attval = 1;
9546 } else {
9547 if ((attvalue != NULL) && (attvalue[len] == 0))
9548 xmlFree(attvalue);
9549 }
9550
9551failed:
9552
9553 GROW
9554 if (ctxt->instate == XML_PARSER_EOF)
9555 break;
9556 if (ctxt->input->base != base) goto base_changed;
9557 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9558 break;
9559 if (!IS_BLANK_CH(RAW)) {
9560 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9561 "attributes construct error\n");
9562 break;
9563 }
9564 SKIP_BLANKS;
9565 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9566 (attname == NULL) && (attvalue == NULL)) {
9567 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9568 "xmlParseStartTag: problem parsing attributes\n");
9569 break;
9570 }
9571 GROW;
9572 if (ctxt->input->base != base) goto base_changed;
9573 }
9574
9575 /*
9576 * The attributes defaulting
9577 */
9578 if (ctxt->attsDefault != NULL) {
9579 xmlDefAttrsPtr defaults;
9580
9581 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9582 if (defaults != NULL) {
9583 for (i = 0;i < defaults->nbAttrs;i++) {
9584 attname = defaults->values[5 * i];
9585 aprefix = defaults->values[5 * i + 1];
9586
9587 /*
9588 * special work for namespaces defaulted defs
9589 */
9590 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9591 /*
9592 * check that it's not a defined namespace
9593 */
9594 for (j = 1;j <= nbNs;j++)
9595 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9596 break;
9597 if (j <= nbNs) continue;
9598
9599 nsname = xmlGetNamespace(ctxt, NULL);
9600 if (nsname != defaults->values[5 * i + 2]) {
9601 if (nsPush(ctxt, NULL,
9602 defaults->values[5 * i + 2]) > 0)
9603 nbNs++;
9604 }
9605 } else if (aprefix == ctxt->str_xmlns) {
9606 /*
9607 * check that it's not a defined namespace
9608 */
9609 for (j = 1;j <= nbNs;j++)
9610 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9611 break;
9612 if (j <= nbNs) continue;
9613
9614 nsname = xmlGetNamespace(ctxt, attname);
9615 if (nsname != defaults->values[2]) {
9616 if (nsPush(ctxt, attname,
9617 defaults->values[5 * i + 2]) > 0)
9618 nbNs++;
9619 }
9620 } else {
9621 /*
9622 * check that it's not a defined attribute
9623 */
9624 for (j = 0;j < nbatts;j+=5) {
9625 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9626 break;
9627 }
9628 if (j < nbatts) continue;
9629
9630 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9631 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9632 return(NULL);
9633 }
9634 maxatts = ctxt->maxatts;
9635 atts = ctxt->atts;
9636 }
9637 atts[nbatts++] = attname;
9638 atts[nbatts++] = aprefix;
9639 if (aprefix == NULL)
9640 atts[nbatts++] = NULL;
9641 else
9642 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9643 atts[nbatts++] = defaults->values[5 * i + 2];
9644 atts[nbatts++] = defaults->values[5 * i + 3];
9645 if ((ctxt->standalone == 1) &&
9646 (defaults->values[5 * i + 4] != NULL)) {
9647 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9648 "standalone: attribute %s on %s defaulted from external subset\n",
9649 attname, localname);
9650 }
9651 nbdef++;
9652 }
9653 }
9654 }
9655 }
9656
9657 /*
9658 * The attributes checkings
9659 */
9660 for (i = 0; i < nbatts;i += 5) {
9661 /*
9662 * The default namespace does not apply to attribute names.
9663 */
9664 if (atts[i + 1] != NULL) {
9665 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9666 if (nsname == NULL) {
9667 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9668 "Namespace prefix %s for %s on %s is not defined\n",
9669 atts[i + 1], atts[i], localname);
9670 }
9671 atts[i + 2] = nsname;
9672 } else
9673 nsname = NULL;
9674 /*
9675 * [ WFC: Unique Att Spec ]
9676 * No attribute name may appear more than once in the same
9677 * start-tag or empty-element tag.
9678 * As extended by the Namespace in XML REC.
9679 */
9680 for (j = 0; j < i;j += 5) {
9681 if (atts[i] == atts[j]) {
9682 if (atts[i+1] == atts[j+1]) {
9683 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9684 break;
9685 }
9686 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9687 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9688 "Namespaced Attribute %s in '%s' redefined\n",
9689 atts[i], nsname, NULL);
9690 break;
9691 }
9692 }
9693 }
9694 }
9695
9696 nsname = xmlGetNamespace(ctxt, prefix);
9697 if ((prefix != NULL) && (nsname == NULL)) {
9698 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9699 "Namespace prefix %s on %s is not defined\n",
9700 prefix, localname, NULL);
9701 }
9702 *pref = prefix;
9703 *URI = nsname;
9704
9705 /*
9706 * SAX: Start of Element !
9707 */
9708 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9709 (!ctxt->disableSAX)) {
9710 if (nbNs > 0)
9711 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9712 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9713 nbatts / 5, nbdef, atts);
9714 else
9715 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9716 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9717 }
9718
9719 /*
9720 * Free up attribute allocated strings if needed
9721 */
9722 if (attval != 0) {
9723 for (i = 3,j = 0; j < nratts;i += 5,j++)
9724 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9725 xmlFree((xmlChar *) atts[i]);
9726 }
9727
9728 return(localname);
9729
9730base_changed:
9731 /*
9732 * the attribute strings are valid iif the base didn't changed
9733 */
9734 if (attval != 0) {
9735 for (i = 3,j = 0; j < nratts;i += 5,j++)
9736 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9737 xmlFree((xmlChar *) atts[i]);
9738 }
9739 ctxt->input->cur = ctxt->input->base + cur;
9740 ctxt->input->line = oldline;
9741 ctxt->input->col = oldcol;
9742 if (ctxt->wellFormed == 1) {
9743 goto reparse;
9744 }
9745 return(NULL);
9746}
9747
9748/**
9749 * xmlParseEndTag2:
9750 * @ctxt: an XML parser context
9751 * @line: line of the start tag
9752 * @nsNr: number of namespaces on the start tag
9753 *
9754 * parse an end of tag
9755 *
9756 * [42] ETag ::= '</' Name S? '>'
9757 *
9758 * With namespace
9759 *
9760 * [NS 9] ETag ::= '</' QName S? '>'
9761 */
9762
9763static void
9764xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
9765 const xmlChar *URI, int line, int nsNr, int tlen) {
9766 const xmlChar *name;
9767
9768 GROW;
9769 if ((RAW != '<') || (NXT(1) != '/')) {
9770 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9771 return;
9772 }
9773 SKIP(2);
9774
9775 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
9776 if (ctxt->input->cur[tlen] == '>') {
9777 ctxt->input->cur += tlen + 1;
9778 ctxt->input->col += tlen + 1;
9779 goto done;
9780 }
9781 ctxt->input->cur += tlen;
9782 ctxt->input->col += tlen;
9783 name = (xmlChar*)1;
9784 } else {
9785 if (prefix == NULL)
9786 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9787 else
9788 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9789 }
9790
9791 /*
9792 * We should definitely be at the ending "S? '>'" part
9793 */
9794 GROW;
9795 if (ctxt->instate == XML_PARSER_EOF)
9796 return;
9797 SKIP_BLANKS;
9798 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9799 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9800 } else
9801 NEXT1;
9802
9803 /*
9804 * [ WFC: Element Type Match ]
9805 * The Name in an element's end-tag must match the element type in the
9806 * start-tag.
9807 *
9808 */
9809 if (name != (xmlChar*)1) {
9810 if (name == NULL) name = BAD_CAST "unparseable";
9811 if ((line == 0) && (ctxt->node != NULL))
9812 line = ctxt->node->line;
9813 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9814 "Opening and ending tag mismatch: %s line %d and %s\n",
9815 ctxt->name, line, name);
9816 }
9817
9818 /*
9819 * SAX: End of Tag
9820 */
9821done:
9822 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9823 (!ctxt->disableSAX))
9824 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9825
9826 spacePop(ctxt);
9827 if (nsNr != 0)
9828 nsPop(ctxt, nsNr);
9829 return;
9830}
9831
9832/**
9833 * xmlParseCDSect:
9834 * @ctxt: an XML parser context
9835 *
9836 * Parse escaped pure raw content.
9837 *
9838 * [18] CDSect ::= CDStart CData CDEnd
9839 *
9840 * [19] CDStart ::= '<![CDATA['
9841 *
9842 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9843 *
9844 * [21] CDEnd ::= ']]>'
9845 */
9846void
9847xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9848 xmlChar *buf = NULL;
9849 int len = 0;
9850 int size = XML_PARSER_BUFFER_SIZE;
9851 int r, rl;
9852 int s, sl;
9853 int cur, l;
9854 int count = 0;
9855
9856 /* Check 2.6.0 was NXT(0) not RAW */
9857 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9858 SKIP(9);
9859 } else
9860 return;
9861
9862 ctxt->instate = XML_PARSER_CDATA_SECTION;
9863 r = CUR_CHAR(rl);
9864 if (!IS_CHAR(r)) {
9865 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9866 ctxt->instate = XML_PARSER_CONTENT;
9867 return;
9868 }
9869 NEXTL(rl);
9870 s = CUR_CHAR(sl);
9871 if (!IS_CHAR(s)) {
9872 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9873 ctxt->instate = XML_PARSER_CONTENT;
9874 return;
9875 }
9876 NEXTL(sl);
9877 cur = CUR_CHAR(l);
9878 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9879 if (buf == NULL) {
9880 xmlErrMemory(ctxt, NULL);
9881 return;
9882 }
9883 while (IS_CHAR(cur) &&
9884 ((r != ']') || (s != ']') || (cur != '>'))) {
9885 if (len + 5 >= size) {
9886 xmlChar *tmp;
9887
9888 if ((size > XML_MAX_TEXT_LENGTH) &&
9889 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9890 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9891 "CData section too big found", NULL);
9892 xmlFree (buf);
9893 return;
9894 }
9895 tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
9896 if (tmp == NULL) {
9897 xmlFree(buf);
9898 xmlErrMemory(ctxt, NULL);
9899 return;
9900 }
9901 buf = tmp;
9902 size *= 2;
9903 }
9904 COPY_BUF(rl,buf,len,r);
9905 r = s;
9906 rl = sl;
9907 s = cur;
9908 sl = l;
9909 count++;
9910 if (count > 50) {
9911 GROW;
9912 if (ctxt->instate == XML_PARSER_EOF) {
9913 xmlFree(buf);
9914 return;
9915 }
9916 count = 0;
9917 }
9918 NEXTL(l);
9919 cur = CUR_CHAR(l);
9920 }
9921 buf[len] = 0;
9922 ctxt->instate = XML_PARSER_CONTENT;
9923 if (cur != '>') {
9924 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9925 "CData section not finished\n%.50s\n", buf);
9926 xmlFree(buf);
9927 return;
9928 }
9929 NEXTL(l);
9930
9931 /*
9932 * OK the buffer is to be consumed as cdata.
9933 */
9934 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9935 if (ctxt->sax->cdataBlock != NULL)
9936 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9937 else if (ctxt->sax->characters != NULL)
9938 ctxt->sax->characters(ctxt->userData, buf, len);
9939 }
9940 xmlFree(buf);
9941}
9942
9943/**
9944 * xmlParseContent:
9945 * @ctxt: an XML parser context
9946 *
9947 * Parse a content:
9948 *
9949 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9950 */
9951
9952void
9953xmlParseContent(xmlParserCtxtPtr ctxt) {
9954 GROW;
9955 while ((RAW != 0) &&
9956 ((RAW != '<') || (NXT(1) != '/')) &&
9957 (ctxt->instate != XML_PARSER_EOF)) {
9958 const xmlChar *test = CUR_PTR;
9959 unsigned int cons = ctxt->input->consumed;
9960 const xmlChar *cur = ctxt->input->cur;
9961
9962 /*
9963 * First case : a Processing Instruction.
9964 */
9965 if ((*cur == '<') && (cur[1] == '?')) {
9966 xmlParsePI(ctxt);
9967 }
9968
9969 /*
9970 * Second case : a CDSection
9971 */
9972 /* 2.6.0 test was *cur not RAW */
9973 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9974 xmlParseCDSect(ctxt);
9975 }
9976
9977 /*
9978 * Third case : a comment
9979 */
9980 else if ((*cur == '<') && (NXT(1) == '!') &&
9981 (NXT(2) == '-') && (NXT(3) == '-')) {
9982 xmlParseComment(ctxt);
9983 ctxt->instate = XML_PARSER_CONTENT;
9984 }
9985
9986 /*
9987 * Fourth case : a sub-element.
9988 */
9989 else if (*cur == '<') {
9990 xmlParseElement(ctxt);
9991 }
9992
9993 /*
9994 * Fifth case : a reference. If if has not been resolved,
9995 * parsing returns it's Name, create the node
9996 */
9997
9998 else if (*cur == '&') {
9999 xmlParseReference(ctxt);
10000 }
10001
10002 /*
10003 * Last case, text. Note that References are handled directly.
10004 */
10005 else {
10006 xmlParseCharData(ctxt, 0);
10007 }
10008
10009 GROW;
10010 /*
10011 * Pop-up of finished entities.
10012 */
10013 while ((RAW == 0) && (ctxt->inputNr > 1))
10014 xmlPopInput(ctxt);
10015 SHRINK;
10016
10017 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
10018 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10019 "detected an error in element content\n");
10020 ctxt->instate = XML_PARSER_EOF;
10021 break;
10022 }
10023 }
10024}
10025
10026/**
10027 * xmlParseElement:
10028 * @ctxt: an XML parser context
10029 *
10030 * parse an XML element, this is highly recursive
10031 *
10032 * [39] element ::= EmptyElemTag | STag content ETag
10033 *
10034 * [ WFC: Element Type Match ]
10035 * The Name in an element's end-tag must match the element type in the
10036 * start-tag.
10037 *
10038 */
10039
10040void
10041xmlParseElement(xmlParserCtxtPtr ctxt) {
10042 const xmlChar *name;
10043 const xmlChar *prefix = NULL;
10044 const xmlChar *URI = NULL;
10045 xmlParserNodeInfo node_info;
10046 int line, tlen = 0;
10047 xmlNodePtr ret;
10048 int nsNr = ctxt->nsNr;
10049
10050 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
10051 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
10052 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
10053 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
10054 xmlParserMaxDepth);
10055 ctxt->instate = XML_PARSER_EOF;
10056 return;
10057 }
10058
10059 /* Capture start position */
10060 if (ctxt->record_info) {
10061 node_info.begin_pos = ctxt->input->consumed +
10062 (CUR_PTR - ctxt->input->base);
10063 node_info.begin_line = ctxt->input->line;
10064 }
10065
10066 if (ctxt->spaceNr == 0)
10067 spacePush(ctxt, -1);
10068 else if (*ctxt->space == -2)
10069 spacePush(ctxt, -1);
10070 else
10071 spacePush(ctxt, *ctxt->space);
10072
10073 line = ctxt->input->line;
10074#ifdef LIBXML_SAX1_ENABLED
10075 if (ctxt->sax2)
10076#endif /* LIBXML_SAX1_ENABLED */
10077 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10078#ifdef LIBXML_SAX1_ENABLED
10079 else
10080 name = xmlParseStartTag(ctxt);
10081#endif /* LIBXML_SAX1_ENABLED */
10082 if (ctxt->instate == XML_PARSER_EOF)
10083 return;
10084 if (name == NULL) {
10085 spacePop(ctxt);
10086 return;
10087 }
10088 namePush(ctxt, name);
10089 ret = ctxt->node;
10090
10091#ifdef LIBXML_VALID_ENABLED
10092 /*
10093 * [ VC: Root Element Type ]
10094 * The Name in the document type declaration must match the element
10095 * type of the root element.
10096 */
10097 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10098 ctxt->node && (ctxt->node == ctxt->myDoc->children))
10099 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10100#endif /* LIBXML_VALID_ENABLED */
10101
10102 /*
10103 * Check for an Empty Element.
10104 */
10105 if ((RAW == '/') && (NXT(1) == '>')) {
10106 SKIP(2);
10107 if (ctxt->sax2) {
10108 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10109 (!ctxt->disableSAX))
10110 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10111#ifdef LIBXML_SAX1_ENABLED
10112 } else {
10113 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10114 (!ctxt->disableSAX))
10115 ctxt->sax->endElement(ctxt->userData, name);
10116#endif /* LIBXML_SAX1_ENABLED */
10117 }
10118 namePop(ctxt);
10119 spacePop(ctxt);
10120 if (nsNr != ctxt->nsNr)
10121 nsPop(ctxt, ctxt->nsNr - nsNr);
10122 if ( ret != NULL && ctxt->record_info ) {
10123 node_info.end_pos = ctxt->input->consumed +
10124 (CUR_PTR - ctxt->input->base);
10125 node_info.end_line = ctxt->input->line;
10126 node_info.node = ret;
10127 xmlParserAddNodeInfo(ctxt, &node_info);
10128 }
10129 return;
10130 }
10131 if (RAW == '>') {
10132 NEXT1;
10133 } else {
10134 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10135 "Couldn't find end of Start Tag %s line %d\n",
10136 name, line, NULL);
10137
10138 /*
10139 * end of parsing of this node.
10140 */
10141 nodePop(ctxt);
10142 namePop(ctxt);
10143 spacePop(ctxt);
10144 if (nsNr != ctxt->nsNr)
10145 nsPop(ctxt, ctxt->nsNr - nsNr);
10146
10147 /*
10148 * Capture end position and add node
10149 */
10150 if ( ret != NULL && ctxt->record_info ) {
10151 node_info.end_pos = ctxt->input->consumed +
10152 (CUR_PTR - ctxt->input->base);
10153 node_info.end_line = ctxt->input->line;
10154 node_info.node = ret;
10155 xmlParserAddNodeInfo(ctxt, &node_info);
10156 }
10157 return;
10158 }
10159
10160 /*
10161 * Parse the content of the element:
10162 */
10163 xmlParseContent(ctxt);
10164 if (ctxt->instate == XML_PARSER_EOF)
10165 return;
10166 if (!IS_BYTE_CHAR(RAW)) {
10167 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10168 "Premature end of data in tag %s line %d\n",
10169 name, line, NULL);
10170
10171 /*
10172 * end of parsing of this node.
10173 */
10174 nodePop(ctxt);
10175 namePop(ctxt);
10176 spacePop(ctxt);
10177 if (nsNr != ctxt->nsNr)
10178 nsPop(ctxt, ctxt->nsNr - nsNr);
10179 return;
10180 }
10181
10182 /*
10183 * parse the end of tag: '</' should be here.
10184 */
10185 if (ctxt->sax2) {
10186 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
10187 namePop(ctxt);
10188 }
10189#ifdef LIBXML_SAX1_ENABLED
10190 else
10191 xmlParseEndTag1(ctxt, line);
10192#endif /* LIBXML_SAX1_ENABLED */
10193
10194 /*
10195 * Capture end position and add node
10196 */
10197 if ( ret != NULL && ctxt->record_info ) {
10198 node_info.end_pos = ctxt->input->consumed +
10199 (CUR_PTR - ctxt->input->base);
10200 node_info.end_line = ctxt->input->line;
10201 node_info.node = ret;
10202 xmlParserAddNodeInfo(ctxt, &node_info);
10203 }
10204}
10205
10206/**
10207 * xmlParseVersionNum:
10208 * @ctxt: an XML parser context
10209 *
10210 * parse the XML version value.
10211 *
10212 * [26] VersionNum ::= '1.' [0-9]+
10213 *
10214 * In practice allow [0-9].[0-9]+ at that level
10215 *
10216 * Returns the string giving the XML version number, or NULL
10217 */
10218xmlChar *
10219xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10220 xmlChar *buf = NULL;
10221 int len = 0;
10222 int size = 10;
10223 xmlChar cur;
10224
10225 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10226 if (buf == NULL) {
10227 xmlErrMemory(ctxt, NULL);
10228 return(NULL);
10229 }
10230 cur = CUR;
10231 if (!((cur >= '0') && (cur <= '9'))) {
10232 xmlFree(buf);
10233 return(NULL);
10234 }
10235 buf[len++] = cur;
10236 NEXT;
10237 cur=CUR;
10238 if (cur != '.') {
10239 xmlFree(buf);
10240 return(NULL);
10241 }
10242 buf[len++] = cur;
10243 NEXT;
10244 cur=CUR;
10245 while ((cur >= '0') && (cur <= '9')) {
10246 if (len + 1 >= size) {
10247 xmlChar *tmp;
10248
10249 size *= 2;
10250 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10251 if (tmp == NULL) {
10252 xmlFree(buf);
10253 xmlErrMemory(ctxt, NULL);
10254 return(NULL);
10255 }
10256 buf = tmp;
10257 }
10258 buf[len++] = cur;
10259 NEXT;
10260 cur=CUR;
10261 }
10262 buf[len] = 0;
10263 return(buf);
10264}
10265
10266/**
10267 * xmlParseVersionInfo:
10268 * @ctxt: an XML parser context
10269 *
10270 * parse the XML version.
10271 *
10272 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10273 *
10274 * [25] Eq ::= S? '=' S?
10275 *
10276 * Returns the version string, e.g. "1.0"
10277 */
10278
10279xmlChar *
10280xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10281 xmlChar *version = NULL;
10282
10283 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10284 SKIP(7);
10285 SKIP_BLANKS;
10286 if (RAW != '=') {
10287 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10288 return(NULL);
10289 }
10290 NEXT;
10291 SKIP_BLANKS;
10292 if (RAW == '"') {
10293 NEXT;
10294 version = xmlParseVersionNum(ctxt);
10295 if (RAW != '"') {
10296 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10297 } else
10298 NEXT;
10299 } else if (RAW == '\''){
10300 NEXT;
10301 version = xmlParseVersionNum(ctxt);
10302 if (RAW != '\'') {
10303 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10304 } else
10305 NEXT;
10306 } else {
10307 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10308 }
10309 }
10310 return(version);
10311}
10312
10313/**
10314 * xmlParseEncName:
10315 * @ctxt: an XML parser context
10316 *
10317 * parse the XML encoding name
10318 *
10319 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10320 *
10321 * Returns the encoding name value or NULL
10322 */
10323xmlChar *
10324xmlParseEncName(xmlParserCtxtPtr ctxt) {
10325 xmlChar *buf = NULL;
10326 int len = 0;
10327 int size = 10;
10328 xmlChar cur;
10329
10330 cur = CUR;
10331 if (((cur >= 'a') && (cur <= 'z')) ||
10332 ((cur >= 'A') && (cur <= 'Z'))) {
10333 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10334 if (buf == NULL) {
10335 xmlErrMemory(ctxt, NULL);
10336 return(NULL);
10337 }
10338
10339 buf[len++] = cur;
10340 NEXT;
10341 cur = CUR;
10342 while (((cur >= 'a') && (cur <= 'z')) ||
10343 ((cur >= 'A') && (cur <= 'Z')) ||
10344 ((cur >= '0') && (cur <= '9')) ||
10345 (cur == '.') || (cur == '_') ||
10346 (cur == '-')) {
10347 if (len + 1 >= size) {
10348 xmlChar *tmp;
10349
10350 size *= 2;
10351 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10352 if (tmp == NULL) {
10353 xmlErrMemory(ctxt, NULL);
10354 xmlFree(buf);
10355 return(NULL);
10356 }
10357 buf = tmp;
10358 }
10359 buf[len++] = cur;
10360 NEXT;
10361 cur = CUR;
10362 if (cur == 0) {
10363 SHRINK;
10364 GROW;
10365 cur = CUR;
10366 }
10367 }
10368 buf[len] = 0;
10369 } else {
10370 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10371 }
10372 return(buf);
10373}
10374
10375/**
10376 * xmlParseEncodingDecl:
10377 * @ctxt: an XML parser context
10378 *
10379 * parse the XML encoding declaration
10380 *
10381 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
10382 *
10383 * this setups the conversion filters.
10384 *
10385 * Returns the encoding value or NULL
10386 */
10387
10388const xmlChar *
10389xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10390 xmlChar *encoding = NULL;
10391
10392 SKIP_BLANKS;
10393 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10394 SKIP(8);
10395 SKIP_BLANKS;
10396 if (RAW != '=') {
10397 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10398 return(NULL);
10399 }
10400 NEXT;
10401 SKIP_BLANKS;
10402 if (RAW == '"') {
10403 NEXT;
10404 encoding = xmlParseEncName(ctxt);
10405 if (RAW != '"') {
10406 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10407 xmlFree((xmlChar *) encoding);
10408 return(NULL);
10409 } else
10410 NEXT;
10411 } else if (RAW == '\''){
10412 NEXT;
10413 encoding = xmlParseEncName(ctxt);
10414 if (RAW != '\'') {
10415 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10416 xmlFree((xmlChar *) encoding);
10417 return(NULL);
10418 } else
10419 NEXT;
10420 } else {
10421 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10422 }
10423
10424 /*
10425 * Non standard parsing, allowing the user to ignore encoding
10426 */
10427 if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10428 xmlFree((xmlChar *) encoding);
10429 return(NULL);
10430 }
10431
10432 /*
10433 * UTF-16 encoding stwich has already taken place at this stage,
10434 * more over the little-endian/big-endian selection is already done
10435 */
10436 if ((encoding != NULL) &&
10437 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10438 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10439 /*
10440 * If no encoding was passed to the parser, that we are
10441 * using UTF-16 and no decoder is present i.e. the
10442 * document is apparently UTF-8 compatible, then raise an
10443 * encoding mismatch fatal error
10444 */
10445 if ((ctxt->encoding == NULL) &&
10446 (ctxt->input->buf != NULL) &&
10447 (ctxt->input->buf->encoder == NULL)) {
10448 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10449 "Document labelled UTF-16 but has UTF-8 content\n");
10450 }
10451 if (ctxt->encoding != NULL)
10452 xmlFree((xmlChar *) ctxt->encoding);
10453 ctxt->encoding = encoding;
10454 }
10455 /*
10456 * UTF-8 encoding is handled natively
10457 */
10458 else if ((encoding != NULL) &&
10459 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10460 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10461 if (ctxt->encoding != NULL)
10462 xmlFree((xmlChar *) ctxt->encoding);
10463 ctxt->encoding = encoding;
10464 }
10465 else if (encoding != NULL) {
10466 xmlCharEncodingHandlerPtr handler;
10467
10468 if (ctxt->input->encoding != NULL)
10469 xmlFree((xmlChar *) ctxt->input->encoding);
10470 ctxt->input->encoding = encoding;
10471
10472 handler = xmlFindCharEncodingHandler((const char *) encoding);
10473 if (handler != NULL) {
10474 if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10475 /* failed to convert */
10476 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10477 return(NULL);
10478 }
10479 } else {
10480 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10481 "Unsupported encoding %s\n", encoding);
10482 return(NULL);
10483 }
10484 }
10485 }
10486 return(encoding);
10487}
10488
10489/**
10490 * xmlParseSDDecl:
10491 * @ctxt: an XML parser context
10492 *
10493 * parse the XML standalone declaration
10494 *
10495 * [32] SDDecl ::= S 'standalone' Eq
10496 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10497 *
10498 * [ VC: Standalone Document Declaration ]
10499 * TODO The standalone document declaration must have the value "no"
10500 * if any external markup declarations contain declarations of:
10501 * - attributes with default values, if elements to which these
10502 * attributes apply appear in the document without specifications
10503 * of values for these attributes, or
10504 * - entities (other than amp, lt, gt, apos, quot), if references
10505 * to those entities appear in the document, or
10506 * - attributes with values subject to normalization, where the
10507 * attribute appears in the document with a value which will change
10508 * as a result of normalization, or
10509 * - element types with element content, if white space occurs directly
10510 * within any instance of those types.
10511 *
10512 * Returns:
10513 * 1 if standalone="yes"
10514 * 0 if standalone="no"
10515 * -2 if standalone attribute is missing or invalid
10516 * (A standalone value of -2 means that the XML declaration was found,
10517 * but no value was specified for the standalone attribute).
10518 */
10519
10520int
10521xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10522 int standalone = -2;
10523
10524 SKIP_BLANKS;
10525 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10526 SKIP(10);
10527 SKIP_BLANKS;
10528 if (RAW != '=') {
10529 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10530 return(standalone);
10531 }
10532 NEXT;
10533 SKIP_BLANKS;
10534 if (RAW == '\''){
10535 NEXT;
10536 if ((RAW == 'n') && (NXT(1) == 'o')) {
10537 standalone = 0;
10538 SKIP(2);
10539 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10540 (NXT(2) == 's')) {
10541 standalone = 1;
10542 SKIP(3);
10543 } else {
10544 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10545 }
10546 if (RAW != '\'') {
10547 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10548 } else
10549 NEXT;
10550 } else if (RAW == '"'){
10551 NEXT;
10552 if ((RAW == 'n') && (NXT(1) == 'o')) {
10553 standalone = 0;
10554 SKIP(2);
10555 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10556 (NXT(2) == 's')) {
10557 standalone = 1;
10558 SKIP(3);
10559 } else {
10560 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10561 }
10562 if (RAW != '"') {
10563 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10564 } else
10565 NEXT;
10566 } else {
10567 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10568 }
10569 }
10570 return(standalone);
10571}
10572
10573/**
10574 * xmlParseXMLDecl:
10575 * @ctxt: an XML parser context
10576 *
10577 * parse an XML declaration header
10578 *
10579 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10580 */
10581
10582void
10583xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10584 xmlChar *version;
10585
10586 /*
10587 * This value for standalone indicates that the document has an
10588 * XML declaration but it does not have a standalone attribute.
10589 * It will be overwritten later if a standalone attribute is found.
10590 */
10591 ctxt->input->standalone = -2;
10592
10593 /*
10594 * We know that '<?xml' is here.
10595 */
10596 SKIP(5);
10597
10598 if (!IS_BLANK_CH(RAW)) {
10599 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10600 "Blank needed after '<?xml'\n");
10601 }
10602 SKIP_BLANKS;
10603
10604 /*
10605 * We must have the VersionInfo here.
10606 */
10607 version = xmlParseVersionInfo(ctxt);
10608 if (version == NULL) {
10609 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10610 } else {
10611 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10612 /*
10613 * Changed here for XML-1.0 5th edition
10614 */
10615 if (ctxt->options & XML_PARSE_OLD10) {
10616 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10617 "Unsupported version '%s'\n",
10618 version);
10619 } else {
10620 if ((version[0] == '1') && ((version[1] == '.'))) {
10621 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10622 "Unsupported version '%s'\n",
10623 version, NULL);
10624 } else {
10625 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10626 "Unsupported version '%s'\n",
10627 version);
10628 }
10629 }
10630 }
10631 if (ctxt->version != NULL)
10632 xmlFree((void *) ctxt->version);
10633 ctxt->version = version;
10634 }
10635
10636 /*
10637 * We may have the encoding declaration
10638 */
10639 if (!IS_BLANK_CH(RAW)) {
10640 if ((RAW == '?') && (NXT(1) == '>')) {
10641 SKIP(2);
10642 return;
10643 }
10644 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10645 }
10646 xmlParseEncodingDecl(ctxt);
10647 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10648 /*
10649 * The XML REC instructs us to stop parsing right here
10650 */
10651 return;
10652 }
10653
10654 /*
10655 * We may have the standalone status.
10656 */
10657 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10658 if ((RAW == '?') && (NXT(1) == '>')) {
10659 SKIP(2);
10660 return;
10661 }
10662 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10663 }
10664
10665 /*
10666 * We can grow the input buffer freely at that point
10667 */
10668 GROW;
10669
10670 SKIP_BLANKS;
10671 ctxt->input->standalone = xmlParseSDDecl(ctxt);
10672
10673 SKIP_BLANKS;
10674 if ((RAW == '?') && (NXT(1) == '>')) {
10675 SKIP(2);
10676 } else if (RAW == '>') {
10677 /* Deprecated old WD ... */
10678 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10679 NEXT;
10680 } else {
10681 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10682 MOVETO_ENDTAG(CUR_PTR);
10683 NEXT;
10684 }
10685}
10686
10687/**
10688 * xmlParseMisc:
10689 * @ctxt: an XML parser context
10690 *
10691 * parse an XML Misc* optional field.
10692 *
10693 * [27] Misc ::= Comment | PI | S
10694 */
10695
10696void
10697xmlParseMisc(xmlParserCtxtPtr ctxt) {
10698 while ((ctxt->instate != XML_PARSER_EOF) &&
10699 (((RAW == '<') && (NXT(1) == '?')) ||
10700 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
10701 IS_BLANK_CH(CUR))) {
10702 if ((RAW == '<') && (NXT(1) == '?')) {
10703 xmlParsePI(ctxt);
10704 } else if (IS_BLANK_CH(CUR)) {
10705 NEXT;
10706 } else
10707 xmlParseComment(ctxt);
10708 }
10709}
10710
10711/**
10712 * xmlParseDocument:
10713 * @ctxt: an XML parser context
10714 *
10715 * parse an XML document (and build a tree if using the standard SAX
10716 * interface).
10717 *
10718 * [1] document ::= prolog element Misc*
10719 *
10720 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10721 *
10722 * Returns 0, -1 in case of error. the parser context is augmented
10723 * as a result of the parsing.
10724 */
10725
10726int
10727xmlParseDocument(xmlParserCtxtPtr ctxt) {
10728 xmlChar start[4];
10729 xmlCharEncoding enc;
10730
10731 xmlInitParser();
10732
10733 if ((ctxt == NULL) || (ctxt->input == NULL))
10734 return(-1);
10735
10736 GROW;
10737
10738 /*
10739 * SAX: detecting the level.
10740 */
10741 xmlDetectSAX2(ctxt);
10742
10743 /*
10744 * SAX: beginning of the document processing.
10745 */
10746 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10747 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10748 if (ctxt->instate == XML_PARSER_EOF)
10749 return(-1);
10750
10751 if ((ctxt->encoding == NULL) &&
10752 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10753 /*
10754 * Get the 4 first bytes and decode the charset
10755 * if enc != XML_CHAR_ENCODING_NONE
10756 * plug some encoding conversion routines.
10757 */
10758 start[0] = RAW;
10759 start[1] = NXT(1);
10760 start[2] = NXT(2);
10761 start[3] = NXT(3);
10762 enc = xmlDetectCharEncoding(&start[0], 4);
10763 if (enc != XML_CHAR_ENCODING_NONE) {
10764 xmlSwitchEncoding(ctxt, enc);
10765 }
10766 }
10767
10768
10769 if (CUR == 0) {
10770 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10771 }
10772
10773 /*
10774 * Check for the XMLDecl in the Prolog.
10775 * do not GROW here to avoid the detected encoder to decode more
10776 * than just the first line, unless the amount of data is really
10777 * too small to hold "<?xml version="1.0" encoding="foo"
10778 */
10779 if ((ctxt->input->end - ctxt->input->cur) < 35) {
10780 GROW;
10781 }
10782 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10783
10784 /*
10785 * Note that we will switch encoding on the fly.
10786 */
10787 xmlParseXMLDecl(ctxt);
10788 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10789 /*
10790 * The XML REC instructs us to stop parsing right here
10791 */
10792 return(-1);
10793 }
10794 ctxt->standalone = ctxt->input->standalone;
10795 SKIP_BLANKS;
10796 } else {
10797 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10798 }
10799 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10800 ctxt->sax->startDocument(ctxt->userData);
10801 if (ctxt->instate == XML_PARSER_EOF)
10802 return(-1);
10803 if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10804 (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10805 ctxt->myDoc->compression = ctxt->input->buf->compressed;
10806 }
10807
10808 /*
10809 * The Misc part of the Prolog
10810 */
10811 GROW;
10812 xmlParseMisc(ctxt);
10813
10814 /*
10815 * Then possibly doc type declaration(s) and more Misc
10816 * (doctypedecl Misc*)?
10817 */
10818 GROW;
10819 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10820
10821 ctxt->inSubset = 1;
10822 xmlParseDocTypeDecl(ctxt);
10823 if (RAW == '[') {
10824 ctxt->instate = XML_PARSER_DTD;
10825 xmlParseInternalSubset(ctxt);
10826 if (ctxt->instate == XML_PARSER_EOF)
10827 return(-1);
10828 }
10829
10830 /*
10831 * Create and update the external subset.
10832 */
10833 ctxt->inSubset = 2;
10834 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10835 (!ctxt->disableSAX))
10836 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10837 ctxt->extSubSystem, ctxt->extSubURI);
10838 if (ctxt->instate == XML_PARSER_EOF)
10839 return(-1);
10840 ctxt->inSubset = 0;
10841
10842 xmlCleanSpecialAttr(ctxt);
10843
10844 ctxt->instate = XML_PARSER_PROLOG;
10845 xmlParseMisc(ctxt);
10846 }
10847
10848 /*
10849 * Time to start parsing the tree itself
10850 */
10851 GROW;
10852 if (RAW != '<') {
10853 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10854 "Start tag expected, '<' not found\n");
10855 } else {
10856 ctxt->instate = XML_PARSER_CONTENT;
10857 xmlParseElement(ctxt);
10858 ctxt->instate = XML_PARSER_EPILOG;
10859
10860
10861 /*
10862 * The Misc part at the end
10863 */
10864 xmlParseMisc(ctxt);
10865
10866 if (RAW != 0) {
10867 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10868 }
10869 ctxt->instate = XML_PARSER_EOF;
10870 }
10871
10872 /*
10873 * SAX: end of the document processing.
10874 */
10875 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10876 ctxt->sax->endDocument(ctxt->userData);
10877
10878 /*
10879 * Remove locally kept entity definitions if the tree was not built
10880 */
10881 if ((ctxt->myDoc != NULL) &&
10882 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10883 xmlFreeDoc(ctxt->myDoc);
10884 ctxt->myDoc = NULL;
10885 }
10886
10887 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10888 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10889 if (ctxt->valid)
10890 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10891 if (ctxt->nsWellFormed)
10892 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10893 if (ctxt->options & XML_PARSE_OLD10)
10894 ctxt->myDoc->properties |= XML_DOC_OLD10;
10895 }
10896 if (! ctxt->wellFormed) {
10897 ctxt->valid = 0;
10898 return(-1);
10899 }
10900 return(0);
10901}
10902
10903/**
10904 * xmlParseExtParsedEnt:
10905 * @ctxt: an XML parser context
10906 *
10907 * parse a general parsed entity
10908 * An external general parsed entity is well-formed if it matches the
10909 * production labeled extParsedEnt.
10910 *
10911 * [78] extParsedEnt ::= TextDecl? content
10912 *
10913 * Returns 0, -1 in case of error. the parser context is augmented
10914 * as a result of the parsing.
10915 */
10916
10917int
10918xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10919 xmlChar start[4];
10920 xmlCharEncoding enc;
10921
10922 if ((ctxt == NULL) || (ctxt->input == NULL))
10923 return(-1);
10924
10925 xmlDefaultSAXHandlerInit();
10926
10927 xmlDetectSAX2(ctxt);
10928
10929 GROW;
10930
10931 /*
10932 * SAX: beginning of the document processing.
10933 */
10934 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10935 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10936
10937 /*
10938 * Get the 4 first bytes and decode the charset
10939 * if enc != XML_CHAR_ENCODING_NONE
10940 * plug some encoding conversion routines.
10941 */
10942 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10943 start[0] = RAW;
10944 start[1] = NXT(1);
10945 start[2] = NXT(2);
10946 start[3] = NXT(3);
10947 enc = xmlDetectCharEncoding(start, 4);
10948 if (enc != XML_CHAR_ENCODING_NONE) {
10949 xmlSwitchEncoding(ctxt, enc);
10950 }
10951 }
10952
10953
10954 if (CUR == 0) {
10955 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10956 }
10957
10958 /*
10959 * Check for the XMLDecl in the Prolog.
10960 */
10961 GROW;
10962 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10963
10964 /*
10965 * Note that we will switch encoding on the fly.
10966 */
10967 xmlParseXMLDecl(ctxt);
10968 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10969 /*
10970 * The XML REC instructs us to stop parsing right here
10971 */
10972 return(-1);
10973 }
10974 SKIP_BLANKS;
10975 } else {
10976 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10977 }
10978 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10979 ctxt->sax->startDocument(ctxt->userData);
10980 if (ctxt->instate == XML_PARSER_EOF)
10981 return(-1);
10982
10983 /*
10984 * Doing validity checking on chunk doesn't make sense
10985 */
10986 ctxt->instate = XML_PARSER_CONTENT;
10987 ctxt->validate = 0;
10988 ctxt->loadsubset = 0;
10989 ctxt->depth = 0;
10990
10991 xmlParseContent(ctxt);
10992 if (ctxt->instate == XML_PARSER_EOF)
10993 return(-1);
10994
10995 if ((RAW == '<') && (NXT(1) == '/')) {
10996 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10997 } else if (RAW != 0) {
10998 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
10999 }
11000
11001 /*
11002 * SAX: end of the document processing.
11003 */
11004 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11005 ctxt->sax->endDocument(ctxt->userData);
11006
11007 if (! ctxt->wellFormed) return(-1);
11008 return(0);
11009}
11010
11011#ifdef LIBXML_PUSH_ENABLED
11012/************************************************************************
11013 * *
11014 * Progressive parsing interfaces *
11015 * *
11016 ************************************************************************/
11017
11018/**
11019 * xmlParseLookupSequence:
11020 * @ctxt: an XML parser context
11021 * @first: the first char to lookup
11022 * @next: the next char to lookup or zero
11023 * @third: the next char to lookup or zero
11024 *
11025 * Try to find if a sequence (first, next, third) or just (first next) or
11026 * (first) is available in the input stream.
11027 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
11028 * to avoid rescanning sequences of bytes, it DOES change the state of the
11029 * parser, do not use liberally.
11030 *
11031 * Returns the index to the current parsing point if the full sequence
11032 * is available, -1 otherwise.
11033 */
11034static int
11035xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
11036 xmlChar next, xmlChar third) {
11037 int base, len;
11038 xmlParserInputPtr in;
11039 const xmlChar *buf;
11040
11041 in = ctxt->input;
11042 if (in == NULL) return(-1);
11043 base = in->cur - in->base;
11044 if (base < 0) return(-1);
11045 if (ctxt->checkIndex > base)
11046 base = ctxt->checkIndex;
11047 if (in->buf == NULL) {
11048 buf = in->base;
11049 len = in->length;
11050 } else {
11051 buf = xmlBufContent(in->buf->buffer);
11052 len = xmlBufUse(in->buf->buffer);
11053 }
11054 /* take into account the sequence length */
11055 if (third) len -= 2;
11056 else if (next) len --;
11057 for (;base < len;base++) {
11058 if (buf[base] == first) {
11059 if (third != 0) {
11060 if ((buf[base + 1] != next) ||
11061 (buf[base + 2] != third)) continue;
11062 } else if (next != 0) {
11063 if (buf[base + 1] != next) continue;
11064 }
11065 ctxt->checkIndex = 0;
11066#ifdef DEBUG_PUSH
11067 if (next == 0)
11068 xmlGenericError(xmlGenericErrorContext,
11069 "PP: lookup '%c' found at %d\n",
11070 first, base);
11071 else if (third == 0)
11072 xmlGenericError(xmlGenericErrorContext,
11073 "PP: lookup '%c%c' found at %d\n",
11074 first, next, base);
11075 else
11076 xmlGenericError(xmlGenericErrorContext,
11077 "PP: lookup '%c%c%c' found at %d\n",
11078 first, next, third, base);
11079#endif
11080 return(base - (in->cur - in->base));
11081 }
11082 }
11083 ctxt->checkIndex = base;
11084#ifdef DEBUG_PUSH
11085 if (next == 0)
11086 xmlGenericError(xmlGenericErrorContext,
11087 "PP: lookup '%c' failed\n", first);
11088 else if (third == 0)
11089 xmlGenericError(xmlGenericErrorContext,
11090 "PP: lookup '%c%c' failed\n", first, next);
11091 else
11092 xmlGenericError(xmlGenericErrorContext,
11093 "PP: lookup '%c%c%c' failed\n", first, next, third);
11094#endif
11095 return(-1);
11096}
11097
11098/**
11099 * xmlParseGetLasts:
11100 * @ctxt: an XML parser context
11101 * @lastlt: pointer to store the last '<' from the input
11102 * @lastgt: pointer to store the last '>' from the input
11103 *
11104 * Lookup the last < and > in the current chunk
11105 */
11106static void
11107xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
11108 const xmlChar **lastgt) {
11109 const xmlChar *tmp;
11110
11111 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
11112 xmlGenericError(xmlGenericErrorContext,
11113 "Internal error: xmlParseGetLasts\n");
11114 return;
11115 }
11116 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
11117 tmp = ctxt->input->end;
11118 tmp--;
11119 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
11120 if (tmp < ctxt->input->base) {
11121 *lastlt = NULL;
11122 *lastgt = NULL;
11123 } else {
11124 *lastlt = tmp;
11125 tmp++;
11126 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
11127 if (*tmp == '\'') {
11128 tmp++;
11129 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
11130 if (tmp < ctxt->input->end) tmp++;
11131 } else if (*tmp == '"') {
11132 tmp++;
11133 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
11134 if (tmp < ctxt->input->end) tmp++;
11135 } else
11136 tmp++;
11137 }
11138 if (tmp < ctxt->input->end)
11139 *lastgt = tmp;
11140 else {
11141 tmp = *lastlt;
11142 tmp--;
11143 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
11144 if (tmp >= ctxt->input->base)
11145 *lastgt = tmp;
11146 else
11147 *lastgt = NULL;
11148 }
11149 }
11150 } else {
11151 *lastlt = NULL;
11152 *lastgt = NULL;
11153 }
11154}
11155/**
11156 * xmlCheckCdataPush:
11157 * @cur: pointer to the bock of characters
11158 * @len: length of the block in bytes
11159 *
11160 * Check that the block of characters is okay as SCdata content [20]
11161 *
11162 * Returns the number of bytes to pass if okay, a negative index where an
11163 * UTF-8 error occured otherwise
11164 */
11165static int
11166xmlCheckCdataPush(const xmlChar *utf, int len) {
11167 int ix;
11168 unsigned char c;
11169 int codepoint;
11170
11171 if ((utf == NULL) || (len <= 0))
11172 return(0);
11173
11174 for (ix = 0; ix < len;) { /* string is 0-terminated */
11175 c = utf[ix];
11176 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11177 if (c >= 0x20)
11178 ix++;
11179 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11180 ix++;
11181 else
11182 return(-ix);
11183 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11184 if (ix + 2 > len) return(ix);
11185 if ((utf[ix+1] & 0xc0 ) != 0x80)
11186 return(-ix);
11187 codepoint = (utf[ix] & 0x1f) << 6;
11188 codepoint |= utf[ix+1] & 0x3f;
11189 if (!xmlIsCharQ(codepoint))
11190 return(-ix);
11191 ix += 2;
11192 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11193 if (ix + 3 > len) return(ix);
11194 if (((utf[ix+1] & 0xc0) != 0x80) ||
11195 ((utf[ix+2] & 0xc0) != 0x80))
11196 return(-ix);
11197 codepoint = (utf[ix] & 0xf) << 12;
11198 codepoint |= (utf[ix+1] & 0x3f) << 6;
11199 codepoint |= utf[ix+2] & 0x3f;
11200 if (!xmlIsCharQ(codepoint))
11201 return(-ix);
11202 ix += 3;
11203 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11204 if (ix + 4 > len) return(ix);
11205 if (((utf[ix+1] & 0xc0) != 0x80) ||
11206 ((utf[ix+2] & 0xc0) != 0x80) ||
11207 ((utf[ix+3] & 0xc0) != 0x80))
11208 return(-ix);
11209 codepoint = (utf[ix] & 0x7) << 18;
11210 codepoint |= (utf[ix+1] & 0x3f) << 12;
11211 codepoint |= (utf[ix+2] & 0x3f) << 6;
11212 codepoint |= utf[ix+3] & 0x3f;
11213 if (!xmlIsCharQ(codepoint))
11214 return(-ix);
11215 ix += 4;
11216 } else /* unknown encoding */
11217 return(-ix);
11218 }
11219 return(ix);
11220}
11221
11222/**
11223 * xmlParseTryOrFinish:
11224 * @ctxt: an XML parser context
11225 * @terminate: last chunk indicator
11226 *
11227 * Try to progress on parsing
11228 *
11229 * Returns zero if no parsing was possible
11230 */
11231static int
11232xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11233 int ret = 0;
11234 int avail, tlen;
11235 xmlChar cur, next;
11236 const xmlChar *lastlt, *lastgt;
11237
11238 if (ctxt->input == NULL)
11239 return(0);
11240
11241#ifdef DEBUG_PUSH
11242 switch (ctxt->instate) {
11243 case XML_PARSER_EOF:
11244 xmlGenericError(xmlGenericErrorContext,
11245 "PP: try EOF\n"); break;
11246 case XML_PARSER_START:
11247 xmlGenericError(xmlGenericErrorContext,
11248 "PP: try START\n"); break;
11249 case XML_PARSER_MISC:
11250 xmlGenericError(xmlGenericErrorContext,
11251 "PP: try MISC\n");break;
11252 case XML_PARSER_COMMENT:
11253 xmlGenericError(xmlGenericErrorContext,
11254 "PP: try COMMENT\n");break;
11255 case XML_PARSER_PROLOG:
11256 xmlGenericError(xmlGenericErrorContext,
11257 "PP: try PROLOG\n");break;
11258 case XML_PARSER_START_TAG:
11259 xmlGenericError(xmlGenericErrorContext,
11260 "PP: try START_TAG\n");break;
11261 case XML_PARSER_CONTENT:
11262 xmlGenericError(xmlGenericErrorContext,
11263 "PP: try CONTENT\n");break;
11264 case XML_PARSER_CDATA_SECTION:
11265 xmlGenericError(xmlGenericErrorContext,
11266 "PP: try CDATA_SECTION\n");break;
11267 case XML_PARSER_END_TAG:
11268 xmlGenericError(xmlGenericErrorContext,
11269 "PP: try END_TAG\n");break;
11270 case XML_PARSER_ENTITY_DECL:
11271 xmlGenericError(xmlGenericErrorContext,
11272 "PP: try ENTITY_DECL\n");break;
11273 case XML_PARSER_ENTITY_VALUE:
11274 xmlGenericError(xmlGenericErrorContext,
11275 "PP: try ENTITY_VALUE\n");break;
11276 case XML_PARSER_ATTRIBUTE_VALUE:
11277 xmlGenericError(xmlGenericErrorContext,
11278 "PP: try ATTRIBUTE_VALUE\n");break;
11279 case XML_PARSER_DTD:
11280 xmlGenericError(xmlGenericErrorContext,
11281 "PP: try DTD\n");break;
11282 case XML_PARSER_EPILOG:
11283 xmlGenericError(xmlGenericErrorContext,
11284 "PP: try EPILOG\n");break;
11285 case XML_PARSER_PI:
11286 xmlGenericError(xmlGenericErrorContext,
11287 "PP: try PI\n");break;
11288 case XML_PARSER_IGNORE:
11289 xmlGenericError(xmlGenericErrorContext,
11290 "PP: try IGNORE\n");break;
11291 }
11292#endif
11293
11294 if ((ctxt->input != NULL) &&
11295 (ctxt->input->cur - ctxt->input->base > 4096)) {
11296 xmlSHRINK(ctxt);
11297 ctxt->checkIndex = 0;
11298 }
11299 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11300
11301 while (ctxt->instate != XML_PARSER_EOF) {
11302 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11303 return(0);
11304
11305
11306 /*
11307 * Pop-up of finished entities.
11308 */
11309 while ((RAW == 0) && (ctxt->inputNr > 1))
11310 xmlPopInput(ctxt);
11311
11312 if (ctxt->input == NULL) break;
11313 if (ctxt->input->buf == NULL)
11314 avail = ctxt->input->length -
11315 (ctxt->input->cur - ctxt->input->base);
11316 else {
11317 /*
11318 * If we are operating on converted input, try to flush
11319 * remainng chars to avoid them stalling in the non-converted
11320 * buffer. But do not do this in document start where
11321 * encoding="..." may not have been read and we work on a
11322 * guessed encoding.
11323 */
11324 if ((ctxt->instate != XML_PARSER_START) &&
11325 (ctxt->input->buf->raw != NULL) &&
11326 (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11327 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11328 ctxt->input);
11329 size_t current = ctxt->input->cur - ctxt->input->base;
11330
11331 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11332 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11333 base, current);
11334 }
11335 avail = xmlBufUse(ctxt->input->buf->buffer) -
11336 (ctxt->input->cur - ctxt->input->base);
11337 }
11338 if (avail < 1)
11339 goto done;
11340 switch (ctxt->instate) {
11341 case XML_PARSER_EOF:
11342 /*
11343 * Document parsing is done !
11344 */
11345 goto done;
11346 case XML_PARSER_START:
11347 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11348 xmlChar start[4];
11349 xmlCharEncoding enc;
11350
11351 /*
11352 * Very first chars read from the document flow.
11353 */
11354 if (avail < 4)
11355 goto done;
11356
11357 /*
11358 * Get the 4 first bytes and decode the charset
11359 * if enc != XML_CHAR_ENCODING_NONE
11360 * plug some encoding conversion routines,
11361 * else xmlSwitchEncoding will set to (default)
11362 * UTF8.
11363 */
11364 start[0] = RAW;
11365 start[1] = NXT(1);
11366 start[2] = NXT(2);
11367 start[3] = NXT(3);
11368 enc = xmlDetectCharEncoding(start, 4);
11369 xmlSwitchEncoding(ctxt, enc);
11370 break;
11371 }
11372
11373 if (avail < 2)
11374 goto done;
11375 cur = ctxt->input->cur[0];
11376 next = ctxt->input->cur[1];
11377 if (cur == 0) {
11378 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11379 ctxt->sax->setDocumentLocator(ctxt->userData,
11380 &xmlDefaultSAXLocator);
11381 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11382 ctxt->instate = XML_PARSER_EOF;
11383#ifdef DEBUG_PUSH
11384 xmlGenericError(xmlGenericErrorContext,
11385 "PP: entering EOF\n");
11386#endif
11387 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11388 ctxt->sax->endDocument(ctxt->userData);
11389 goto done;
11390 }
11391 if ((cur == '<') && (next == '?')) {
11392 /* PI or XML decl */
11393 if (avail < 5) return(ret);
11394 if ((!terminate) &&
11395 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11396 return(ret);
11397 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11398 ctxt->sax->setDocumentLocator(ctxt->userData,
11399 &xmlDefaultSAXLocator);
11400 if ((ctxt->input->cur[2] == 'x') &&
11401 (ctxt->input->cur[3] == 'm') &&
11402 (ctxt->input->cur[4] == 'l') &&
11403 (IS_BLANK_CH(ctxt->input->cur[5]))) {
11404 ret += 5;
11405#ifdef DEBUG_PUSH
11406 xmlGenericError(xmlGenericErrorContext,
11407 "PP: Parsing XML Decl\n");
11408#endif
11409 xmlParseXMLDecl(ctxt);
11410 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11411 /*
11412 * The XML REC instructs us to stop parsing right
11413 * here
11414 */
11415 ctxt->instate = XML_PARSER_EOF;
11416 return(0);
11417 }
11418 ctxt->standalone = ctxt->input->standalone;
11419 if ((ctxt->encoding == NULL) &&
11420 (ctxt->input->encoding != NULL))
11421 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11422 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11423 (!ctxt->disableSAX))
11424 ctxt->sax->startDocument(ctxt->userData);
11425 ctxt->instate = XML_PARSER_MISC;
11426#ifdef DEBUG_PUSH
11427 xmlGenericError(xmlGenericErrorContext,
11428 "PP: entering MISC\n");
11429#endif
11430 } else {
11431 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11432 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11433 (!ctxt->disableSAX))
11434 ctxt->sax->startDocument(ctxt->userData);
11435 ctxt->instate = XML_PARSER_MISC;
11436#ifdef DEBUG_PUSH
11437 xmlGenericError(xmlGenericErrorContext,
11438 "PP: entering MISC\n");
11439#endif
11440 }
11441 } else {
11442 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11443 ctxt->sax->setDocumentLocator(ctxt->userData,
11444 &xmlDefaultSAXLocator);
11445 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11446 if (ctxt->version == NULL) {
11447 xmlErrMemory(ctxt, NULL);
11448 break;
11449 }
11450 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11451 (!ctxt->disableSAX))
11452 ctxt->sax->startDocument(ctxt->userData);
11453 ctxt->instate = XML_PARSER_MISC;
11454#ifdef DEBUG_PUSH
11455 xmlGenericError(xmlGenericErrorContext,
11456 "PP: entering MISC\n");
11457#endif
11458 }
11459 break;
11460 case XML_PARSER_START_TAG: {
11461 const xmlChar *name;
11462 const xmlChar *prefix = NULL;
11463 const xmlChar *URI = NULL;
11464 int nsNr = ctxt->nsNr;
11465
11466 if ((avail < 2) && (ctxt->inputNr == 1))
11467 goto done;
11468 cur = ctxt->input->cur[0];
11469 if (cur != '<') {
11470 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11471 ctxt->instate = XML_PARSER_EOF;
11472 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11473 ctxt->sax->endDocument(ctxt->userData);
11474 goto done;
11475 }
11476 if (!terminate) {
11477 if (ctxt->progressive) {
11478 /* > can be found unescaped in attribute values */
11479 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11480 goto done;
11481 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11482 goto done;
11483 }
11484 }
11485 if (ctxt->spaceNr == 0)
11486 spacePush(ctxt, -1);
11487 else if (*ctxt->space == -2)
11488 spacePush(ctxt, -1);
11489 else
11490 spacePush(ctxt, *ctxt->space);
11491#ifdef LIBXML_SAX1_ENABLED
11492 if (ctxt->sax2)
11493#endif /* LIBXML_SAX1_ENABLED */
11494 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11495#ifdef LIBXML_SAX1_ENABLED
11496 else
11497 name = xmlParseStartTag(ctxt);
11498#endif /* LIBXML_SAX1_ENABLED */
11499 if (ctxt->instate == XML_PARSER_EOF)
11500 goto done;
11501 if (name == NULL) {
11502 spacePop(ctxt);
11503 ctxt->instate = XML_PARSER_EOF;
11504 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11505 ctxt->sax->endDocument(ctxt->userData);
11506 goto done;
11507 }
11508#ifdef LIBXML_VALID_ENABLED
11509 /*
11510 * [ VC: Root Element Type ]
11511 * The Name in the document type declaration must match
11512 * the element type of the root element.
11513 */
11514 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11515 ctxt->node && (ctxt->node == ctxt->myDoc->children))
11516 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11517#endif /* LIBXML_VALID_ENABLED */
11518
11519 /*
11520 * Check for an Empty Element.
11521 */
11522 if ((RAW == '/') && (NXT(1) == '>')) {
11523 SKIP(2);
11524
11525 if (ctxt->sax2) {
11526 if ((ctxt->sax != NULL) &&
11527 (ctxt->sax->endElementNs != NULL) &&
11528 (!ctxt->disableSAX))
11529 ctxt->sax->endElementNs(ctxt->userData, name,
11530 prefix, URI);
11531 if (ctxt->nsNr - nsNr > 0)
11532 nsPop(ctxt, ctxt->nsNr - nsNr);
11533#ifdef LIBXML_SAX1_ENABLED
11534 } else {
11535 if ((ctxt->sax != NULL) &&
11536 (ctxt->sax->endElement != NULL) &&
11537 (!ctxt->disableSAX))
11538 ctxt->sax->endElement(ctxt->userData, name);
11539#endif /* LIBXML_SAX1_ENABLED */
11540 }
11541 if (ctxt->instate == XML_PARSER_EOF)
11542 goto done;
11543 spacePop(ctxt);
11544 if (ctxt->nameNr == 0) {
11545 ctxt->instate = XML_PARSER_EPILOG;
11546 } else {
11547 ctxt->instate = XML_PARSER_CONTENT;
11548 }
11549 ctxt->progressive = 1;
11550 break;
11551 }
11552 if (RAW == '>') {
11553 NEXT;
11554 } else {
11555 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11556 "Couldn't find end of Start Tag %s\n",
11557 name);
11558 nodePop(ctxt);
11559 spacePop(ctxt);
11560 }
11561 if (ctxt->sax2)
11562 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
11563#ifdef LIBXML_SAX1_ENABLED
11564 else
11565 namePush(ctxt, name);
11566#endif /* LIBXML_SAX1_ENABLED */
11567
11568 ctxt->instate = XML_PARSER_CONTENT;
11569 ctxt->progressive = 1;
11570 break;
11571 }
11572 case XML_PARSER_CONTENT: {
11573 const xmlChar *test;
11574 unsigned int cons;
11575 if ((avail < 2) && (ctxt->inputNr == 1))
11576 goto done;
11577 cur = ctxt->input->cur[0];
11578 next = ctxt->input->cur[1];
11579
11580 test = CUR_PTR;
11581 cons = ctxt->input->consumed;
11582 if ((cur == '<') && (next == '/')) {
11583 ctxt->instate = XML_PARSER_END_TAG;
11584 break;
11585 } else if ((cur == '<') && (next == '?')) {
11586 if ((!terminate) &&
11587 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11588 ctxt->progressive = XML_PARSER_PI;
11589 goto done;
11590 }
11591 xmlParsePI(ctxt);
11592 ctxt->instate = XML_PARSER_CONTENT;
11593 ctxt->progressive = 1;
11594 } else if ((cur == '<') && (next != '!')) {
11595 ctxt->instate = XML_PARSER_START_TAG;
11596 break;
11597 } else if ((cur == '<') && (next == '!') &&
11598 (ctxt->input->cur[2] == '-') &&
11599 (ctxt->input->cur[3] == '-')) {
11600 int term;
11601
11602 if (avail < 4)
11603 goto done;
11604 ctxt->input->cur += 4;
11605 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11606 ctxt->input->cur -= 4;
11607 if ((!terminate) && (term < 0)) {
11608 ctxt->progressive = XML_PARSER_COMMENT;
11609 goto done;
11610 }
11611 xmlParseComment(ctxt);
11612 ctxt->instate = XML_PARSER_CONTENT;
11613 ctxt->progressive = 1;
11614 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11615 (ctxt->input->cur[2] == '[') &&
11616 (ctxt->input->cur[3] == 'C') &&
11617 (ctxt->input->cur[4] == 'D') &&
11618 (ctxt->input->cur[5] == 'A') &&
11619 (ctxt->input->cur[6] == 'T') &&
11620 (ctxt->input->cur[7] == 'A') &&
11621 (ctxt->input->cur[8] == '[')) {
11622 SKIP(9);
11623 ctxt->instate = XML_PARSER_CDATA_SECTION;
11624 break;
11625 } else if ((cur == '<') && (next == '!') &&
11626 (avail < 9)) {
11627 goto done;
11628 } else if (cur == '&') {
11629 if ((!terminate) &&
11630 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11631 goto done;
11632 xmlParseReference(ctxt);
11633 } else {
11634 /* TODO Avoid the extra copy, handle directly !!! */
11635 /*
11636 * Goal of the following test is:
11637 * - minimize calls to the SAX 'character' callback
11638 * when they are mergeable
11639 * - handle an problem for isBlank when we only parse
11640 * a sequence of blank chars and the next one is
11641 * not available to check against '<' presence.
11642 * - tries to homogenize the differences in SAX
11643 * callbacks between the push and pull versions
11644 * of the parser.
11645 */
11646 if ((ctxt->inputNr == 1) &&
11647 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11648 if (!terminate) {
11649 if (ctxt->progressive) {
11650 if ((lastlt == NULL) ||
11651 (ctxt->input->cur > lastlt))
11652 goto done;
11653 } else if (xmlParseLookupSequence(ctxt,
11654 '<', 0, 0) < 0) {
11655 goto done;
11656 }
11657 }
11658 }
11659 ctxt->checkIndex = 0;
11660 xmlParseCharData(ctxt, 0);
11661 }
11662 /*
11663 * Pop-up of finished entities.
11664 */
11665 while ((RAW == 0) && (ctxt->inputNr > 1))
11666 xmlPopInput(ctxt);
11667 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
11668 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11669 "detected an error in element content\n");
11670 ctxt->instate = XML_PARSER_EOF;
11671 break;
11672 }
11673 break;
11674 }
11675 case XML_PARSER_END_TAG:
11676 if (avail < 2)
11677 goto done;
11678 if (!terminate) {
11679 if (ctxt->progressive) {
11680 /* > can be found unescaped in attribute values */
11681 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11682 goto done;
11683 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11684 goto done;
11685 }
11686 }
11687 if (ctxt->sax2) {
11688 xmlParseEndTag2(ctxt,
11689 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11690 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
11691 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
11692 nameNsPop(ctxt);
11693 }
11694#ifdef LIBXML_SAX1_ENABLED
11695 else
11696 xmlParseEndTag1(ctxt, 0);
11697#endif /* LIBXML_SAX1_ENABLED */
11698 if (ctxt->instate == XML_PARSER_EOF) {
11699 /* Nothing */
11700 } else if (ctxt->nameNr == 0) {
11701 ctxt->instate = XML_PARSER_EPILOG;
11702 } else {
11703 ctxt->instate = XML_PARSER_CONTENT;
11704 }
11705 break;
11706 case XML_PARSER_CDATA_SECTION: {
11707 /*
11708 * The Push mode need to have the SAX callback for
11709 * cdataBlock merge back contiguous callbacks.
11710 */
11711 int base;
11712
11713 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11714 if (base < 0) {
11715 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
11716 int tmp;
11717
11718 tmp = xmlCheckCdataPush(ctxt->input->cur,
11719 XML_PARSER_BIG_BUFFER_SIZE);
11720 if (tmp < 0) {
11721 tmp = -tmp;
11722 ctxt->input->cur += tmp;
11723 goto encoding_error;
11724 }
11725 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11726 if (ctxt->sax->cdataBlock != NULL)
11727 ctxt->sax->cdataBlock(ctxt->userData,
11728 ctxt->input->cur, tmp);
11729 else if (ctxt->sax->characters != NULL)
11730 ctxt->sax->characters(ctxt->userData,
11731 ctxt->input->cur, tmp);
11732 }
11733 if (ctxt->instate == XML_PARSER_EOF)
11734 goto done;
11735 SKIPL(tmp);
11736 ctxt->checkIndex = 0;
11737 }
11738 goto done;
11739 } else {
11740 int tmp;
11741
11742 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
11743 if ((tmp < 0) || (tmp != base)) {
11744 tmp = -tmp;
11745 ctxt->input->cur += tmp;
11746 goto encoding_error;
11747 }
11748 if ((ctxt->sax != NULL) && (base == 0) &&
11749 (ctxt->sax->cdataBlock != NULL) &&
11750 (!ctxt->disableSAX)) {
11751 /*
11752 * Special case to provide identical behaviour
11753 * between pull and push parsers on enpty CDATA
11754 * sections
11755 */
11756 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11757 (!strncmp((const char *)&ctxt->input->cur[-9],
11758 "<![CDATA[", 9)))
11759 ctxt->sax->cdataBlock(ctxt->userData,
11760 BAD_CAST "", 0);
11761 } else if ((ctxt->sax != NULL) && (base > 0) &&
11762 (!ctxt->disableSAX)) {
11763 if (ctxt->sax->cdataBlock != NULL)
11764 ctxt->sax->cdataBlock(ctxt->userData,
11765 ctxt->input->cur, base);
11766 else if (ctxt->sax->characters != NULL)
11767 ctxt->sax->characters(ctxt->userData,
11768 ctxt->input->cur, base);
11769 }
11770 if (ctxt->instate == XML_PARSER_EOF)
11771 goto done;
11772 SKIPL(base + 3);
11773 ctxt->checkIndex = 0;
11774 ctxt->instate = XML_PARSER_CONTENT;
11775#ifdef DEBUG_PUSH
11776 xmlGenericError(xmlGenericErrorContext,
11777 "PP: entering CONTENT\n");
11778#endif
11779 }
11780 break;
11781 }
11782 case XML_PARSER_MISC:
11783 SKIP_BLANKS;
11784 if (ctxt->input->buf == NULL)
11785 avail = ctxt->input->length -
11786 (ctxt->input->cur - ctxt->input->base);
11787 else
11788 avail = xmlBufUse(ctxt->input->buf->buffer) -
11789 (ctxt->input->cur - ctxt->input->base);
11790 if (avail < 2)
11791 goto done;
11792 cur = ctxt->input->cur[0];
11793 next = ctxt->input->cur[1];
11794 if ((cur == '<') && (next == '?')) {
11795 if ((!terminate) &&
11796 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11797 ctxt->progressive = XML_PARSER_PI;
11798 goto done;
11799 }
11800#ifdef DEBUG_PUSH
11801 xmlGenericError(xmlGenericErrorContext,
11802 "PP: Parsing PI\n");
11803#endif
11804 xmlParsePI(ctxt);
11805 if (ctxt->instate == XML_PARSER_EOF)
11806 goto done;
11807 ctxt->instate = XML_PARSER_MISC;
11808 ctxt->progressive = 1;
11809 ctxt->checkIndex = 0;
11810 } else if ((cur == '<') && (next == '!') &&
11811 (ctxt->input->cur[2] == '-') &&
11812 (ctxt->input->cur[3] == '-')) {
11813 if ((!terminate) &&
11814 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11815 ctxt->progressive = XML_PARSER_COMMENT;
11816 goto done;
11817 }
11818#ifdef DEBUG_PUSH
11819 xmlGenericError(xmlGenericErrorContext,
11820 "PP: Parsing Comment\n");
11821#endif
11822 xmlParseComment(ctxt);
11823 if (ctxt->instate == XML_PARSER_EOF)
11824 goto done;
11825 ctxt->instate = XML_PARSER_MISC;
11826 ctxt->progressive = 1;
11827 ctxt->checkIndex = 0;
11828 } else if ((cur == '<') && (next == '!') &&
11829 (ctxt->input->cur[2] == 'D') &&
11830 (ctxt->input->cur[3] == 'O') &&
11831 (ctxt->input->cur[4] == 'C') &&
11832 (ctxt->input->cur[5] == 'T') &&
11833 (ctxt->input->cur[6] == 'Y') &&
11834 (ctxt->input->cur[7] == 'P') &&
11835 (ctxt->input->cur[8] == 'E')) {
11836 if ((!terminate) &&
11837 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11838 ctxt->progressive = XML_PARSER_DTD;
11839 goto done;
11840 }
11841#ifdef DEBUG_PUSH
11842 xmlGenericError(xmlGenericErrorContext,
11843 "PP: Parsing internal subset\n");
11844#endif
11845 ctxt->inSubset = 1;
11846 ctxt->progressive = 0;
11847 ctxt->checkIndex = 0;
11848 xmlParseDocTypeDecl(ctxt);
11849 if (ctxt->instate == XML_PARSER_EOF)
11850 goto done;
11851 if (RAW == '[') {
11852 ctxt->instate = XML_PARSER_DTD;
11853#ifdef DEBUG_PUSH
11854 xmlGenericError(xmlGenericErrorContext,
11855 "PP: entering DTD\n");
11856#endif
11857 } else {
11858 /*
11859 * Create and update the external subset.
11860 */
11861 ctxt->inSubset = 2;
11862 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11863 (ctxt->sax->externalSubset != NULL))
11864 ctxt->sax->externalSubset(ctxt->userData,
11865 ctxt->intSubName, ctxt->extSubSystem,
11866 ctxt->extSubURI);
11867 ctxt->inSubset = 0;
11868 xmlCleanSpecialAttr(ctxt);
11869 ctxt->instate = XML_PARSER_PROLOG;
11870#ifdef DEBUG_PUSH
11871 xmlGenericError(xmlGenericErrorContext,
11872 "PP: entering PROLOG\n");
11873#endif
11874 }
11875 } else if ((cur == '<') && (next == '!') &&
11876 (avail < 9)) {
11877 goto done;
11878 } else {
11879 ctxt->instate = XML_PARSER_START_TAG;
11880 ctxt->progressive = XML_PARSER_START_TAG;
11881 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11882#ifdef DEBUG_PUSH
11883 xmlGenericError(xmlGenericErrorContext,
11884 "PP: entering START_TAG\n");
11885#endif
11886 }
11887 break;
11888 case XML_PARSER_PROLOG:
11889 SKIP_BLANKS;
11890 if (ctxt->input->buf == NULL)
11891 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11892 else
11893 avail = xmlBufUse(ctxt->input->buf->buffer) -
11894 (ctxt->input->cur - ctxt->input->base);
11895 if (avail < 2)
11896 goto done;
11897 cur = ctxt->input->cur[0];
11898 next = ctxt->input->cur[1];
11899 if ((cur == '<') && (next == '?')) {
11900 if ((!terminate) &&
11901 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11902 ctxt->progressive = XML_PARSER_PI;
11903 goto done;
11904 }
11905#ifdef DEBUG_PUSH
11906 xmlGenericError(xmlGenericErrorContext,
11907 "PP: Parsing PI\n");
11908#endif
11909 xmlParsePI(ctxt);
11910 if (ctxt->instate == XML_PARSER_EOF)
11911 goto done;
11912 ctxt->instate = XML_PARSER_PROLOG;
11913 ctxt->progressive = 1;
11914 } else if ((cur == '<') && (next == '!') &&
11915 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11916 if ((!terminate) &&
11917 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11918 ctxt->progressive = XML_PARSER_COMMENT;
11919 goto done;
11920 }
11921#ifdef DEBUG_PUSH
11922 xmlGenericError(xmlGenericErrorContext,
11923 "PP: Parsing Comment\n");
11924#endif
11925 xmlParseComment(ctxt);
11926 if (ctxt->instate == XML_PARSER_EOF)
11927 goto done;
11928 ctxt->instate = XML_PARSER_PROLOG;
11929 ctxt->progressive = 1;
11930 } else if ((cur == '<') && (next == '!') &&
11931 (avail < 4)) {
11932 goto done;
11933 } else {
11934 ctxt->instate = XML_PARSER_START_TAG;
11935 if (ctxt->progressive == 0)
11936 ctxt->progressive = XML_PARSER_START_TAG;
11937 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11938#ifdef DEBUG_PUSH
11939 xmlGenericError(xmlGenericErrorContext,
11940 "PP: entering START_TAG\n");
11941#endif
11942 }
11943 break;
11944 case XML_PARSER_EPILOG:
11945 SKIP_BLANKS;
11946 if (ctxt->input->buf == NULL)
11947 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11948 else
11949 avail = xmlBufUse(ctxt->input->buf->buffer) -
11950 (ctxt->input->cur - ctxt->input->base);
11951 if (avail < 2)
11952 goto done;
11953 cur = ctxt->input->cur[0];
11954 next = ctxt->input->cur[1];
11955 if ((cur == '<') && (next == '?')) {
11956 if ((!terminate) &&
11957 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11958 ctxt->progressive = XML_PARSER_PI;
11959 goto done;
11960 }
11961#ifdef DEBUG_PUSH
11962 xmlGenericError(xmlGenericErrorContext,
11963 "PP: Parsing PI\n");
11964#endif
11965 xmlParsePI(ctxt);
11966 if (ctxt->instate == XML_PARSER_EOF)
11967 goto done;
11968 ctxt->instate = XML_PARSER_EPILOG;
11969 ctxt->progressive = 1;
11970 } else if ((cur == '<') && (next == '!') &&
11971 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11972 if ((!terminate) &&
11973 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11974 ctxt->progressive = XML_PARSER_COMMENT;
11975 goto done;
11976 }
11977#ifdef DEBUG_PUSH
11978 xmlGenericError(xmlGenericErrorContext,
11979 "PP: Parsing Comment\n");
11980#endif
11981 xmlParseComment(ctxt);
11982 if (ctxt->instate == XML_PARSER_EOF)
11983 goto done;
11984 ctxt->instate = XML_PARSER_EPILOG;
11985 ctxt->progressive = 1;
11986 } else if ((cur == '<') && (next == '!') &&
11987 (avail < 4)) {
11988 goto done;
11989 } else {
11990 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11991 ctxt->instate = XML_PARSER_EOF;
11992#ifdef DEBUG_PUSH
11993 xmlGenericError(xmlGenericErrorContext,
11994 "PP: entering EOF\n");
11995#endif
11996 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11997 ctxt->sax->endDocument(ctxt->userData);
11998 goto done;
11999 }
12000 break;
12001 case XML_PARSER_DTD: {
12002 /*
12003 * Sorry but progressive parsing of the internal subset
12004 * is not expected to be supported. We first check that
12005 * the full content of the internal subset is available and
12006 * the parsing is launched only at that point.
12007 * Internal subset ends up with "']' S? '>'" in an unescaped
12008 * section and not in a ']]>' sequence which are conditional
12009 * sections (whoever argued to keep that crap in XML deserve
12010 * a place in hell !).
12011 */
12012 int base, i;
12013 xmlChar *buf;
12014 xmlChar quote = 0;
12015 size_t use;
12016
12017 base = ctxt->input->cur - ctxt->input->base;
12018 if (base < 0) return(0);
12019 if (ctxt->checkIndex > base)
12020 base = ctxt->checkIndex;
12021 buf = xmlBufContent(ctxt->input->buf->buffer);
12022 use = xmlBufUse(ctxt->input->buf->buffer);
12023 for (;(unsigned int) base < use; base++) {
12024 if (quote != 0) {
12025 if (buf[base] == quote)
12026 quote = 0;
12027 continue;
12028 }
12029 if ((quote == 0) && (buf[base] == '<')) {
12030 int found = 0;
12031 /* special handling of comments */
12032 if (((unsigned int) base + 4 < use) &&
12033 (buf[base + 1] == '!') &&
12034 (buf[base + 2] == '-') &&
12035 (buf[base + 3] == '-')) {
12036 for (;(unsigned int) base + 3 < use; base++) {
12037 if ((buf[base] == '-') &&
12038 (buf[base + 1] == '-') &&
12039 (buf[base + 2] == '>')) {
12040 found = 1;
12041 base += 2;
12042 break;
12043 }
12044 }
12045 if (!found) {
12046#if 0
12047 fprintf(stderr, "unfinished comment\n");
12048#endif
12049 break; /* for */
12050 }
12051 continue;
12052 }
12053 }
12054 if (buf[base] == '"') {
12055 quote = '"';
12056 continue;
12057 }
12058 if (buf[base] == '\'') {
12059 quote = '\'';
12060 continue;
12061 }
12062 if (buf[base] == ']') {
12063#if 0
12064 fprintf(stderr, "%c%c%c%c: ", buf[base],
12065 buf[base + 1], buf[base + 2], buf[base + 3]);
12066#endif
12067 if ((unsigned int) base +1 >= use)
12068 break;
12069 if (buf[base + 1] == ']') {
12070 /* conditional crap, skip both ']' ! */
12071 base++;
12072 continue;
12073 }
12074 for (i = 1; (unsigned int) base + i < use; i++) {
12075 if (buf[base + i] == '>') {
12076#if 0
12077 fprintf(stderr, "found\n");
12078#endif
12079 goto found_end_int_subset;
12080 }
12081 if (!IS_BLANK_CH(buf[base + i])) {
12082#if 0
12083 fprintf(stderr, "not found\n");
12084#endif
12085 goto not_end_of_int_subset;
12086 }
12087 }
12088#if 0
12089 fprintf(stderr, "end of stream\n");
12090#endif
12091 break;
12092
12093 }
12094not_end_of_int_subset:
12095 continue; /* for */
12096 }
12097 /*
12098 * We didn't found the end of the Internal subset
12099 */
12100 if (quote == 0)
12101 ctxt->checkIndex = base;
12102 else
12103 ctxt->checkIndex = 0;
12104#ifdef DEBUG_PUSH
12105 if (next == 0)
12106 xmlGenericError(xmlGenericErrorContext,
12107 "PP: lookup of int subset end filed\n");
12108#endif
12109 goto done;
12110
12111found_end_int_subset:
12112 ctxt->checkIndex = 0;
12113 xmlParseInternalSubset(ctxt);
12114 if (ctxt->instate == XML_PARSER_EOF)
12115 goto done;
12116 ctxt->inSubset = 2;
12117 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12118 (ctxt->sax->externalSubset != NULL))
12119 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12120 ctxt->extSubSystem, ctxt->extSubURI);
12121 ctxt->inSubset = 0;
12122 xmlCleanSpecialAttr(ctxt);
12123 if (ctxt->instate == XML_PARSER_EOF)
12124 goto done;
12125 ctxt->instate = XML_PARSER_PROLOG;
12126 ctxt->checkIndex = 0;
12127#ifdef DEBUG_PUSH
12128 xmlGenericError(xmlGenericErrorContext,
12129 "PP: entering PROLOG\n");
12130#endif
12131 break;
12132 }
12133 case XML_PARSER_COMMENT:
12134 xmlGenericError(xmlGenericErrorContext,
12135 "PP: internal error, state == COMMENT\n");
12136 ctxt->instate = XML_PARSER_CONTENT;
12137#ifdef DEBUG_PUSH
12138 xmlGenericError(xmlGenericErrorContext,
12139 "PP: entering CONTENT\n");
12140#endif
12141 break;
12142 case XML_PARSER_IGNORE:
12143 xmlGenericError(xmlGenericErrorContext,
12144 "PP: internal error, state == IGNORE");
12145 ctxt->instate = XML_PARSER_DTD;
12146#ifdef DEBUG_PUSH
12147 xmlGenericError(xmlGenericErrorContext,
12148 "PP: entering DTD\n");
12149#endif
12150 break;
12151 case XML_PARSER_PI:
12152 xmlGenericError(xmlGenericErrorContext,
12153 "PP: internal error, state == PI\n");
12154 ctxt->instate = XML_PARSER_CONTENT;
12155#ifdef DEBUG_PUSH
12156 xmlGenericError(xmlGenericErrorContext,
12157 "PP: entering CONTENT\n");
12158#endif
12159 break;
12160 case XML_PARSER_ENTITY_DECL:
12161 xmlGenericError(xmlGenericErrorContext,
12162 "PP: internal error, state == ENTITY_DECL\n");
12163 ctxt->instate = XML_PARSER_DTD;
12164#ifdef DEBUG_PUSH
12165 xmlGenericError(xmlGenericErrorContext,
12166 "PP: entering DTD\n");
12167#endif
12168 break;
12169 case XML_PARSER_ENTITY_VALUE:
12170 xmlGenericError(xmlGenericErrorContext,
12171 "PP: internal error, state == ENTITY_VALUE\n");
12172 ctxt->instate = XML_PARSER_CONTENT;
12173#ifdef DEBUG_PUSH
12174 xmlGenericError(xmlGenericErrorContext,
12175 "PP: entering DTD\n");
12176#endif
12177 break;
12178 case XML_PARSER_ATTRIBUTE_VALUE:
12179 xmlGenericError(xmlGenericErrorContext,
12180 "PP: internal error, state == ATTRIBUTE_VALUE\n");
12181 ctxt->instate = XML_PARSER_START_TAG;
12182#ifdef DEBUG_PUSH
12183 xmlGenericError(xmlGenericErrorContext,
12184 "PP: entering START_TAG\n");
12185#endif
12186 break;
12187 case XML_PARSER_SYSTEM_LITERAL:
12188 xmlGenericError(xmlGenericErrorContext,
12189 "PP: internal error, state == SYSTEM_LITERAL\n");
12190 ctxt->instate = XML_PARSER_START_TAG;
12191#ifdef DEBUG_PUSH
12192 xmlGenericError(xmlGenericErrorContext,
12193 "PP: entering START_TAG\n");
12194#endif
12195 break;
12196 case XML_PARSER_PUBLIC_LITERAL:
12197 xmlGenericError(xmlGenericErrorContext,
12198 "PP: internal error, state == PUBLIC_LITERAL\n");
12199 ctxt->instate = XML_PARSER_START_TAG;
12200#ifdef DEBUG_PUSH
12201 xmlGenericError(xmlGenericErrorContext,
12202 "PP: entering START_TAG\n");
12203#endif
12204 break;
12205 }
12206 }
12207done:
12208#ifdef DEBUG_PUSH
12209 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12210#endif
12211 return(ret);
12212encoding_error:
12213 {
12214 char buffer[150];
12215
12216 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12217 ctxt->input->cur[0], ctxt->input->cur[1],
12218 ctxt->input->cur[2], ctxt->input->cur[3]);
12219 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12220 "Input is not proper UTF-8, indicate encoding !\n%s",
12221 BAD_CAST buffer, NULL);
12222 }
12223 return(0);
12224}
12225
12226/**
12227 * xmlParseCheckTransition:
12228 * @ctxt: an XML parser context
12229 * @chunk: a char array
12230 * @size: the size in byte of the chunk
12231 *
12232 * Check depending on the current parser state if the chunk given must be
12233 * processed immediately or one need more data to advance on parsing.
12234 *
12235 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12236 */
12237static int
12238xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12239 if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12240 return(-1);
12241 if (ctxt->instate == XML_PARSER_START_TAG) {
12242 if (memchr(chunk, '>', size) != NULL)
12243 return(1);
12244 return(0);
12245 }
12246 if (ctxt->progressive == XML_PARSER_COMMENT) {
12247 if (memchr(chunk, '>', size) != NULL)
12248 return(1);
12249 return(0);
12250 }
12251 if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12252 if (memchr(chunk, '>', size) != NULL)
12253 return(1);
12254 return(0);
12255 }
12256 if (ctxt->progressive == XML_PARSER_PI) {
12257 if (memchr(chunk, '>', size) != NULL)
12258 return(1);
12259 return(0);
12260 }
12261 if (ctxt->instate == XML_PARSER_END_TAG) {
12262 if (memchr(chunk, '>', size) != NULL)
12263 return(1);
12264 return(0);
12265 }
12266 if ((ctxt->progressive == XML_PARSER_DTD) ||
12267 (ctxt->instate == XML_PARSER_DTD)) {
12268 if (memchr(chunk, '>', size) != NULL)
12269 return(1);
12270 return(0);
12271 }
12272 return(1);
12273}
12274
12275/**
12276 * xmlParseChunk:
12277 * @ctxt: an XML parser context
12278 * @chunk: an char array
12279 * @size: the size in byte of the chunk
12280 * @terminate: last chunk indicator
12281 *
12282 * Parse a Chunk of memory
12283 *
12284 * Returns zero if no error, the xmlParserErrors otherwise.
12285 */
12286int
12287xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12288 int terminate) {
12289 int end_in_lf = 0;
12290 int remain = 0;
12291 size_t old_avail = 0;
12292 size_t avail = 0;
12293
12294 if (ctxt == NULL)
12295 return(XML_ERR_INTERNAL_ERROR);
12296 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12297 return(ctxt->errNo);
12298 if (ctxt->instate == XML_PARSER_EOF)
12299 return(-1);
12300 if (ctxt->instate == XML_PARSER_START)
12301 xmlDetectSAX2(ctxt);
12302 if ((size > 0) && (chunk != NULL) && (!terminate) &&
12303 (chunk[size - 1] == '\r')) {
12304 end_in_lf = 1;
12305 size--;
12306 }
12307
12308xmldecl_done:
12309
12310 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12311 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
12312 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12313 size_t cur = ctxt->input->cur - ctxt->input->base;
12314 int res;
12315
12316 old_avail = xmlBufUse(ctxt->input->buf->buffer);
12317 /*
12318 * Specific handling if we autodetected an encoding, we should not
12319 * push more than the first line ... which depend on the encoding
12320 * And only push the rest once the final encoding was detected
12321 */
12322 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12323 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
12324 unsigned int len = 45;
12325
12326 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12327 BAD_CAST "UTF-16")) ||
12328 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12329 BAD_CAST "UTF16")))
12330 len = 90;
12331 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12332 BAD_CAST "UCS-4")) ||
12333 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12334 BAD_CAST "UCS4")))
12335 len = 180;
12336
12337 if (ctxt->input->buf->rawconsumed < len)
12338 len -= ctxt->input->buf->rawconsumed;
12339
12340 /*
12341 * Change size for reading the initial declaration only
12342 * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12343 * will blindly copy extra bytes from memory.
12344 */
12345 if ((unsigned int) size > len) {
12346 remain = size - len;
12347 size = len;
12348 } else {
12349 remain = 0;
12350 }
12351 }
12352 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12353 if (res < 0) {
12354 ctxt->errNo = XML_PARSER_EOF;
12355 ctxt->disableSAX = 1;
12356 return (XML_PARSER_EOF);
12357 }
12358 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12359#ifdef DEBUG_PUSH
12360 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12361#endif
12362
12363 } else if (ctxt->instate != XML_PARSER_EOF) {
12364 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12365 xmlParserInputBufferPtr in = ctxt->input->buf;
12366 if ((in->encoder != NULL) && (in->buffer != NULL) &&
12367 (in->raw != NULL)) {
12368 int nbchars;
12369 size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12370 size_t current = ctxt->input->cur - ctxt->input->base;
12371
12372 nbchars = xmlCharEncInput(in, terminate);
12373 if (nbchars < 0) {
12374 /* TODO 2.6.0 */
12375 xmlGenericError(xmlGenericErrorContext,
12376 "xmlParseChunk: encoder error\n");
12377 return(XML_ERR_INVALID_ENCODING);
12378 }
12379 xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
12380 }
12381 }
12382 }
12383 if (remain != 0) {
12384 xmlParseTryOrFinish(ctxt, 0);
12385 } else {
12386 if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12387 avail = xmlBufUse(ctxt->input->buf->buffer);
12388 /*
12389 * Depending on the current state it may not be such
12390 * a good idea to try parsing if there is nothing in the chunk
12391 * which would be worth doing a parser state transition and we
12392 * need to wait for more data
12393 */
12394 if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12395 (old_avail == 0) || (avail == 0) ||
12396 (xmlParseCheckTransition(ctxt,
12397 (const char *)&ctxt->input->base[old_avail],
12398 avail - old_avail)))
12399 xmlParseTryOrFinish(ctxt, terminate);
12400 }
12401 if (ctxt->instate == XML_PARSER_EOF)
12402 return(ctxt->errNo);
12403
12404 if ((ctxt->input != NULL) &&
12405 (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12406 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12407 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12408 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12409 ctxt->instate = XML_PARSER_EOF;
12410 }
12411 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12412 return(ctxt->errNo);
12413
12414 if (remain != 0) {
12415 chunk += size;
12416 size = remain;
12417 remain = 0;
12418 goto xmldecl_done;
12419 }
12420 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12421 (ctxt->input->buf != NULL)) {
12422 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12423 ctxt->input);
12424 size_t current = ctxt->input->cur - ctxt->input->base;
12425
12426 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12427
12428 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12429 base, current);
12430 }
12431 if (terminate) {
12432 /*
12433 * Check for termination
12434 */
12435 int cur_avail = 0;
12436
12437 if (ctxt->input != NULL) {
12438 if (ctxt->input->buf == NULL)
12439 cur_avail = ctxt->input->length -
12440 (ctxt->input->cur - ctxt->input->base);
12441 else
12442 cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12443 (ctxt->input->cur - ctxt->input->base);
12444 }
12445
12446 if ((ctxt->instate != XML_PARSER_EOF) &&
12447 (ctxt->instate != XML_PARSER_EPILOG)) {
12448 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12449 }
12450 if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
12451 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12452 }
12453 if (ctxt->instate != XML_PARSER_EOF) {
12454 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12455 ctxt->sax->endDocument(ctxt->userData);
12456 }
12457 ctxt->instate = XML_PARSER_EOF;
12458 }
12459 if (ctxt->wellFormed == 0)
12460 return((xmlParserErrors) ctxt->errNo);
12461 else
12462 return(0);
12463}
12464
12465/************************************************************************
12466 * *
12467 * I/O front end functions to the parser *
12468 * *
12469 ************************************************************************/
12470
12471/**
12472 * xmlCreatePushParserCtxt:
12473 * @sax: a SAX handler
12474 * @user_data: The user data returned on SAX callbacks
12475 * @chunk: a pointer to an array of chars
12476 * @size: number of chars in the array
12477 * @filename: an optional file name or URI
12478 *
12479 * Create a parser context for using the XML parser in push mode.
12480 * If @buffer and @size are non-NULL, the data is used to detect
12481 * the encoding. The remaining characters will be parsed so they
12482 * don't need to be fed in again through xmlParseChunk.
12483 * To allow content encoding detection, @size should be >= 4
12484 * The value of @filename is used for fetching external entities
12485 * and error/warning reports.
12486 *
12487 * Returns the new parser context or NULL
12488 */
12489
12490xmlParserCtxtPtr
12491xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12492 const char *chunk, int size, const char *filename) {
12493 xmlParserCtxtPtr ctxt;
12494 xmlParserInputPtr inputStream;
12495 xmlParserInputBufferPtr buf;
12496 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12497
12498 /*
12499 * plug some encoding conversion routines
12500 */
12501 if ((chunk != NULL) && (size >= 4))
12502 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12503
12504 buf = xmlAllocParserInputBuffer(enc);
12505 if (buf == NULL) return(NULL);
12506
12507 ctxt = xmlNewParserCtxt();
12508 if (ctxt == NULL) {
12509 xmlErrMemory(NULL, "creating parser: out of memory\n");
12510 xmlFreeParserInputBuffer(buf);
12511 return(NULL);
12512 }
12513 ctxt->dictNames = 1;
12514 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
12515 if (ctxt->pushTab == NULL) {
12516 xmlErrMemory(ctxt, NULL);
12517 xmlFreeParserInputBuffer(buf);
12518 xmlFreeParserCtxt(ctxt);
12519 return(NULL);
12520 }
12521 if (sax != NULL) {
12522#ifdef LIBXML_SAX1_ENABLED
12523 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12524#endif /* LIBXML_SAX1_ENABLED */
12525 xmlFree(ctxt->sax);
12526 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12527 if (ctxt->sax == NULL) {
12528 xmlErrMemory(ctxt, NULL);
12529 xmlFreeParserInputBuffer(buf);
12530 xmlFreeParserCtxt(ctxt);
12531 return(NULL);
12532 }
12533 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12534 if (sax->initialized == XML_SAX2_MAGIC)
12535 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12536 else
12537 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12538 if (user_data != NULL)
12539 ctxt->userData = user_data;
12540 }
12541 if (filename == NULL) {
12542 ctxt->directory = NULL;
12543 } else {
12544 ctxt->directory = xmlParserGetDirectory(filename);
12545 }
12546
12547 inputStream = xmlNewInputStream(ctxt);
12548 if (inputStream == NULL) {
12549 xmlFreeParserCtxt(ctxt);
12550 xmlFreeParserInputBuffer(buf);
12551 return(NULL);
12552 }
12553
12554 if (filename == NULL)
12555 inputStream->filename = NULL;
12556 else {
12557 inputStream->filename = (char *)
12558 xmlCanonicPath((const xmlChar *) filename);
12559 if (inputStream->filename == NULL) {
12560 xmlFreeParserCtxt(ctxt);
12561 xmlFreeParserInputBuffer(buf);
12562 return(NULL);
12563 }
12564 }
12565 inputStream->buf = buf;
12566 xmlBufResetInput(inputStream->buf->buffer, inputStream);
12567 inputPush(ctxt, inputStream);
12568
12569 /*
12570 * If the caller didn't provide an initial 'chunk' for determining
12571 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12572 * that it can be automatically determined later
12573 */
12574 if ((size == 0) || (chunk == NULL)) {
12575 ctxt->charset = XML_CHAR_ENCODING_NONE;
12576 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12577 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12578 size_t cur = ctxt->input->cur - ctxt->input->base;
12579
12580 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12581
12582 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12583#ifdef DEBUG_PUSH
12584 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12585#endif
12586 }
12587
12588 if (enc != XML_CHAR_ENCODING_NONE) {
12589 xmlSwitchEncoding(ctxt, enc);
12590 }
12591
12592 return(ctxt);
12593}
12594#endif /* LIBXML_PUSH_ENABLED */
12595
12596/**
12597 * xmlStopParser:
12598 * @ctxt: an XML parser context
12599 *
12600 * Blocks further parser processing
12601 */
12602void
12603xmlStopParser(xmlParserCtxtPtr ctxt) {
12604 if (ctxt == NULL)
12605 return;
12606 ctxt->instate = XML_PARSER_EOF;
12607 ctxt->errNo = XML_ERR_USER_STOP;
12608 ctxt->disableSAX = 1;
12609 if (ctxt->input != NULL) {
12610 ctxt->input->cur = BAD_CAST"";
12611 ctxt->input->base = ctxt->input->cur;
12612 }
12613}
12614
12615/**
12616 * xmlCreateIOParserCtxt:
12617 * @sax: a SAX handler
12618 * @user_data: The user data returned on SAX callbacks
12619 * @ioread: an I/O read function
12620 * @ioclose: an I/O close function
12621 * @ioctx: an I/O handler
12622 * @enc: the charset encoding if known
12623 *
12624 * Create a parser context for using the XML parser with an existing
12625 * I/O stream
12626 *
12627 * Returns the new parser context or NULL
12628 */
12629xmlParserCtxtPtr
12630xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12631 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
12632 void *ioctx, xmlCharEncoding enc) {
12633 xmlParserCtxtPtr ctxt;
12634 xmlParserInputPtr inputStream;
12635 xmlParserInputBufferPtr buf;
12636
12637 if (ioread == NULL) return(NULL);
12638
12639 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12640 if (buf == NULL) {
12641 if (ioclose != NULL)
12642 ioclose(ioctx);
12643 return (NULL);
12644 }
12645
12646 ctxt = xmlNewParserCtxt();
12647 if (ctxt == NULL) {
12648 xmlFreeParserInputBuffer(buf);
12649 return(NULL);
12650 }
12651 if (sax != NULL) {
12652#ifdef LIBXML_SAX1_ENABLED
12653 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12654#endif /* LIBXML_SAX1_ENABLED */
12655 xmlFree(ctxt->sax);
12656 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12657 if (ctxt->sax == NULL) {
12658 xmlErrMemory(ctxt, NULL);
12659 xmlFreeParserCtxt(ctxt);
12660 return(NULL);
12661 }
12662 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12663 if (sax->initialized == XML_SAX2_MAGIC)
12664 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12665 else
12666 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12667 if (user_data != NULL)
12668 ctxt->userData = user_data;
12669 }
12670
12671 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12672 if (inputStream == NULL) {
12673 xmlFreeParserCtxt(ctxt);
12674 return(NULL);
12675 }
12676 inputPush(ctxt, inputStream);
12677
12678 return(ctxt);
12679}
12680
12681#ifdef LIBXML_VALID_ENABLED
12682/************************************************************************
12683 * *
12684 * Front ends when parsing a DTD *
12685 * *
12686 ************************************************************************/
12687
12688/**
12689 * xmlIOParseDTD:
12690 * @sax: the SAX handler block or NULL
12691 * @input: an Input Buffer
12692 * @enc: the charset encoding if known
12693 *
12694 * Load and parse a DTD
12695 *
12696 * Returns the resulting xmlDtdPtr or NULL in case of error.
12697 * @input will be freed by the function in any case.
12698 */
12699
12700xmlDtdPtr
12701xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12702 xmlCharEncoding enc) {
12703 xmlDtdPtr ret = NULL;
12704 xmlParserCtxtPtr ctxt;
12705 xmlParserInputPtr pinput = NULL;
12706 xmlChar start[4];
12707
12708 if (input == NULL)
12709 return(NULL);
12710
12711 ctxt = xmlNewParserCtxt();
12712 if (ctxt == NULL) {
12713 xmlFreeParserInputBuffer(input);
12714 return(NULL);
12715 }
12716
12717 /* We are loading a DTD */
12718 ctxt->options |= XML_PARSE_DTDLOAD;
12719
12720 /*
12721 * Set-up the SAX context
12722 */
12723 if (sax != NULL) {
12724 if (ctxt->sax != NULL)
12725 xmlFree(ctxt->sax);
12726 ctxt->sax = sax;
12727 ctxt->userData = ctxt;
12728 }
12729 xmlDetectSAX2(ctxt);
12730
12731 /*
12732 * generate a parser input from the I/O handler
12733 */
12734
12735 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12736 if (pinput == NULL) {
12737 if (sax != NULL) ctxt->sax = NULL;
12738 xmlFreeParserInputBuffer(input);
12739 xmlFreeParserCtxt(ctxt);
12740 return(NULL);
12741 }
12742
12743 /*
12744 * plug some encoding conversion routines here.
12745 */
12746 if (xmlPushInput(ctxt, pinput) < 0) {
12747 if (sax != NULL) ctxt->sax = NULL;
12748 xmlFreeParserCtxt(ctxt);
12749 return(NULL);
12750 }
12751 if (enc != XML_CHAR_ENCODING_NONE) {
12752 xmlSwitchEncoding(ctxt, enc);
12753 }
12754
12755 pinput->filename = NULL;
12756 pinput->line = 1;
12757 pinput->col = 1;
12758 pinput->base = ctxt->input->cur;
12759 pinput->cur = ctxt->input->cur;
12760 pinput->free = NULL;
12761
12762 /*
12763 * let's parse that entity knowing it's an external subset.
12764 */
12765 ctxt->inSubset = 2;
12766 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12767 if (ctxt->myDoc == NULL) {
12768 xmlErrMemory(ctxt, "New Doc failed");
12769 return(NULL);
12770 }
12771 ctxt->myDoc->properties = XML_DOC_INTERNAL;
12772 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12773 BAD_CAST "none", BAD_CAST "none");
12774
12775 if ((enc == XML_CHAR_ENCODING_NONE) &&
12776 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12777 /*
12778 * Get the 4 first bytes and decode the charset
12779 * if enc != XML_CHAR_ENCODING_NONE
12780 * plug some encoding conversion routines.
12781 */
12782 start[0] = RAW;
12783 start[1] = NXT(1);
12784 start[2] = NXT(2);
12785 start[3] = NXT(3);
12786 enc = xmlDetectCharEncoding(start, 4);
12787 if (enc != XML_CHAR_ENCODING_NONE) {
12788 xmlSwitchEncoding(ctxt, enc);
12789 }
12790 }
12791
12792 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12793
12794 if (ctxt->myDoc != NULL) {
12795 if (ctxt->wellFormed) {
12796 ret = ctxt->myDoc->extSubset;
12797 ctxt->myDoc->extSubset = NULL;
12798 if (ret != NULL) {
12799 xmlNodePtr tmp;
12800
12801 ret->doc = NULL;
12802 tmp = ret->children;
12803 while (tmp != NULL) {
12804 tmp->doc = NULL;
12805 tmp = tmp->next;
12806 }
12807 }
12808 } else {
12809 ret = NULL;
12810 }
12811 xmlFreeDoc(ctxt->myDoc);
12812 ctxt->myDoc = NULL;
12813 }
12814 if (sax != NULL) ctxt->sax = NULL;
12815 xmlFreeParserCtxt(ctxt);
12816
12817 return(ret);
12818}
12819
12820/**
12821 * xmlSAXParseDTD:
12822 * @sax: the SAX handler block
12823 * @ExternalID: a NAME* containing the External ID of the DTD
12824 * @SystemID: a NAME* containing the URL to the DTD
12825 *
12826 * Load and parse an external subset.
12827 *
12828 * Returns the resulting xmlDtdPtr or NULL in case of error.
12829 */
12830
12831xmlDtdPtr
12832xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12833 const xmlChar *SystemID) {
12834 xmlDtdPtr ret = NULL;
12835 xmlParserCtxtPtr ctxt;
12836 xmlParserInputPtr input = NULL;
12837 xmlCharEncoding enc;
12838 xmlChar* systemIdCanonic;
12839
12840 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12841
12842 ctxt = xmlNewParserCtxt();
12843 if (ctxt == NULL) {
12844 return(NULL);
12845 }
12846
12847 /* We are loading a DTD */
12848 ctxt->options |= XML_PARSE_DTDLOAD;
12849
12850 /*
12851 * Set-up the SAX context
12852 */
12853 if (sax != NULL) {
12854 if (ctxt->sax != NULL)
12855 xmlFree(ctxt->sax);
12856 ctxt->sax = sax;
12857 ctxt->userData = ctxt;
12858 }
12859
12860 /*
12861 * Canonicalise the system ID
12862 */
12863 systemIdCanonic = xmlCanonicPath(SystemID);
12864 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12865 xmlFreeParserCtxt(ctxt);
12866 return(NULL);
12867 }
12868
12869 /*
12870 * Ask the Entity resolver to load the damn thing
12871 */
12872
12873 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12874 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12875 systemIdCanonic);
12876 if (input == NULL) {
12877 if (sax != NULL) ctxt->sax = NULL;
12878 xmlFreeParserCtxt(ctxt);
12879 if (systemIdCanonic != NULL)
12880 xmlFree(systemIdCanonic);
12881 return(NULL);
12882 }
12883
12884 /*
12885 * plug some encoding conversion routines here.
12886 */
12887 if (xmlPushInput(ctxt, input) < 0) {
12888 if (sax != NULL) ctxt->sax = NULL;
12889 xmlFreeParserCtxt(ctxt);
12890 if (systemIdCanonic != NULL)
12891 xmlFree(systemIdCanonic);
12892 return(NULL);
12893 }
12894 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12895 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12896 xmlSwitchEncoding(ctxt, enc);
12897 }
12898
12899 if (input->filename == NULL)
12900 input->filename = (char *) systemIdCanonic;
12901 else
12902 xmlFree(systemIdCanonic);
12903 input->line = 1;
12904 input->col = 1;
12905 input->base = ctxt->input->cur;
12906 input->cur = ctxt->input->cur;
12907 input->free = NULL;
12908
12909 /*
12910 * let's parse that entity knowing it's an external subset.
12911 */
12912 ctxt->inSubset = 2;
12913 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12914 if (ctxt->myDoc == NULL) {
12915 xmlErrMemory(ctxt, "New Doc failed");
12916 if (sax != NULL) ctxt->sax = NULL;
12917 xmlFreeParserCtxt(ctxt);
12918 return(NULL);
12919 }
12920 ctxt->myDoc->properties = XML_DOC_INTERNAL;
12921 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12922 ExternalID, SystemID);
12923 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12924
12925 if (ctxt->myDoc != NULL) {
12926 if (ctxt->wellFormed) {
12927 ret = ctxt->myDoc->extSubset;
12928 ctxt->myDoc->extSubset = NULL;
12929 if (ret != NULL) {
12930 xmlNodePtr tmp;
12931
12932 ret->doc = NULL;
12933 tmp = ret->children;
12934 while (tmp != NULL) {
12935 tmp->doc = NULL;
12936 tmp = tmp->next;
12937 }
12938 }
12939 } else {
12940 ret = NULL;
12941 }
12942 xmlFreeDoc(ctxt->myDoc);
12943 ctxt->myDoc = NULL;
12944 }
12945 if (sax != NULL) ctxt->sax = NULL;
12946 xmlFreeParserCtxt(ctxt);
12947
12948 return(ret);
12949}
12950
12951
12952/**
12953 * xmlParseDTD:
12954 * @ExternalID: a NAME* containing the External ID of the DTD
12955 * @SystemID: a NAME* containing the URL to the DTD
12956 *
12957 * Load and parse an external subset.
12958 *
12959 * Returns the resulting xmlDtdPtr or NULL in case of error.
12960 */
12961
12962xmlDtdPtr
12963xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12964 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12965}
12966#endif /* LIBXML_VALID_ENABLED */
12967
12968/************************************************************************
12969 * *
12970 * Front ends when parsing an Entity *
12971 * *
12972 ************************************************************************/
12973
12974/**
12975 * xmlParseCtxtExternalEntity:
12976 * @ctx: the existing parsing context
12977 * @URL: the URL for the entity to load
12978 * @ID: the System ID for the entity to load
12979 * @lst: the return value for the set of parsed nodes
12980 *
12981 * Parse an external general entity within an existing parsing context
12982 * An external general parsed entity is well-formed if it matches the
12983 * production labeled extParsedEnt.
12984 *
12985 * [78] extParsedEnt ::= TextDecl? content
12986 *
12987 * Returns 0 if the entity is well formed, -1 in case of args problem and
12988 * the parser error code otherwise
12989 */
12990
12991int
12992xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12993 const xmlChar *ID, xmlNodePtr *lst) {
12994 xmlParserCtxtPtr ctxt;
12995 xmlDocPtr newDoc;
12996 xmlNodePtr newRoot;
12997 xmlSAXHandlerPtr oldsax = NULL;
12998 int ret = 0;
12999 xmlChar start[4];
13000 xmlCharEncoding enc;
13001
13002 if (ctx == NULL) return(-1);
13003
13004 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
13005 (ctx->depth > 1024)) {
13006 return(XML_ERR_ENTITY_LOOP);
13007 }
13008
13009 if (lst != NULL)
13010 *lst = NULL;
13011 if ((URL == NULL) && (ID == NULL))
13012 return(-1);
13013 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
13014 return(-1);
13015
13016 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
13017 if (ctxt == NULL) {
13018 return(-1);
13019 }
13020
13021 oldsax = ctxt->sax;
13022 ctxt->sax = ctx->sax;
13023 xmlDetectSAX2(ctxt);
13024 newDoc = xmlNewDoc(BAD_CAST "1.0");
13025 if (newDoc == NULL) {
13026 xmlFreeParserCtxt(ctxt);
13027 return(-1);
13028 }
13029 newDoc->properties = XML_DOC_INTERNAL;
13030 if (ctx->myDoc->dict) {
13031 newDoc->dict = ctx->myDoc->dict;
13032 xmlDictReference(newDoc->dict);
13033 }
13034 if (ctx->myDoc != NULL) {
13035 newDoc->intSubset = ctx->myDoc->intSubset;
13036 newDoc->extSubset = ctx->myDoc->extSubset;
13037 }
13038 if (ctx->myDoc->URL != NULL) {
13039 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
13040 }
13041 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13042 if (newRoot == NULL) {
13043 ctxt->sax = oldsax;
13044 xmlFreeParserCtxt(ctxt);
13045 newDoc->intSubset = NULL;
13046 newDoc->extSubset = NULL;
13047 xmlFreeDoc(newDoc);
13048 return(-1);
13049 }
13050 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13051 nodePush(ctxt, newDoc->children);
13052 if (ctx->myDoc == NULL) {
13053 ctxt->myDoc = newDoc;
13054 } else {
13055 ctxt->myDoc = ctx->myDoc;
13056 newDoc->children->doc = ctx->myDoc;
13057 }
13058
13059 /*
13060 * Get the 4 first bytes and decode the charset
13061 * if enc != XML_CHAR_ENCODING_NONE
13062 * plug some encoding conversion routines.
13063 */
13064 GROW
13065 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13066 start[0] = RAW;
13067 start[1] = NXT(1);
13068 start[2] = NXT(2);
13069 start[3] = NXT(3);
13070 enc = xmlDetectCharEncoding(start, 4);
13071 if (enc != XML_CHAR_ENCODING_NONE) {
13072 xmlSwitchEncoding(ctxt, enc);
13073 }
13074 }
13075
13076 /*
13077 * Parse a possible text declaration first
13078 */
13079 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
13080 xmlParseTextDecl(ctxt);
13081 /*
13082 * An XML-1.0 document can't reference an entity not XML-1.0
13083 */
13084 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
13085 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
13086 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
13087 "Version mismatch between document and entity\n");
13088 }
13089 }
13090
13091 /*
13092 * If the user provided its own SAX callbacks then reuse the
13093 * useData callback field, otherwise the expected setup in a
13094 * DOM builder is to have userData == ctxt
13095 */
13096 if (ctx->userData == ctx)
13097 ctxt->userData = ctxt;
13098 else
13099 ctxt->userData = ctx->userData;
13100
13101 /*
13102 * Doing validity checking on chunk doesn't make sense
13103 */
13104 ctxt->instate = XML_PARSER_CONTENT;
13105 ctxt->validate = ctx->validate;
13106 ctxt->valid = ctx->valid;
13107 ctxt->loadsubset = ctx->loadsubset;
13108 ctxt->depth = ctx->depth + 1;
13109 ctxt->replaceEntities = ctx->replaceEntities;
13110 if (ctxt->validate) {
13111 ctxt->vctxt.error = ctx->vctxt.error;
13112 ctxt->vctxt.warning = ctx->vctxt.warning;
13113 } else {
13114 ctxt->vctxt.error = NULL;
13115 ctxt->vctxt.warning = NULL;
13116 }
13117 ctxt->vctxt.nodeTab = NULL;
13118 ctxt->vctxt.nodeNr = 0;
13119 ctxt->vctxt.nodeMax = 0;
13120 ctxt->vctxt.node = NULL;
13121 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13122 ctxt->dict = ctx->dict;
13123 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13124 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13125 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13126 ctxt->dictNames = ctx->dictNames;
13127 ctxt->attsDefault = ctx->attsDefault;
13128 ctxt->attsSpecial = ctx->attsSpecial;
13129 ctxt->linenumbers = ctx->linenumbers;
13130
13131 xmlParseContent(ctxt);
13132
13133 ctx->validate = ctxt->validate;
13134 ctx->valid = ctxt->valid;
13135 if ((RAW == '<') && (NXT(1) == '/')) {
13136 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13137 } else if (RAW != 0) {
13138 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13139 }
13140 if (ctxt->node != newDoc->children) {
13141 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13142 }
13143
13144 if (!ctxt->wellFormed) {
13145 if (ctxt->errNo == 0)
13146 ret = 1;
13147 else
13148 ret = ctxt->errNo;
13149 } else {
13150 if (lst != NULL) {
13151 xmlNodePtr cur;
13152
13153 /*
13154 * Return the newly created nodeset after unlinking it from
13155 * they pseudo parent.
13156 */
13157 cur = newDoc->children->children;
13158 *lst = cur;
13159 while (cur != NULL) {
13160 cur->parent = NULL;
13161 cur = cur->next;
13162 }
13163 newDoc->children->children = NULL;
13164 }
13165 ret = 0;
13166 }
13167 ctxt->sax = oldsax;
13168 ctxt->dict = NULL;
13169 ctxt->attsDefault = NULL;
13170 ctxt->attsSpecial = NULL;
13171 xmlFreeParserCtxt(ctxt);
13172 newDoc->intSubset = NULL;
13173 newDoc->extSubset = NULL;
13174 xmlFreeDoc(newDoc);
13175
13176 return(ret);
13177}
13178
13179/**
13180 * xmlParseExternalEntityPrivate:
13181 * @doc: the document the chunk pertains to
13182 * @oldctxt: the previous parser context if available
13183 * @sax: the SAX handler bloc (possibly NULL)
13184 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13185 * @depth: Used for loop detection, use 0
13186 * @URL: the URL for the entity to load
13187 * @ID: the System ID for the entity to load
13188 * @list: the return value for the set of parsed nodes
13189 *
13190 * Private version of xmlParseExternalEntity()
13191 *
13192 * Returns 0 if the entity is well formed, -1 in case of args problem and
13193 * the parser error code otherwise
13194 */
13195
13196static xmlParserErrors
13197xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13198 xmlSAXHandlerPtr sax,
13199 void *user_data, int depth, const xmlChar *URL,
13200 const xmlChar *ID, xmlNodePtr *list) {
13201 xmlParserCtxtPtr ctxt;
13202 xmlDocPtr newDoc;
13203 xmlNodePtr newRoot;
13204 xmlSAXHandlerPtr oldsax = NULL;
13205 xmlParserErrors ret = XML_ERR_OK;
13206 xmlChar start[4];
13207 xmlCharEncoding enc;
13208
13209 if (((depth > 40) &&
13210 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13211 (depth > 1024)) {
13212 return(XML_ERR_ENTITY_LOOP);
13213 }
13214
13215 if (list != NULL)
13216 *list = NULL;
13217 if ((URL == NULL) && (ID == NULL))
13218 return(XML_ERR_INTERNAL_ERROR);
13219 if (doc == NULL)
13220 return(XML_ERR_INTERNAL_ERROR);
13221
13222
13223 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
13224 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13225 ctxt->userData = ctxt;
13226 if (oldctxt != NULL) {
13227 ctxt->_private = oldctxt->_private;
13228 ctxt->loadsubset = oldctxt->loadsubset;
13229 ctxt->validate = oldctxt->validate;
13230 ctxt->external = oldctxt->external;
13231 ctxt->record_info = oldctxt->record_info;
13232 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13233 ctxt->node_seq.length = oldctxt->node_seq.length;
13234 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
13235 } else {
13236 /*
13237 * Doing validity checking on chunk without context
13238 * doesn't make sense
13239 */
13240 ctxt->_private = NULL;
13241 ctxt->validate = 0;
13242 ctxt->external = 2;
13243 ctxt->loadsubset = 0;
13244 }
13245 if (sax != NULL) {
13246 oldsax = ctxt->sax;
13247 ctxt->sax = sax;
13248 if (user_data != NULL)
13249 ctxt->userData = user_data;
13250 }
13251 xmlDetectSAX2(ctxt);
13252 newDoc = xmlNewDoc(BAD_CAST "1.0");
13253 if (newDoc == NULL) {
13254 ctxt->node_seq.maximum = 0;
13255 ctxt->node_seq.length = 0;
13256 ctxt->node_seq.buffer = NULL;
13257 xmlFreeParserCtxt(ctxt);
13258 return(XML_ERR_INTERNAL_ERROR);
13259 }
13260 newDoc->properties = XML_DOC_INTERNAL;
13261 newDoc->intSubset = doc->intSubset;
13262 newDoc->extSubset = doc->extSubset;
13263 newDoc->dict = doc->dict;
13264 xmlDictReference(newDoc->dict);
13265
13266 if (doc->URL != NULL) {
13267 newDoc->URL = xmlStrdup(doc->URL);
13268 }
13269 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13270 if (newRoot == NULL) {
13271 if (sax != NULL)
13272 ctxt->sax = oldsax;
13273 ctxt->node_seq.maximum = 0;
13274 ctxt->node_seq.length = 0;
13275 ctxt->node_seq.buffer = NULL;
13276 xmlFreeParserCtxt(ctxt);
13277 newDoc->intSubset = NULL;
13278 newDoc->extSubset = NULL;
13279 xmlFreeDoc(newDoc);
13280 return(XML_ERR_INTERNAL_ERROR);
13281 }
13282 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13283 nodePush(ctxt, newDoc->children);
13284 ctxt->myDoc = doc;
13285 newRoot->doc = doc;
13286
13287 /*
13288 * Get the 4 first bytes and decode the charset
13289 * if enc != XML_CHAR_ENCODING_NONE
13290 * plug some encoding conversion routines.
13291 */
13292 GROW;
13293 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13294 start[0] = RAW;
13295 start[1] = NXT(1);
13296 start[2] = NXT(2);
13297 start[3] = NXT(3);
13298 enc = xmlDetectCharEncoding(start, 4);
13299 if (enc != XML_CHAR_ENCODING_NONE) {
13300 xmlSwitchEncoding(ctxt, enc);
13301 }
13302 }
13303
13304 /*
13305 * Parse a possible text declaration first
13306 */
13307 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
13308 xmlParseTextDecl(ctxt);
13309 }
13310
13311 ctxt->instate = XML_PARSER_CONTENT;
13312 ctxt->depth = depth;
13313
13314 xmlParseContent(ctxt);
13315
13316 if ((RAW == '<') && (NXT(1) == '/')) {
13317 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13318 } else if (RAW != 0) {
13319 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13320 }
13321 if (ctxt->node != newDoc->children) {
13322 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13323 }
13324
13325 if (!ctxt->wellFormed) {
13326 if (ctxt->errNo == 0)
13327 ret = XML_ERR_INTERNAL_ERROR;
13328 else
13329 ret = (xmlParserErrors)ctxt->errNo;
13330 } else {
13331 if (list != NULL) {
13332 xmlNodePtr cur;
13333
13334 /*
13335 * Return the newly created nodeset after unlinking it from
13336 * they pseudo parent.
13337 */
13338 cur = newDoc->children->children;
13339 *list = cur;
13340 while (cur != NULL) {
13341 cur->parent = NULL;
13342 cur = cur->next;
13343 }
13344 newDoc->children->children = NULL;
13345 }
13346 ret = XML_ERR_OK;
13347 }
13348
13349 /*
13350 * Record in the parent context the number of entities replacement
13351 * done when parsing that reference.
13352 */
13353 if (oldctxt != NULL)
13354 oldctxt->nbentities += ctxt->nbentities;
13355
13356 /*
13357 * Also record the size of the entity parsed
13358 */
13359 if (ctxt->input != NULL) {
13360 oldctxt->sizeentities += ctxt->input->consumed;
13361 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13362 }
13363 /*
13364 * And record the last error if any
13365 */
13366 if (ctxt->lastError.code != XML_ERR_OK)
13367 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13368
13369 if (sax != NULL)
13370 ctxt->sax = oldsax;
13371 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13372 oldctxt->node_seq.length = ctxt->node_seq.length;
13373 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13374 ctxt->node_seq.maximum = 0;
13375 ctxt->node_seq.length = 0;
13376 ctxt->node_seq.buffer = NULL;
13377 xmlFreeParserCtxt(ctxt);
13378 newDoc->intSubset = NULL;
13379 newDoc->extSubset = NULL;
13380 xmlFreeDoc(newDoc);
13381
13382 return(ret);
13383}
13384
13385#ifdef LIBXML_SAX1_ENABLED
13386/**
13387 * xmlParseExternalEntity:
13388 * @doc: the document the chunk pertains to
13389 * @sax: the SAX handler bloc (possibly NULL)
13390 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13391 * @depth: Used for loop detection, use 0
13392 * @URL: the URL for the entity to load
13393 * @ID: the System ID for the entity to load
13394 * @lst: the return value for the set of parsed nodes
13395 *
13396 * Parse an external general entity
13397 * An external general parsed entity is well-formed if it matches the
13398 * production labeled extParsedEnt.
13399 *
13400 * [78] extParsedEnt ::= TextDecl? content
13401 *
13402 * Returns 0 if the entity is well formed, -1 in case of args problem and
13403 * the parser error code otherwise
13404 */
13405
13406int
13407xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13408 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
13409 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13410 ID, lst));
13411}
13412
13413/**
13414 * xmlParseBalancedChunkMemory:
13415 * @doc: the document the chunk pertains to
13416 * @sax: the SAX handler bloc (possibly NULL)
13417 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13418 * @depth: Used for loop detection, use 0
13419 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13420 * @lst: the return value for the set of parsed nodes
13421 *
13422 * Parse a well-balanced chunk of an XML document
13423 * called by the parser
13424 * The allowed sequence for the Well Balanced Chunk is the one defined by
13425 * the content production in the XML grammar:
13426 *
13427 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13428 *
13429 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13430 * the parser error code otherwise
13431 */
13432
13433int
13434xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13435 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
13436 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13437 depth, string, lst, 0 );
13438}
13439#endif /* LIBXML_SAX1_ENABLED */
13440
13441/**
13442 * xmlParseBalancedChunkMemoryInternal:
13443 * @oldctxt: the existing parsing context
13444 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13445 * @user_data: the user data field for the parser context
13446 * @lst: the return value for the set of parsed nodes
13447 *
13448 *
13449 * Parse a well-balanced chunk of an XML document
13450 * called by the parser
13451 * The allowed sequence for the Well Balanced Chunk is the one defined by
13452 * the content production in the XML grammar:
13453 *
13454 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13455 *
13456 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13457 * error code otherwise
13458 *
13459 * In case recover is set to 1, the nodelist will not be empty even if
13460 * the parsed chunk is not well balanced.
13461 */
13462static xmlParserErrors
13463xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13464 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13465 xmlParserCtxtPtr ctxt;
13466 xmlDocPtr newDoc = NULL;
13467 xmlNodePtr newRoot;
13468 xmlSAXHandlerPtr oldsax = NULL;
13469 xmlNodePtr content = NULL;
13470 xmlNodePtr last = NULL;
13471 int size;
13472 xmlParserErrors ret = XML_ERR_OK;
13473#ifdef SAX2
13474 int i;
13475#endif
13476
13477 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13478 (oldctxt->depth > 1024)) {
13479 return(XML_ERR_ENTITY_LOOP);
13480 }
13481
13482
13483 if (lst != NULL)
13484 *lst = NULL;
13485 if (string == NULL)
13486 return(XML_ERR_INTERNAL_ERROR);
13487
13488 size = xmlStrlen(string);
13489
13490 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13491 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13492 if (user_data != NULL)
13493 ctxt->userData = user_data;
13494 else
13495 ctxt->userData = ctxt;
13496 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13497 ctxt->dict = oldctxt->dict;
13498 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13499 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13500 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13501
13502#ifdef SAX2
13503 /* propagate namespaces down the entity */
13504 for (i = 0;i < oldctxt->nsNr;i += 2) {
13505 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13506 }
13507#endif
13508
13509 oldsax = ctxt->sax;
13510 ctxt->sax = oldctxt->sax;
13511 xmlDetectSAX2(ctxt);
13512 ctxt->replaceEntities = oldctxt->replaceEntities;
13513 ctxt->options = oldctxt->options;
13514
13515 ctxt->_private = oldctxt->_private;
13516 if (oldctxt->myDoc == NULL) {
13517 newDoc = xmlNewDoc(BAD_CAST "1.0");
13518 if (newDoc == NULL) {
13519 ctxt->sax = oldsax;
13520 ctxt->dict = NULL;
13521 xmlFreeParserCtxt(ctxt);
13522 return(XML_ERR_INTERNAL_ERROR);
13523 }
13524 newDoc->properties = XML_DOC_INTERNAL;
13525 newDoc->dict = ctxt->dict;
13526 xmlDictReference(newDoc->dict);
13527 ctxt->myDoc = newDoc;
13528 } else {
13529 ctxt->myDoc = oldctxt->myDoc;
13530 content = ctxt->myDoc->children;
13531 last = ctxt->myDoc->last;
13532 }
13533 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13534 if (newRoot == NULL) {
13535 ctxt->sax = oldsax;
13536 ctxt->dict = NULL;
13537 xmlFreeParserCtxt(ctxt);
13538 if (newDoc != NULL) {
13539 xmlFreeDoc(newDoc);
13540 }
13541 return(XML_ERR_INTERNAL_ERROR);
13542 }
13543 ctxt->myDoc->children = NULL;
13544 ctxt->myDoc->last = NULL;
13545 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13546 nodePush(ctxt, ctxt->myDoc->children);
13547 ctxt->instate = XML_PARSER_CONTENT;
13548 ctxt->depth = oldctxt->depth + 1;
13549
13550 ctxt->validate = 0;
13551 ctxt->loadsubset = oldctxt->loadsubset;
13552 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13553 /*
13554 * ID/IDREF registration will be done in xmlValidateElement below
13555 */
13556 ctxt->loadsubset |= XML_SKIP_IDS;
13557 }
13558 ctxt->dictNames = oldctxt->dictNames;
13559 ctxt->attsDefault = oldctxt->attsDefault;
13560 ctxt->attsSpecial = oldctxt->attsSpecial;
13561
13562 xmlParseContent(ctxt);
13563 if ((RAW == '<') && (NXT(1) == '/')) {
13564 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13565 } else if (RAW != 0) {
13566 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13567 }
13568 if (ctxt->node != ctxt->myDoc->children) {
13569 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13570 }
13571
13572 if (!ctxt->wellFormed) {
13573 if (ctxt->errNo == 0)
13574 ret = XML_ERR_INTERNAL_ERROR;
13575 else
13576 ret = (xmlParserErrors)ctxt->errNo;
13577 } else {
13578 ret = XML_ERR_OK;
13579 }
13580
13581 if ((lst != NULL) && (ret == XML_ERR_OK)) {
13582 xmlNodePtr cur;
13583
13584 /*
13585 * Return the newly created nodeset after unlinking it from
13586 * they pseudo parent.
13587 */
13588 cur = ctxt->myDoc->children->children;
13589 *lst = cur;
13590 while (cur != NULL) {
13591#ifdef LIBXML_VALID_ENABLED
13592 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13593 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13594 (cur->type == XML_ELEMENT_NODE)) {
13595 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13596 oldctxt->myDoc, cur);
13597 }
13598#endif /* LIBXML_VALID_ENABLED */
13599 cur->parent = NULL;
13600 cur = cur->next;
13601 }
13602 ctxt->myDoc->children->children = NULL;
13603 }
13604 if (ctxt->myDoc != NULL) {
13605 xmlFreeNode(ctxt->myDoc->children);
13606 ctxt->myDoc->children = content;
13607 ctxt->myDoc->last = last;
13608 }
13609
13610 /*
13611 * Record in the parent context the number of entities replacement
13612 * done when parsing that reference.
13613 */
13614 if (oldctxt != NULL)
13615 oldctxt->nbentities += ctxt->nbentities;
13616
13617 /*
13618 * Also record the last error if any
13619 */
13620 if (ctxt->lastError.code != XML_ERR_OK)
13621 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13622
13623 ctxt->sax = oldsax;
13624 ctxt->dict = NULL;
13625 ctxt->attsDefault = NULL;
13626 ctxt->attsSpecial = NULL;
13627 xmlFreeParserCtxt(ctxt);
13628 if (newDoc != NULL) {
13629 xmlFreeDoc(newDoc);
13630 }
13631
13632 return(ret);
13633}
13634
13635/**
13636 * xmlParseInNodeContext:
13637 * @node: the context node
13638 * @data: the input string
13639 * @datalen: the input string length in bytes
13640 * @options: a combination of xmlParserOption
13641 * @lst: the return value for the set of parsed nodes
13642 *
13643 * Parse a well-balanced chunk of an XML document
13644 * within the context (DTD, namespaces, etc ...) of the given node.
13645 *
13646 * The allowed sequence for the data is a Well Balanced Chunk defined by
13647 * the content production in the XML grammar:
13648 *
13649 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13650 *
13651 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13652 * error code otherwise
13653 */
13654xmlParserErrors
13655xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13656 int options, xmlNodePtr *lst) {
13657#ifdef SAX2
13658 xmlParserCtxtPtr ctxt;
13659 xmlDocPtr doc = NULL;
13660 xmlNodePtr fake, cur;
13661 int nsnr = 0;
13662
13663 xmlParserErrors ret = XML_ERR_OK;
13664
13665 /*
13666 * check all input parameters, grab the document
13667 */
13668 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13669 return(XML_ERR_INTERNAL_ERROR);
13670 switch (node->type) {
13671 case XML_ELEMENT_NODE:
13672 case XML_ATTRIBUTE_NODE:
13673 case XML_TEXT_NODE:
13674 case XML_CDATA_SECTION_NODE:
13675 case XML_ENTITY_REF_NODE:
13676 case XML_PI_NODE:
13677 case XML_COMMENT_NODE:
13678 case XML_DOCUMENT_NODE:
13679 case XML_HTML_DOCUMENT_NODE:
13680 break;
13681 default:
13682 return(XML_ERR_INTERNAL_ERROR);
13683
13684 }
13685 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13686 (node->type != XML_DOCUMENT_NODE) &&
13687 (node->type != XML_HTML_DOCUMENT_NODE))
13688 node = node->parent;
13689 if (node == NULL)
13690 return(XML_ERR_INTERNAL_ERROR);
13691 if (node->type == XML_ELEMENT_NODE)
13692 doc = node->doc;
13693 else
13694 doc = (xmlDocPtr) node;
13695 if (doc == NULL)
13696 return(XML_ERR_INTERNAL_ERROR);
13697
13698 /*
13699 * allocate a context and set-up everything not related to the
13700 * node position in the tree
13701 */
13702 if (doc->type == XML_DOCUMENT_NODE)
13703 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13704#ifdef LIBXML_HTML_ENABLED
13705 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13706 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13707 /*
13708 * When parsing in context, it makes no sense to add implied
13709 * elements like html/body/etc...
13710 */
13711 options |= HTML_PARSE_NOIMPLIED;
13712 }
13713#endif
13714 else
13715 return(XML_ERR_INTERNAL_ERROR);
13716
13717 if (ctxt == NULL)
13718 return(XML_ERR_NO_MEMORY);
13719
13720 /*
13721 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13722 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13723 * we must wait until the last moment to free the original one.
13724 */
13725 if (doc->dict != NULL) {
13726 if (ctxt->dict != NULL)
13727 xmlDictFree(ctxt->dict);
13728 ctxt->dict = doc->dict;
13729 } else
13730 options |= XML_PARSE_NODICT;
13731
13732 if (doc->encoding != NULL) {
13733 xmlCharEncodingHandlerPtr hdlr;
13734
13735 if (ctxt->encoding != NULL)
13736 xmlFree((xmlChar *) ctxt->encoding);
13737 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13738
13739 hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13740 if (hdlr != NULL) {
13741 xmlSwitchToEncoding(ctxt, hdlr);
13742 } else {
13743 return(XML_ERR_UNSUPPORTED_ENCODING);
13744 }
13745 }
13746
13747 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13748 xmlDetectSAX2(ctxt);
13749 ctxt->myDoc = doc;
13750 /* parsing in context, i.e. as within existing content */
13751 ctxt->instate = XML_PARSER_CONTENT;
13752
13753 fake = xmlNewComment(NULL);
13754 if (fake == NULL) {
13755 xmlFreeParserCtxt(ctxt);
13756 return(XML_ERR_NO_MEMORY);
13757 }
13758 xmlAddChild(node, fake);
13759
13760 if (node->type == XML_ELEMENT_NODE) {
13761 nodePush(ctxt, node);
13762 /*
13763 * initialize the SAX2 namespaces stack
13764 */
13765 cur = node;
13766 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13767 xmlNsPtr ns = cur->nsDef;
13768 const xmlChar *iprefix, *ihref;
13769
13770 while (ns != NULL) {
13771 if (ctxt->dict) {
13772 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13773 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13774 } else {
13775 iprefix = ns->prefix;
13776 ihref = ns->href;
13777 }
13778
13779 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13780 nsPush(ctxt, iprefix, ihref);
13781 nsnr++;
13782 }
13783 ns = ns->next;
13784 }
13785 cur = cur->parent;
13786 }
13787 }
13788
13789 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13790 /*
13791 * ID/IDREF registration will be done in xmlValidateElement below
13792 */
13793 ctxt->loadsubset |= XML_SKIP_IDS;
13794 }
13795
13796#ifdef LIBXML_HTML_ENABLED
13797 if (doc->type == XML_HTML_DOCUMENT_NODE)
13798 __htmlParseContent(ctxt);
13799 else
13800#endif
13801 xmlParseContent(ctxt);
13802
13803 nsPop(ctxt, nsnr);
13804 if ((RAW == '<') && (NXT(1) == '/')) {
13805 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13806 } else if (RAW != 0) {
13807 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13808 }
13809 if ((ctxt->node != NULL) && (ctxt->node != node)) {
13810 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13811 ctxt->wellFormed = 0;
13812 }
13813
13814 if (!ctxt->wellFormed) {
13815 if (ctxt->errNo == 0)
13816 ret = XML_ERR_INTERNAL_ERROR;
13817 else
13818 ret = (xmlParserErrors)ctxt->errNo;
13819 } else {
13820 ret = XML_ERR_OK;
13821 }
13822
13823 /*
13824 * Return the newly created nodeset after unlinking it from
13825 * the pseudo sibling.
13826 */
13827
13828 cur = fake->next;
13829 fake->next = NULL;
13830 node->last = fake;
13831
13832 if (cur != NULL) {
13833 cur->prev = NULL;
13834 }
13835
13836 *lst = cur;
13837
13838 while (cur != NULL) {
13839 cur->parent = NULL;
13840 cur = cur->next;
13841 }
13842
13843 xmlUnlinkNode(fake);
13844 xmlFreeNode(fake);
13845
13846
13847 if (ret != XML_ERR_OK) {
13848 xmlFreeNodeList(*lst);
13849 *lst = NULL;
13850 }
13851
13852 if (doc->dict != NULL)
13853 ctxt->dict = NULL;
13854 xmlFreeParserCtxt(ctxt);
13855
13856 return(ret);
13857#else /* !SAX2 */
13858 return(XML_ERR_INTERNAL_ERROR);
13859#endif
13860}
13861
13862#ifdef LIBXML_SAX1_ENABLED
13863/**
13864 * xmlParseBalancedChunkMemoryRecover:
13865 * @doc: the document the chunk pertains to
13866 * @sax: the SAX handler bloc (possibly NULL)
13867 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13868 * @depth: Used for loop detection, use 0
13869 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13870 * @lst: the return value for the set of parsed nodes
13871 * @recover: return nodes even if the data is broken (use 0)
13872 *
13873 *
13874 * Parse a well-balanced chunk of an XML document
13875 * called by the parser
13876 * The allowed sequence for the Well Balanced Chunk is the one defined by
13877 * the content production in the XML grammar:
13878 *
13879 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13880 *
13881 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13882 * the parser error code otherwise
13883 *
13884 * In case recover is set to 1, the nodelist will not be empty even if
13885 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13886 * some extent.
13887 */
13888int
13889xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13890 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13891 int recover) {
13892 xmlParserCtxtPtr ctxt;
13893 xmlDocPtr newDoc;
13894 xmlSAXHandlerPtr oldsax = NULL;
13895 xmlNodePtr content, newRoot;
13896 int size;
13897 int ret = 0;
13898
13899 if (depth > 40) {
13900 return(XML_ERR_ENTITY_LOOP);
13901 }
13902
13903
13904 if (lst != NULL)
13905 *lst = NULL;
13906 if (string == NULL)
13907 return(-1);
13908
13909 size = xmlStrlen(string);
13910
13911 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13912 if (ctxt == NULL) return(-1);
13913 ctxt->userData = ctxt;
13914 if (sax != NULL) {
13915 oldsax = ctxt->sax;
13916 ctxt->sax = sax;
13917 if (user_data != NULL)
13918 ctxt->userData = user_data;
13919 }
13920 newDoc = xmlNewDoc(BAD_CAST "1.0");
13921 if (newDoc == NULL) {
13922 xmlFreeParserCtxt(ctxt);
13923 return(-1);
13924 }
13925 newDoc->properties = XML_DOC_INTERNAL;
13926 if ((doc != NULL) && (doc->dict != NULL)) {
13927 xmlDictFree(ctxt->dict);
13928 ctxt->dict = doc->dict;
13929 xmlDictReference(ctxt->dict);
13930 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13931 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13932 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13933 ctxt->dictNames = 1;
13934 } else {
13935 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13936 }
13937 if (doc != NULL) {
13938 newDoc->intSubset = doc->intSubset;
13939 newDoc->extSubset = doc->extSubset;
13940 }
13941 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13942 if (newRoot == NULL) {
13943 if (sax != NULL)
13944 ctxt->sax = oldsax;
13945 xmlFreeParserCtxt(ctxt);
13946 newDoc->intSubset = NULL;
13947 newDoc->extSubset = NULL;
13948 xmlFreeDoc(newDoc);
13949 return(-1);
13950 }
13951 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13952 nodePush(ctxt, newRoot);
13953 if (doc == NULL) {
13954 ctxt->myDoc = newDoc;
13955 } else {
13956 ctxt->myDoc = newDoc;
13957 newDoc->children->doc = doc;
13958 /* Ensure that doc has XML spec namespace */
13959 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13960 newDoc->oldNs = doc->oldNs;
13961 }
13962 ctxt->instate = XML_PARSER_CONTENT;
13963 ctxt->depth = depth;
13964
13965 /*
13966 * Doing validity checking on chunk doesn't make sense
13967 */
13968 ctxt->validate = 0;
13969 ctxt->loadsubset = 0;
13970 xmlDetectSAX2(ctxt);
13971
13972 if ( doc != NULL ){
13973 content = doc->children;
13974 doc->children = NULL;
13975 xmlParseContent(ctxt);
13976 doc->children = content;
13977 }
13978 else {
13979 xmlParseContent(ctxt);
13980 }
13981 if ((RAW == '<') && (NXT(1) == '/')) {
13982 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13983 } else if (RAW != 0) {
13984 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13985 }
13986 if (ctxt->node != newDoc->children) {
13987 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13988 }
13989
13990 if (!ctxt->wellFormed) {
13991 if (ctxt->errNo == 0)
13992 ret = 1;
13993 else
13994 ret = ctxt->errNo;
13995 } else {
13996 ret = 0;
13997 }
13998
13999 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
14000 xmlNodePtr cur;
14001
14002 /*
14003 * Return the newly created nodeset after unlinking it from
14004 * they pseudo parent.
14005 */
14006 cur = newDoc->children->children;
14007 *lst = cur;
14008 while (cur != NULL) {
14009 xmlSetTreeDoc(cur, doc);
14010 cur->parent = NULL;
14011 cur = cur->next;
14012 }
14013 newDoc->children->children = NULL;
14014 }
14015
14016 if (sax != NULL)
14017 ctxt->sax = oldsax;
14018 xmlFreeParserCtxt(ctxt);
14019 newDoc->intSubset = NULL;
14020 newDoc->extSubset = NULL;
14021 newDoc->oldNs = NULL;
14022 xmlFreeDoc(newDoc);
14023
14024 return(ret);
14025}
14026
14027/**
14028 * xmlSAXParseEntity:
14029 * @sax: the SAX handler block
14030 * @filename: the filename
14031 *
14032 * parse an XML external entity out of context and build a tree.
14033 * It use the given SAX function block to handle the parsing callback.
14034 * If sax is NULL, fallback to the default DOM tree building routines.
14035 *
14036 * [78] extParsedEnt ::= TextDecl? content
14037 *
14038 * This correspond to a "Well Balanced" chunk
14039 *
14040 * Returns the resulting document tree
14041 */
14042
14043xmlDocPtr
14044xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
14045 xmlDocPtr ret;
14046 xmlParserCtxtPtr ctxt;
14047
14048 ctxt = xmlCreateFileParserCtxt(filename);
14049 if (ctxt == NULL) {
14050 return(NULL);
14051 }
14052 if (sax != NULL) {
14053 if (ctxt->sax != NULL)
14054 xmlFree(ctxt->sax);
14055 ctxt->sax = sax;
14056 ctxt->userData = NULL;
14057 }
14058
14059 xmlParseExtParsedEnt(ctxt);
14060
14061 if (ctxt->wellFormed)
14062 ret = ctxt->myDoc;
14063 else {
14064 ret = NULL;
14065 xmlFreeDoc(ctxt->myDoc);
14066 ctxt->myDoc = NULL;
14067 }
14068 if (sax != NULL)
14069 ctxt->sax = NULL;
14070 xmlFreeParserCtxt(ctxt);
14071
14072 return(ret);
14073}
14074
14075/**
14076 * xmlParseEntity:
14077 * @filename: the filename
14078 *
14079 * parse an XML external entity out of context and build a tree.
14080 *
14081 * [78] extParsedEnt ::= TextDecl? content
14082 *
14083 * This correspond to a "Well Balanced" chunk
14084 *
14085 * Returns the resulting document tree
14086 */
14087
14088xmlDocPtr
14089xmlParseEntity(const char *filename) {
14090 return(xmlSAXParseEntity(NULL, filename));
14091}
14092#endif /* LIBXML_SAX1_ENABLED */
14093
14094/**
14095 * xmlCreateEntityParserCtxtInternal:
14096 * @URL: the entity URL
14097 * @ID: the entity PUBLIC ID
14098 * @base: a possible base for the target URI
14099 * @pctx: parser context used to set options on new context
14100 *
14101 * Create a parser context for an external entity
14102 * Automatic support for ZLIB/Compress compressed document is provided
14103 * by default if found at compile-time.
14104 *
14105 * Returns the new parser context or NULL
14106 */
14107static xmlParserCtxtPtr
14108xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
14109 const xmlChar *base, xmlParserCtxtPtr pctx) {
14110 xmlParserCtxtPtr ctxt;
14111 xmlParserInputPtr inputStream;
14112 char *directory = NULL;
14113 xmlChar *uri;
14114
14115 ctxt = xmlNewParserCtxt();
14116 if (ctxt == NULL) {
14117 return(NULL);
14118 }
14119
14120 if (pctx != NULL) {
14121 ctxt->options = pctx->options;
14122 ctxt->_private = pctx->_private;
14123 }
14124
14125 uri = xmlBuildURI(URL, base);
14126
14127 if (uri == NULL) {
14128 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
14129 if (inputStream == NULL) {
14130 xmlFreeParserCtxt(ctxt);
14131 return(NULL);
14132 }
14133
14134 inputPush(ctxt, inputStream);
14135
14136 if ((ctxt->directory == NULL) && (directory == NULL))
14137 directory = xmlParserGetDirectory((char *)URL);
14138 if ((ctxt->directory == NULL) && (directory != NULL))
14139 ctxt->directory = directory;
14140 } else {
14141 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
14142 if (inputStream == NULL) {
14143 xmlFree(uri);
14144 xmlFreeParserCtxt(ctxt);
14145 return(NULL);
14146 }
14147
14148 inputPush(ctxt, inputStream);
14149
14150 if ((ctxt->directory == NULL) && (directory == NULL))
14151 directory = xmlParserGetDirectory((char *)uri);
14152 if ((ctxt->directory == NULL) && (directory != NULL))
14153 ctxt->directory = directory;
14154 xmlFree(uri);
14155 }
14156 return(ctxt);
14157}
14158
14159/**
14160 * xmlCreateEntityParserCtxt:
14161 * @URL: the entity URL
14162 * @ID: the entity PUBLIC ID
14163 * @base: a possible base for the target URI
14164 *
14165 * Create a parser context for an external entity
14166 * Automatic support for ZLIB/Compress compressed document is provided
14167 * by default if found at compile-time.
14168 *
14169 * Returns the new parser context or NULL
14170 */
14171xmlParserCtxtPtr
14172xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14173 const xmlChar *base) {
14174 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
14175
14176}
14177
14178/************************************************************************
14179 * *
14180 * Front ends when parsing from a file *
14181 * *
14182 ************************************************************************/
14183
14184/**
14185 * xmlCreateURLParserCtxt:
14186 * @filename: the filename or URL
14187 * @options: a combination of xmlParserOption
14188 *
14189 * Create a parser context for a file or URL content.
14190 * Automatic support for ZLIB/Compress compressed document is provided
14191 * by default if found at compile-time and for file accesses
14192 *
14193 * Returns the new parser context or NULL
14194 */
14195xmlParserCtxtPtr
14196xmlCreateURLParserCtxt(const char *filename, int options)
14197{
14198 xmlParserCtxtPtr ctxt;
14199 xmlParserInputPtr inputStream;
14200 char *directory = NULL;
14201
14202 ctxt = xmlNewParserCtxt();
14203 if (ctxt == NULL) {
14204 xmlErrMemory(NULL, "cannot allocate parser context");
14205 return(NULL);
14206 }
14207
14208 if (options)
14209 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
14210 ctxt->linenumbers = 1;
14211
14212 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
14213 if (inputStream == NULL) {
14214 xmlFreeParserCtxt(ctxt);
14215 return(NULL);
14216 }
14217
14218 inputPush(ctxt, inputStream);
14219 if ((ctxt->directory == NULL) && (directory == NULL))
14220 directory = xmlParserGetDirectory(filename);
14221 if ((ctxt->directory == NULL) && (directory != NULL))
14222 ctxt->directory = directory;
14223
14224 return(ctxt);
14225}
14226
14227/**
14228 * xmlCreateFileParserCtxt:
14229 * @filename: the filename
14230 *
14231 * Create a parser context for a file content.
14232 * Automatic support for ZLIB/Compress compressed document is provided
14233 * by default if found at compile-time.
14234 *
14235 * Returns the new parser context or NULL
14236 */
14237xmlParserCtxtPtr
14238xmlCreateFileParserCtxt(const char *filename)
14239{
14240 return(xmlCreateURLParserCtxt(filename, 0));
14241}
14242
14243#ifdef LIBXML_SAX1_ENABLED
14244/**
14245 * xmlSAXParseFileWithData:
14246 * @sax: the SAX handler block
14247 * @filename: the filename
14248 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14249 * documents
14250 * @data: the userdata
14251 *
14252 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14253 * compressed document is provided by default if found at compile-time.
14254 * It use the given SAX function block to handle the parsing callback.
14255 * If sax is NULL, fallback to the default DOM tree building routines.
14256 *
14257 * User data (void *) is stored within the parser context in the
14258 * context's _private member, so it is available nearly everywhere in libxml
14259 *
14260 * Returns the resulting document tree
14261 */
14262
14263xmlDocPtr
14264xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14265 int recovery, void *data) {
14266 xmlDocPtr ret;
14267 xmlParserCtxtPtr ctxt;
14268
14269 xmlInitParser();
14270
14271 ctxt = xmlCreateFileParserCtxt(filename);
14272 if (ctxt == NULL) {
14273 return(NULL);
14274 }
14275 if (sax != NULL) {
14276 if (ctxt->sax != NULL)
14277 xmlFree(ctxt->sax);
14278 ctxt->sax = sax;
14279 }
14280 xmlDetectSAX2(ctxt);
14281 if (data!=NULL) {
14282 ctxt->_private = data;
14283 }
14284
14285 if (ctxt->directory == NULL)
14286 ctxt->directory = xmlParserGetDirectory(filename);
14287
14288 ctxt->recovery = recovery;
14289
14290 xmlParseDocument(ctxt);
14291
14292 if ((ctxt->wellFormed) || recovery) {
14293 ret = ctxt->myDoc;
14294 if (ret != NULL) {
14295 if (ctxt->input->buf->compressed > 0)
14296 ret->compression = 9;
14297 else
14298 ret->compression = ctxt->input->buf->compressed;
14299 }
14300 }
14301 else {
14302 ret = NULL;
14303 xmlFreeDoc(ctxt->myDoc);
14304 ctxt->myDoc = NULL;
14305 }
14306 if (sax != NULL)
14307 ctxt->sax = NULL;
14308 xmlFreeParserCtxt(ctxt);
14309
14310 return(ret);
14311}
14312
14313/**
14314 * xmlSAXParseFile:
14315 * @sax: the SAX handler block
14316 * @filename: the filename
14317 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14318 * documents
14319 *
14320 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14321 * compressed document is provided by default if found at compile-time.
14322 * It use the given SAX function block to handle the parsing callback.
14323 * If sax is NULL, fallback to the default DOM tree building routines.
14324 *
14325 * Returns the resulting document tree
14326 */
14327
14328xmlDocPtr
14329xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14330 int recovery) {
14331 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14332}
14333
14334/**
14335 * xmlRecoverDoc:
14336 * @cur: a pointer to an array of xmlChar
14337 *
14338 * parse an XML in-memory document and build a tree.
14339 * In the case the document is not Well Formed, a attempt to build a
14340 * tree is tried anyway
14341 *
14342 * Returns the resulting document tree or NULL in case of failure
14343 */
14344
14345xmlDocPtr
14346xmlRecoverDoc(const xmlChar *cur) {
14347 return(xmlSAXParseDoc(NULL, cur, 1));
14348}
14349
14350/**
14351 * xmlParseFile:
14352 * @filename: the filename
14353 *
14354 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14355 * compressed document is provided by default if found at compile-time.
14356 *
14357 * Returns the resulting document tree if the file was wellformed,
14358 * NULL otherwise.
14359 */
14360
14361xmlDocPtr
14362xmlParseFile(const char *filename) {
14363 return(xmlSAXParseFile(NULL, filename, 0));
14364}
14365
14366/**
14367 * xmlRecoverFile:
14368 * @filename: the filename
14369 *
14370 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14371 * compressed document is provided by default if found at compile-time.
14372 * In the case the document is not Well Formed, it attempts to build
14373 * a tree anyway
14374 *
14375 * Returns the resulting document tree or NULL in case of failure
14376 */
14377
14378xmlDocPtr
14379xmlRecoverFile(const char *filename) {
14380 return(xmlSAXParseFile(NULL, filename, 1));
14381}
14382
14383
14384/**
14385 * xmlSetupParserForBuffer:
14386 * @ctxt: an XML parser context
14387 * @buffer: a xmlChar * buffer
14388 * @filename: a file name
14389 *
14390 * Setup the parser context to parse a new buffer; Clears any prior
14391 * contents from the parser context. The buffer parameter must not be
14392 * NULL, but the filename parameter can be
14393 */
14394void
14395xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14396 const char* filename)
14397{
14398 xmlParserInputPtr input;
14399
14400 if ((ctxt == NULL) || (buffer == NULL))
14401 return;
14402
14403 input = xmlNewInputStream(ctxt);
14404 if (input == NULL) {
14405 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14406 xmlClearParserCtxt(ctxt);
14407 return;
14408 }
14409
14410 xmlClearParserCtxt(ctxt);
14411 if (filename != NULL)
14412 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14413 input->base = buffer;
14414 input->cur = buffer;
14415 input->end = &buffer[xmlStrlen(buffer)];
14416 inputPush(ctxt, input);
14417}
14418
14419/**
14420 * xmlSAXUserParseFile:
14421 * @sax: a SAX handler
14422 * @user_data: The user data returned on SAX callbacks
14423 * @filename: a file name
14424 *
14425 * parse an XML file and call the given SAX handler routines.
14426 * Automatic support for ZLIB/Compress compressed document is provided
14427 *
14428 * Returns 0 in case of success or a error number otherwise
14429 */
14430int
14431xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14432 const char *filename) {
14433 int ret = 0;
14434 xmlParserCtxtPtr ctxt;
14435
14436 ctxt = xmlCreateFileParserCtxt(filename);
14437 if (ctxt == NULL) return -1;
14438 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14439 xmlFree(ctxt->sax);
14440 ctxt->sax = sax;
14441 xmlDetectSAX2(ctxt);
14442
14443 if (user_data != NULL)
14444 ctxt->userData = user_data;
14445
14446 xmlParseDocument(ctxt);
14447
14448 if (ctxt->wellFormed)
14449 ret = 0;
14450 else {
14451 if (ctxt->errNo != 0)
14452 ret = ctxt->errNo;
14453 else
14454 ret = -1;
14455 }
14456 if (sax != NULL)
14457 ctxt->sax = NULL;
14458 if (ctxt->myDoc != NULL) {
14459 xmlFreeDoc(ctxt->myDoc);
14460 ctxt->myDoc = NULL;
14461 }
14462 xmlFreeParserCtxt(ctxt);
14463
14464 return ret;
14465}
14466#endif /* LIBXML_SAX1_ENABLED */
14467
14468/************************************************************************
14469 * *
14470 * Front ends when parsing from memory *
14471 * *
14472 ************************************************************************/
14473
14474/**
14475 * xmlCreateMemoryParserCtxt:
14476 * @buffer: a pointer to a char array
14477 * @size: the size of the array
14478 *
14479 * Create a parser context for an XML in-memory document.
14480 *
14481 * Returns the new parser context or NULL
14482 */
14483xmlParserCtxtPtr
14484xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14485 xmlParserCtxtPtr ctxt;
14486 xmlParserInputPtr input;
14487 xmlParserInputBufferPtr buf;
14488
14489 if (buffer == NULL)
14490 return(NULL);
14491 if (size <= 0)
14492 return(NULL);
14493
14494 ctxt = xmlNewParserCtxt();
14495 if (ctxt == NULL)
14496 return(NULL);
14497
14498 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
14499 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14500 if (buf == NULL) {
14501 xmlFreeParserCtxt(ctxt);
14502 return(NULL);
14503 }
14504
14505 input = xmlNewInputStream(ctxt);
14506 if (input == NULL) {
14507 xmlFreeParserInputBuffer(buf);
14508 xmlFreeParserCtxt(ctxt);
14509 return(NULL);
14510 }
14511
14512 input->filename = NULL;
14513 input->buf = buf;
14514 xmlBufResetInput(input->buf->buffer, input);
14515
14516 inputPush(ctxt, input);
14517 return(ctxt);
14518}
14519
14520#ifdef LIBXML_SAX1_ENABLED
14521/**
14522 * xmlSAXParseMemoryWithData:
14523 * @sax: the SAX handler block
14524 * @buffer: an pointer to a char array
14525 * @size: the size of the array
14526 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14527 * documents
14528 * @data: the userdata
14529 *
14530 * parse an XML in-memory block and use the given SAX function block
14531 * to handle the parsing callback. If sax is NULL, fallback to the default
14532 * DOM tree building routines.
14533 *
14534 * User data (void *) is stored within the parser context in the
14535 * context's _private member, so it is available nearly everywhere in libxml
14536 *
14537 * Returns the resulting document tree
14538 */
14539
14540xmlDocPtr
14541xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14542 int size, int recovery, void *data) {
14543 xmlDocPtr ret;
14544 xmlParserCtxtPtr ctxt;
14545
14546 xmlInitParser();
14547
14548 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14549 if (ctxt == NULL) return(NULL);
14550 if (sax != NULL) {
14551 if (ctxt->sax != NULL)
14552 xmlFree(ctxt->sax);
14553 ctxt->sax = sax;
14554 }
14555 xmlDetectSAX2(ctxt);
14556 if (data!=NULL) {
14557 ctxt->_private=data;
14558 }
14559
14560 ctxt->recovery = recovery;
14561
14562 xmlParseDocument(ctxt);
14563
14564 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14565 else {
14566 ret = NULL;
14567 xmlFreeDoc(ctxt->myDoc);
14568 ctxt->myDoc = NULL;
14569 }
14570 if (sax != NULL)
14571 ctxt->sax = NULL;
14572 xmlFreeParserCtxt(ctxt);
14573
14574 return(ret);
14575}
14576
14577/**
14578 * xmlSAXParseMemory:
14579 * @sax: the SAX handler block
14580 * @buffer: an pointer to a char array
14581 * @size: the size of the array
14582 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14583 * documents
14584 *
14585 * parse an XML in-memory block and use the given SAX function block
14586 * to handle the parsing callback. If sax is NULL, fallback to the default
14587 * DOM tree building routines.
14588 *
14589 * Returns the resulting document tree
14590 */
14591xmlDocPtr
14592xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14593 int size, int recovery) {
14594 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14595}
14596
14597/**
14598 * xmlParseMemory:
14599 * @buffer: an pointer to a char array
14600 * @size: the size of the array
14601 *
14602 * parse an XML in-memory block and build a tree.
14603 *
14604 * Returns the resulting document tree
14605 */
14606
14607xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14608 return(xmlSAXParseMemory(NULL, buffer, size, 0));
14609}
14610
14611/**
14612 * xmlRecoverMemory:
14613 * @buffer: an pointer to a char array
14614 * @size: the size of the array
14615 *
14616 * parse an XML in-memory block and build a tree.
14617 * In the case the document is not Well Formed, an attempt to
14618 * build a tree is tried anyway
14619 *
14620 * Returns the resulting document tree or NULL in case of error
14621 */
14622
14623xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14624 return(xmlSAXParseMemory(NULL, buffer, size, 1));
14625}
14626
14627/**
14628 * xmlSAXUserParseMemory:
14629 * @sax: a SAX handler
14630 * @user_data: The user data returned on SAX callbacks
14631 * @buffer: an in-memory XML document input
14632 * @size: the length of the XML document in bytes
14633 *
14634 * A better SAX parsing routine.
14635 * parse an XML in-memory buffer and call the given SAX handler routines.
14636 *
14637 * Returns 0 in case of success or a error number otherwise
14638 */
14639int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14640 const char *buffer, int size) {
14641 int ret = 0;
14642 xmlParserCtxtPtr ctxt;
14643
14644 xmlInitParser();
14645
14646 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14647 if (ctxt == NULL) return -1;
14648 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14649 xmlFree(ctxt->sax);
14650 ctxt->sax = sax;
14651 xmlDetectSAX2(ctxt);
14652
14653 if (user_data != NULL)
14654 ctxt->userData = user_data;
14655
14656 xmlParseDocument(ctxt);
14657
14658 if (ctxt->wellFormed)
14659 ret = 0;
14660 else {
14661 if (ctxt->errNo != 0)
14662 ret = ctxt->errNo;
14663 else
14664 ret = -1;
14665 }
14666 if (sax != NULL)
14667 ctxt->sax = NULL;
14668 if (ctxt->myDoc != NULL) {
14669 xmlFreeDoc(ctxt->myDoc);
14670 ctxt->myDoc = NULL;
14671 }
14672 xmlFreeParserCtxt(ctxt);
14673
14674 return ret;
14675}
14676#endif /* LIBXML_SAX1_ENABLED */
14677
14678/**
14679 * xmlCreateDocParserCtxt:
14680 * @cur: a pointer to an array of xmlChar
14681 *
14682 * Creates a parser context for an XML in-memory document.
14683 *
14684 * Returns the new parser context or NULL
14685 */
14686xmlParserCtxtPtr
14687xmlCreateDocParserCtxt(const xmlChar *cur) {
14688 int len;
14689
14690 if (cur == NULL)
14691 return(NULL);
14692 len = xmlStrlen(cur);
14693 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14694}
14695
14696#ifdef LIBXML_SAX1_ENABLED
14697/**
14698 * xmlSAXParseDoc:
14699 * @sax: the SAX handler block
14700 * @cur: a pointer to an array of xmlChar
14701 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14702 * documents
14703 *
14704 * parse an XML in-memory document and build a tree.
14705 * It use the given SAX function block to handle the parsing callback.
14706 * If sax is NULL, fallback to the default DOM tree building routines.
14707 *
14708 * Returns the resulting document tree
14709 */
14710
14711xmlDocPtr
14712xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14713 xmlDocPtr ret;
14714 xmlParserCtxtPtr ctxt;
14715 xmlSAXHandlerPtr oldsax = NULL;
14716
14717 if (cur == NULL) return(NULL);
14718
14719
14720 ctxt = xmlCreateDocParserCtxt(cur);
14721 if (ctxt == NULL) return(NULL);
14722 if (sax != NULL) {
14723 oldsax = ctxt->sax;
14724 ctxt->sax = sax;
14725 ctxt->userData = NULL;
14726 }
14727 xmlDetectSAX2(ctxt);
14728
14729 xmlParseDocument(ctxt);
14730 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14731 else {
14732 ret = NULL;
14733 xmlFreeDoc(ctxt->myDoc);
14734 ctxt->myDoc = NULL;
14735 }
14736 if (sax != NULL)
14737 ctxt->sax = oldsax;
14738 xmlFreeParserCtxt(ctxt);
14739
14740 return(ret);
14741}
14742
14743/**
14744 * xmlParseDoc:
14745 * @cur: a pointer to an array of xmlChar
14746 *
14747 * parse an XML in-memory document and build a tree.
14748 *
14749 * Returns the resulting document tree
14750 */
14751
14752xmlDocPtr
14753xmlParseDoc(const xmlChar *cur) {
14754 return(xmlSAXParseDoc(NULL, cur, 0));
14755}
14756#endif /* LIBXML_SAX1_ENABLED */
14757
14758#ifdef LIBXML_LEGACY_ENABLED
14759/************************************************************************
14760 * *
14761 * Specific function to keep track of entities references *
14762 * and used by the XSLT debugger *
14763 * *
14764 ************************************************************************/
14765
14766static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14767
14768/**
14769 * xmlAddEntityReference:
14770 * @ent : A valid entity
14771 * @firstNode : A valid first node for children of entity
14772 * @lastNode : A valid last node of children entity
14773 *
14774 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14775 */
14776static void
14777xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14778 xmlNodePtr lastNode)
14779{
14780 if (xmlEntityRefFunc != NULL) {
14781 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14782 }
14783}
14784
14785
/**
 * xmlSetEntityReferenceFunc:
 * @func:  the callback to register
 *
 * Set the function to call back when an XML entity reference has been
 * made. The pointer is stored in the file-scope xmlEntityRefFunc
 * variable, replacing any previously registered function.
 */
void
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
{
    xmlEntityRefFunc = func;
}
14797#endif /* LIBXML_LEGACY_ENABLED */
14798
14799/************************************************************************
14800 * *
14801 * Miscellaneous *
14802 * *
14803 ************************************************************************/
14804
14805#ifdef LIBXML_XPATH_ENABLED
14806#include <libxml/xpath.h>
14807#endif
14808
/* Default generic error handler, compared against below. */
extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
/* Non-zero once xmlInitParser() has completed; reset by xmlCleanupParser(). */
static int xmlParserInitialized = 0;

/**
 * xmlInitParser:
 *
 * Initialization function for the XML parser.
 * This is not reentrant. Call once before processing in case of
 * use in multithreaded programs.
 *
 * Calling it again after a successful initialization returns
 * immediately. When thread support is compiled in, the initialization
 * sequence runs with the global init mutex held.
 */

void
xmlInitParser(void) {
    /* Already initialized: nothing to do. */
    if (xmlParserInitialized != 0)
        return;

#ifdef LIBXML_THREAD_ENABLED
    __xmlGlobalInitMutexLock();
    /* Check the flag again now that the global init mutex is held. */
    if (xmlParserInitialized == 0) {
#endif
        xmlInitThreads();
        xmlInitGlobals();
        /* Only (re)install the default generic error handler when the
         * current one is the default or is unset. */
        if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
            (xmlGenericError == NULL))
            initGenericErrorDefaultFunc(NULL);
        xmlInitMemory();
        xmlInitializeDict();
        xmlInitCharEncodingHandlers();
        xmlDefaultSAXHandlerInit();
        xmlRegisterDefaultInputCallbacks();
#ifdef LIBXML_OUTPUT_ENABLED
        xmlRegisterDefaultOutputCallbacks();
#endif /* LIBXML_OUTPUT_ENABLED */
#ifdef LIBXML_HTML_ENABLED
        htmlInitAutoClose();
        htmlDefaultSAXHandlerInit();
#endif
#ifdef LIBXML_XPATH_ENABLED
        xmlXPathInit();
#endif
        /* Set last, once every subsystem above has been initialized. */
        xmlParserInitialized = 1;
#ifdef LIBXML_THREAD_ENABLED
    }
    __xmlGlobalInitMutexUnlock();
#endif
}
14855
/**
 * xmlCleanupParser:
 *
 * This function name is somewhat misleading. It does not clean up
 * parser state, it cleans up memory allocated by the library itself.
 * It is a cleanup function for the XML library. It tries to reclaim all
 * related global memory allocated for the library processing.
 * It doesn't deallocate any document related memory. One should
 * call xmlCleanupParser() only when the process has finished using
 * the library and all XML/HTML documents built with it.
 * See also xmlInitParser() which has the opposite function of preparing
 * the library for operations.
 *
 * Does nothing when the library is not currently marked as initialized.
 *
 * WARNING: if your application is multithreaded or has plugin support
 *          calling this may crash the application if another thread or
 *          a plugin is still using libxml2. It's sometimes very hard to
 *          guess if libxml2 is in use in the application, some libraries
 *          or plugins may use it without notice. In case of doubt abstain
 *          from calling this function or do it just before calling exit()
 *          to avoid leak reports from valgrind !
 */

void
xmlCleanupParser(void) {
    /* Nothing to release if xmlInitParser() has not run. */
    if (!xmlParserInitialized)
        return;

    xmlCleanupCharEncodingHandlers();
#ifdef LIBXML_CATALOG_ENABLED
    xmlCatalogCleanup();
#endif
    xmlDictCleanup();
    xmlCleanupInputCallbacks();
#ifdef LIBXML_OUTPUT_ENABLED
    xmlCleanupOutputCallbacks();
#endif
#ifdef LIBXML_SCHEMAS_ENABLED
    xmlSchemaCleanupTypes();
    xmlRelaxNGCleanupTypes();
#endif
    xmlResetLastError();
    xmlCleanupGlobals();
    xmlCleanupThreads(); /* must be last if called not from the main thread */
    xmlCleanupMemory();
    /* Allow a later xmlInitParser() call to initialize the library again. */
    xmlParserInitialized = 0;
}
14902
14903/************************************************************************
14904 * *
14905 * New set (2.6.0) of simpler and more flexible APIs *
14906 * *
14907 ************************************************************************/
14908
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named "dict" must be visible where the macro
 * is expanded; when it is NULL every non-NULL @str is freed.
 *
 * The expansion is wrapped in do { } while (0) so that the macro behaves
 * as a single statement and must be followed by a semicolon; it is then
 * safe as the body of an unbraced if/else. Note that @str is evaluated
 * more than once.
 */
#define DICT_FREE(str) \
    do { \
        if ((str) && ((!dict) || \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \
            xmlFree((char *)(str)); \
    } while (0)
14920
14921/**
14922 * xmlCtxtReset:
14923 * @ctxt: an XML parser context
14924 *
14925 * Reset a parser context
14926 */
14927void
14928xmlCtxtReset(xmlParserCtxtPtr ctxt)
14929{
14930 xmlParserInputPtr input;
14931 xmlDictPtr dict;
14932
14933 if (ctxt == NULL)
14934 return;
14935
14936 dict = ctxt->dict;
14937
14938 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14939 xmlFreeInputStream(input);
14940 }
14941 ctxt->inputNr = 0;
14942 ctxt->input = NULL;
14943
14944 ctxt->spaceNr = 0;
14945 if (ctxt->spaceTab != NULL) {
14946 ctxt->spaceTab[0] = -1;
14947 ctxt->space = &ctxt->spaceTab[0];
14948 } else {
14949 ctxt->space = NULL;
14950 }
14951
14952
14953 ctxt->nodeNr = 0;
14954 ctxt->node = NULL;
14955
14956 ctxt->nameNr = 0;
14957 ctxt->name = NULL;
14958
14959 DICT_FREE(ctxt->version);
14960 ctxt->version = NULL;
14961 DICT_FREE(ctxt->encoding);
14962 ctxt->encoding = NULL;
14963 DICT_FREE(ctxt->directory);
14964 ctxt->directory = NULL;
14965 DICT_FREE(ctxt->extSubURI);
14966 ctxt->extSubURI = NULL;
14967 DICT_FREE(ctxt->extSubSystem);
14968 ctxt->extSubSystem = NULL;
14969 if (ctxt->myDoc != NULL)
14970 xmlFreeDoc(ctxt->myDoc);
14971 ctxt->myDoc = NULL;
14972
14973 ctxt->standalone = -1;
14974 ctxt->hasExternalSubset = 0;
14975 ctxt->hasPErefs = 0;
14976 ctxt->html = 0;
14977 ctxt->external = 0;
14978 ctxt->instate = XML_PARSER_START;
14979 ctxt->token = 0;
14980
14981 ctxt->wellFormed = 1;
14982 ctxt->nsWellFormed = 1;
14983 ctxt->disableSAX = 0;
14984 ctxt->valid = 1;
14985#if 0
14986 ctxt->vctxt.userData = ctxt;
14987 ctxt->vctxt.error = xmlParserValidityError;
14988 ctxt->vctxt.warning = xmlParserValidityWarning;
14989#endif
14990 ctxt->record_info = 0;
14991 ctxt->nbChars = 0;
14992 ctxt->checkIndex = 0;
14993 ctxt->inSubset = 0;
14994 ctxt->errNo = XML_ERR_OK;
14995 ctxt->depth = 0;
14996 ctxt->charset = XML_CHAR_ENCODING_UTF8;
14997 ctxt->catalogs = NULL;
14998 ctxt->nbentities = 0;
14999 ctxt->sizeentities = 0;
15000 ctxt->sizeentcopy = 0;
15001 xmlInitNodeInfoSeq(&ctxt->node_seq);
15002
15003 if (ctxt->attsDefault != NULL) {
15004 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
15005 ctxt->attsDefault = NULL;
15006 }
15007 if (ctxt->attsSpecial != NULL) {
15008 xmlHashFree(ctxt->attsSpecial, NULL);
15009 ctxt->attsSpecial = NULL;
15010 }
15011
15012#ifdef LIBXML_CATALOG_ENABLED
15013 if (ctxt->catalogs != NULL)
15014 xmlCatalogFreeLocal(ctxt->catalogs);
15015#endif
15016 if (ctxt->lastError.code != XML_ERR_OK)
15017 xmlResetError(&ctxt->lastError);
15018}
15019
15020/**
15021 * xmlCtxtResetPush:
15022 * @ctxt: an XML parser context
15023 * @chunk: a pointer to an array of chars
15024 * @size: number of chars in the array
15025 * @filename: an optional file name or URI
15026 * @encoding: the document encoding, or NULL
15027 *
15028 * Reset a push parser context
15029 *
15030 * Returns 0 in case of success and 1 in case of error
15031 */
15032int
15033xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
15034 int size, const char *filename, const char *encoding)
15035{
15036 xmlParserInputPtr inputStream;
15037 xmlParserInputBufferPtr buf;
15038 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
15039
15040 if (ctxt == NULL)
15041 return(1);
15042
15043 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
15044 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
15045
15046 buf = xmlAllocParserInputBuffer(enc);
15047 if (buf == NULL)
15048 return(1);
15049
15050 if (ctxt == NULL) {
15051 xmlFreeParserInputBuffer(buf);
15052 return(1);
15053 }
15054
15055 xmlCtxtReset(ctxt);
15056
15057 if (ctxt->pushTab == NULL) {
15058 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
15059 sizeof(xmlChar *));
15060 if (ctxt->pushTab == NULL) {
15061 xmlErrMemory(ctxt, NULL);
15062 xmlFreeParserInputBuffer(buf);
15063 return(1);
15064 }
15065 }
15066
15067 if (filename == NULL) {
15068 ctxt->directory = NULL;
15069 } else {
15070 ctxt->directory = xmlParserGetDirectory(filename);
15071 }
15072
15073 inputStream = xmlNewInputStream(ctxt);
15074 if (inputStream == NULL) {
15075 xmlFreeParserInputBuffer(buf);
15076 return(1);
15077 }
15078
15079 if (filename == NULL)
15080 inputStream->filename = NULL;
15081 else
15082 inputStream->filename = (char *)
15083 xmlCanonicPath((const xmlChar *) filename);
15084 inputStream->buf = buf;
15085 xmlBufResetInput(buf->buffer, inputStream);
15086
15087 inputPush(ctxt, inputStream);
15088
15089 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
15090 (ctxt->input->buf != NULL)) {
15091 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
15092 size_t cur = ctxt->input->cur - ctxt->input->base;
15093
15094 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
15095
15096 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
15097#ifdef DEBUG_PUSH
15098 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
15099#endif
15100 }
15101
15102 if (encoding != NULL) {
15103 xmlCharEncodingHandlerPtr hdlr;
15104
15105 if (ctxt->encoding != NULL)
15106 xmlFree((xmlChar *) ctxt->encoding);
15107 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15108
15109 hdlr = xmlFindCharEncodingHandler(encoding);
15110 if (hdlr != NULL) {
15111 xmlSwitchToEncoding(ctxt, hdlr);
15112 } else {
15113 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
15114 "Unsupported encoding %s\n", BAD_CAST encoding);
15115 }
15116 } else if (enc != XML_CHAR_ENCODING_NONE) {
15117 xmlSwitchEncoding(ctxt, enc);
15118 }
15119
15120 return(0);
15121}
15122
15123
15124/**
15125 * xmlCtxtUseOptionsInternal:
15126 * @ctxt: an XML parser context
15127 * @options: a combination of xmlParserOption
15128 * @encoding: the user provided encoding to use
15129 *
15130 * Applies the options to the parser context
15131 *
15132 * Returns 0 in case of success, the set of unknown or unimplemented options
15133 * in case of error.
15134 */
15135static int
15136xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
15137{
15138 if (ctxt == NULL)
15139 return(-1);
15140 if (encoding != NULL) {
15141 if (ctxt->encoding != NULL)
15142 xmlFree((xmlChar *) ctxt->encoding);
15143 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15144 }
15145 if (options & XML_PARSE_RECOVER) {
15146 ctxt->recovery = 1;
15147 options -= XML_PARSE_RECOVER;
15148 ctxt->options |= XML_PARSE_RECOVER;
15149 } else
15150 ctxt->recovery = 0;
15151 if (options & XML_PARSE_DTDLOAD) {
15152 ctxt->loadsubset = XML_DETECT_IDS;
15153 options -= XML_PARSE_DTDLOAD;
15154 ctxt->options |= XML_PARSE_DTDLOAD;
15155 } else
15156 ctxt->loadsubset = 0;
15157 if (options & XML_PARSE_DTDATTR) {
15158 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15159 options -= XML_PARSE_DTDATTR;
15160 ctxt->options |= XML_PARSE_DTDATTR;
15161 }
15162 if (options & XML_PARSE_NOENT) {
15163 ctxt->replaceEntities = 1;
15164 /* ctxt->loadsubset |= XML_DETECT_IDS; */
15165 options -= XML_PARSE_NOENT;
15166 ctxt->options |= XML_PARSE_NOENT;
15167 } else
15168 ctxt->replaceEntities = 0;
15169 if (options & XML_PARSE_PEDANTIC) {
15170 ctxt->pedantic = 1;
15171 options -= XML_PARSE_PEDANTIC;
15172 ctxt->options |= XML_PARSE_PEDANTIC;
15173 } else
15174 ctxt->pedantic = 0;
15175 if (options & XML_PARSE_NOBLANKS) {
15176 ctxt->keepBlanks = 0;
15177 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15178 options -= XML_PARSE_NOBLANKS;
15179 ctxt->options |= XML_PARSE_NOBLANKS;
15180 } else
15181 ctxt->keepBlanks = 1;
15182 if (options & XML_PARSE_DTDVALID) {
15183 ctxt->validate = 1;
15184 if (options & XML_PARSE_NOWARNING)
15185 ctxt->vctxt.warning = NULL;
15186 if (options & XML_PARSE_NOERROR)
15187 ctxt->vctxt.error = NULL;
15188 options -= XML_PARSE_DTDVALID;
15189 ctxt->options |= XML_PARSE_DTDVALID;
15190 } else
15191 ctxt->validate = 0;
15192 if (options & XML_PARSE_NOWARNING) {
15193 ctxt->sax->warning = NULL;
15194 options -= XML_PARSE_NOWARNING;
15195 }
15196 if (options & XML_PARSE_NOERROR) {
15197 ctxt->sax->error = NULL;
15198 ctxt->sax->fatalError = NULL;
15199 options -= XML_PARSE_NOERROR;
15200 }
15201#ifdef LIBXML_SAX1_ENABLED
15202 if (options & XML_PARSE_SAX1) {
15203 ctxt->sax->startElement = xmlSAX2StartElement;
15204 ctxt->sax->endElement = xmlSAX2EndElement;
15205 ctxt->sax->startElementNs = NULL;
15206 ctxt->sax->endElementNs = NULL;
15207 ctxt->sax->initialized = 1;
15208 options -= XML_PARSE_SAX1;
15209 ctxt->options |= XML_PARSE_SAX1;
15210 }
15211#endif /* LIBXML_SAX1_ENABLED */
15212 if (options & XML_PARSE_NODICT) {
15213 ctxt->dictNames = 0;
15214 options -= XML_PARSE_NODICT;
15215 ctxt->options |= XML_PARSE_NODICT;
15216 } else {
15217 ctxt->dictNames = 1;
15218 }
15219 if (options & XML_PARSE_NOCDATA) {
15220 ctxt->sax->cdataBlock = NULL;
15221 options -= XML_PARSE_NOCDATA;
15222 ctxt->options |= XML_PARSE_NOCDATA;
15223 }
15224 if (options & XML_PARSE_NSCLEAN) {
15225 ctxt->options |= XML_PARSE_NSCLEAN;
15226 options -= XML_PARSE_NSCLEAN;
15227 }
15228 if (options & XML_PARSE_NONET) {
15229 ctxt->options |= XML_PARSE_NONET;
15230 options -= XML_PARSE_NONET;
15231 }
15232 if (options & XML_PARSE_COMPACT) {
15233 ctxt->options |= XML_PARSE_COMPACT;
15234 options -= XML_PARSE_COMPACT;
15235 }
15236 if (options & XML_PARSE_OLD10) {
15237 ctxt->options |= XML_PARSE_OLD10;
15238 options -= XML_PARSE_OLD10;
15239 }
15240 if (options & XML_PARSE_NOBASEFIX) {
15241 ctxt->options |= XML_PARSE_NOBASEFIX;
15242 options -= XML_PARSE_NOBASEFIX;
15243 }
15244 if (options & XML_PARSE_HUGE) {
15245 ctxt->options |= XML_PARSE_HUGE;
15246 options -= XML_PARSE_HUGE;
15247 if (ctxt->dict != NULL)
15248 xmlDictSetLimit(ctxt->dict, 0);
15249 }
15250 if (options & XML_PARSE_OLDSAX) {
15251 ctxt->options |= XML_PARSE_OLDSAX;
15252 options -= XML_PARSE_OLDSAX;
15253 }
15254 if (options & XML_PARSE_IGNORE_ENC) {
15255 ctxt->options |= XML_PARSE_IGNORE_ENC;
15256 options -= XML_PARSE_IGNORE_ENC;
15257 }
15258 if (options & XML_PARSE_BIG_LINES) {
15259 ctxt->options |= XML_PARSE_BIG_LINES;
15260 options -= XML_PARSE_BIG_LINES;
15261 }
15262 ctxt->linenumbers = 1;
15263 return (options);
15264}
15265
15266/**
15267 * xmlCtxtUseOptions:
15268 * @ctxt: an XML parser context
15269 * @options: a combination of xmlParserOption
15270 *
15271 * Applies the options to the parser context
15272 *
15273 * Returns 0 in case of success, the set of unknown or unimplemented options
15274 * in case of error.
15275 */
15276int
15277xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15278{
15279 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15280}
15281
15282/**
15283 * xmlDoRead:
15284 * @ctxt: an XML parser context
15285 * @URL: the base URL to use for the document
15286 * @encoding: the document encoding, or NULL
15287 * @options: a combination of xmlParserOption
15288 * @reuse: keep the context for reuse
15289 *
15290 * Common front-end for the xmlRead functions
15291 *
15292 * Returns the resulting document tree or NULL
15293 */
15294static xmlDocPtr
15295xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15296 int options, int reuse)
15297{
15298 xmlDocPtr ret;
15299
15300 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15301 if (encoding != NULL) {
15302 xmlCharEncodingHandlerPtr hdlr;
15303
15304 hdlr = xmlFindCharEncodingHandler(encoding);
15305 if (hdlr != NULL)
15306 xmlSwitchToEncoding(ctxt, hdlr);
15307 }
15308 if ((URL != NULL) && (ctxt->input != NULL) &&
15309 (ctxt->input->filename == NULL))
15310 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15311 xmlParseDocument(ctxt);
15312 if ((ctxt->wellFormed) || ctxt->recovery)
15313 ret = ctxt->myDoc;
15314 else {
15315 ret = NULL;
15316 if (ctxt->myDoc != NULL) {
15317 xmlFreeDoc(ctxt->myDoc);
15318 }
15319 }
15320 ctxt->myDoc = NULL;
15321 if (!reuse) {
15322 xmlFreeParserCtxt(ctxt);
15323 }
15324
15325 return (ret);
15326}
15327
15328/**
15329 * xmlReadDoc:
15330 * @cur: a pointer to a zero terminated string
15331 * @URL: the base URL to use for the document
15332 * @encoding: the document encoding, or NULL
15333 * @options: a combination of xmlParserOption
15334 *
15335 * parse an XML in-memory document and build a tree.
15336 *
15337 * Returns the resulting document tree
15338 */
15339xmlDocPtr
15340xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15341{
15342 xmlParserCtxtPtr ctxt;
15343
15344 if (cur == NULL)
15345 return (NULL);
15346 xmlInitParser();
15347
15348 ctxt = xmlCreateDocParserCtxt(cur);
15349 if (ctxt == NULL)
15350 return (NULL);
15351 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15352}
15353
15354/**
15355 * xmlReadFile:
15356 * @filename: a file or URL
15357 * @encoding: the document encoding, or NULL
15358 * @options: a combination of xmlParserOption
15359 *
15360 * parse an XML file from the filesystem or the network.
15361 *
15362 * Returns the resulting document tree
15363 */
15364xmlDocPtr
15365xmlReadFile(const char *filename, const char *encoding, int options)
15366{
15367 xmlParserCtxtPtr ctxt;
15368
15369 xmlInitParser();
15370 ctxt = xmlCreateURLParserCtxt(filename, options);
15371 if (ctxt == NULL)
15372 return (NULL);
15373 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15374}
15375
15376/**
15377 * xmlReadMemory:
15378 * @buffer: a pointer to a char array
15379 * @size: the size of the array
15380 * @URL: the base URL to use for the document
15381 * @encoding: the document encoding, or NULL
15382 * @options: a combination of xmlParserOption
15383 *
15384 * parse an XML in-memory document and build a tree.
15385 *
15386 * Returns the resulting document tree
15387 */
15388xmlDocPtr
15389xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15390{
15391 xmlParserCtxtPtr ctxt;
15392
15393 xmlInitParser();
15394 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15395 if (ctxt == NULL)
15396 return (NULL);
15397 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15398}
15399
15400/**
15401 * xmlReadFd:
15402 * @fd: an open file descriptor
15403 * @URL: the base URL to use for the document
15404 * @encoding: the document encoding, or NULL
15405 * @options: a combination of xmlParserOption
15406 *
15407 * parse an XML from a file descriptor and build a tree.
15408 * NOTE that the file descriptor will not be closed when the
15409 * reader is closed or reset.
15410 *
15411 * Returns the resulting document tree
15412 */
15413xmlDocPtr
15414xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15415{
15416 xmlParserCtxtPtr ctxt;
15417 xmlParserInputBufferPtr input;
15418 xmlParserInputPtr stream;
15419
15420 if (fd < 0)
15421 return (NULL);
15422 xmlInitParser();
15423
15424 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15425 if (input == NULL)
15426 return (NULL);
15427 input->closecallback = NULL;
15428 ctxt = xmlNewParserCtxt();
15429 if (ctxt == NULL) {
15430 xmlFreeParserInputBuffer(input);
15431 return (NULL);
15432 }
15433 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15434 if (stream == NULL) {
15435 xmlFreeParserInputBuffer(input);
15436 xmlFreeParserCtxt(ctxt);
15437 return (NULL);
15438 }
15439 inputPush(ctxt, stream);
15440 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15441}
15442
15443/**
15444 * xmlReadIO:
15445 * @ioread: an I/O read function
15446 * @ioclose: an I/O close function
15447 * @ioctx: an I/O handler
15448 * @URL: the base URL to use for the document
15449 * @encoding: the document encoding, or NULL
15450 * @options: a combination of xmlParserOption
15451 *
15452 * parse an XML document from I/O functions and source and build a tree.
15453 *
15454 * Returns the resulting document tree
15455 */
15456xmlDocPtr
15457xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15458 void *ioctx, const char *URL, const char *encoding, int options)
15459{
15460 xmlParserCtxtPtr ctxt;
15461 xmlParserInputBufferPtr input;
15462 xmlParserInputPtr stream;
15463
15464 if (ioread == NULL)
15465 return (NULL);
15466 xmlInitParser();
15467
15468 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15469 XML_CHAR_ENCODING_NONE);
15470 if (input == NULL) {
15471 if (ioclose != NULL)
15472 ioclose(ioctx);
15473 return (NULL);
15474 }
15475 ctxt = xmlNewParserCtxt();
15476 if (ctxt == NULL) {
15477 xmlFreeParserInputBuffer(input);
15478 return (NULL);
15479 }
15480 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15481 if (stream == NULL) {
15482 xmlFreeParserInputBuffer(input);
15483 xmlFreeParserCtxt(ctxt);
15484 return (NULL);
15485 }
15486 inputPush(ctxt, stream);
15487 return (xmlDoRead(ctxt, URL, encoding, options, 0));
15488}
15489
15490/**
15491 * xmlCtxtReadDoc:
15492 * @ctxt: an XML parser context
15493 * @cur: a pointer to a zero terminated string
15494 * @URL: the base URL to use for the document
15495 * @encoding: the document encoding, or NULL
15496 * @options: a combination of xmlParserOption
15497 *
15498 * parse an XML in-memory document and build a tree.
15499 * This reuses the existing @ctxt parser context
15500 *
15501 * Returns the resulting document tree
15502 */
15503xmlDocPtr
15504xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15505 const char *URL, const char *encoding, int options)
15506{
15507 xmlParserInputPtr stream;
15508
15509 if (cur == NULL)
15510 return (NULL);
15511 if (ctxt == NULL)
15512 return (NULL);
15513 xmlInitParser();
15514
15515 xmlCtxtReset(ctxt);
15516
15517 stream = xmlNewStringInputStream(ctxt, cur);
15518 if (stream == NULL) {
15519 return (NULL);
15520 }
15521 inputPush(ctxt, stream);
15522 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15523}
15524
15525/**
15526 * xmlCtxtReadFile:
15527 * @ctxt: an XML parser context
15528 * @filename: a file or URL
15529 * @encoding: the document encoding, or NULL
15530 * @options: a combination of xmlParserOption
15531 *
15532 * parse an XML file from the filesystem or the network.
15533 * This reuses the existing @ctxt parser context
15534 *
15535 * Returns the resulting document tree
15536 */
15537xmlDocPtr
15538xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15539 const char *encoding, int options)
15540{
15541 xmlParserInputPtr stream;
15542
15543 if (filename == NULL)
15544 return (NULL);
15545 if (ctxt == NULL)
15546 return (NULL);
15547 xmlInitParser();
15548
15549 xmlCtxtReset(ctxt);
15550
15551 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15552 if (stream == NULL) {
15553 return (NULL);
15554 }
15555 inputPush(ctxt, stream);
15556 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15557}
15558
15559/**
15560 * xmlCtxtReadMemory:
15561 * @ctxt: an XML parser context
15562 * @buffer: a pointer to a char array
15563 * @size: the size of the array
15564 * @URL: the base URL to use for the document
15565 * @encoding: the document encoding, or NULL
15566 * @options: a combination of xmlParserOption
15567 *
15568 * parse an XML in-memory document and build a tree.
15569 * This reuses the existing @ctxt parser context
15570 *
15571 * Returns the resulting document tree
15572 */
15573xmlDocPtr
15574xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15575 const char *URL, const char *encoding, int options)
15576{
15577 xmlParserInputBufferPtr input;
15578 xmlParserInputPtr stream;
15579
15580 if (ctxt == NULL)
15581 return (NULL);
15582 if (buffer == NULL)
15583 return (NULL);
15584 xmlInitParser();
15585
15586 xmlCtxtReset(ctxt);
15587
15588 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15589 if (input == NULL) {
15590 return(NULL);
15591 }
15592
15593 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15594 if (stream == NULL) {
15595 xmlFreeParserInputBuffer(input);
15596 return(NULL);
15597 }
15598
15599 inputPush(ctxt, stream);
15600 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15601}
15602
15603/**
15604 * xmlCtxtReadFd:
15605 * @ctxt: an XML parser context
15606 * @fd: an open file descriptor
15607 * @URL: the base URL to use for the document
15608 * @encoding: the document encoding, or NULL
15609 * @options: a combination of xmlParserOption
15610 *
15611 * parse an XML from a file descriptor and build a tree.
15612 * This reuses the existing @ctxt parser context
15613 * NOTE that the file descriptor will not be closed when the
15614 * reader is closed or reset.
15615 *
15616 * Returns the resulting document tree
15617 */
15618xmlDocPtr
15619xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15620 const char *URL, const char *encoding, int options)
15621{
15622 xmlParserInputBufferPtr input;
15623 xmlParserInputPtr stream;
15624
15625 if (fd < 0)
15626 return (NULL);
15627 if (ctxt == NULL)
15628 return (NULL);
15629 xmlInitParser();
15630
15631 xmlCtxtReset(ctxt);
15632
15633
15634 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15635 if (input == NULL)
15636 return (NULL);
15637 input->closecallback = NULL;
15638 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15639 if (stream == NULL) {
15640 xmlFreeParserInputBuffer(input);
15641 return (NULL);
15642 }
15643 inputPush(ctxt, stream);
15644 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15645}
15646
15647/**
15648 * xmlCtxtReadIO:
15649 * @ctxt: an XML parser context
15650 * @ioread: an I/O read function
15651 * @ioclose: an I/O close function
15652 * @ioctx: an I/O handler
15653 * @URL: the base URL to use for the document
15654 * @encoding: the document encoding, or NULL
15655 * @options: a combination of xmlParserOption
15656 *
15657 * parse an XML document from I/O functions and source and build a tree.
15658 * This reuses the existing @ctxt parser context
15659 *
15660 * Returns the resulting document tree
15661 */
15662xmlDocPtr
15663xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15664 xmlInputCloseCallback ioclose, void *ioctx,
15665 const char *URL,
15666 const char *encoding, int options)
15667{
15668 xmlParserInputBufferPtr input;
15669 xmlParserInputPtr stream;
15670
15671 if (ioread == NULL)
15672 return (NULL);
15673 if (ctxt == NULL)
15674 return (NULL);
15675 xmlInitParser();
15676
15677 xmlCtxtReset(ctxt);
15678
15679 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15680 XML_CHAR_ENCODING_NONE);
15681 if (input == NULL) {
15682 if (ioclose != NULL)
15683 ioclose(ioctx);
15684 return (NULL);
15685 }
15686 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15687 if (stream == NULL) {
15688 xmlFreeParserInputBuffer(input);
15689 return (NULL);
15690 }
15691 inputPush(ctxt, stream);
15692 return (xmlDoRead(ctxt, URL, encoding, options, 1));
15693}
15694
15695#define bottom_parser
15696#include "elfgcchack.h"
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette