parserInternals.c@ 15236

Last change on this file since 15236 was 6076, checked in by vboxsync, 17 years ago
Merged dmik/s2 branch (r25959:26751) to the trunk.
Property svn:eol-style set to `native` Property svn:keywords set to `Date Revision Author Id`
File size: 58.9 KB

Line
1	/*
2	* parserInternals.c : Internal routines (and obsolete ones) needed for the
3	* XML and HTML parsers.
4	*
5	* See Copyright for the status of this software.
6	*
7	* daniel@veillard.com
8	*/
9
10	#define IN_LIBXML
11	#include "libxml.h"
12
13	#if defined(WIN32) && !defined (__CYGWIN__)
14	#define XML_DIR_SEP '\\'
15	#else
16	#define XML_DIR_SEP '/'
17	#endif
18
19	#include <string.h>
20	#ifdef HAVE_CTYPE_H
21	#include <ctype.h>
22	#endif
23	#ifdef HAVE_STDLIB_H
24	#include <stdlib.h>
25	#endif
26	#ifdef HAVE_SYS_STAT_H
27	#include <sys/stat.h>
28	#endif
29	#ifdef HAVE_FCNTL_H
30	#include <fcntl.h>
31	#endif
32	#ifdef HAVE_UNISTD_H
33	#include <unistd.h>
34	#endif
35	#ifdef HAVE_ZLIB_H
36	#include <zlib.h>
37	#endif
38
39	#include <libxml/xmlmemory.h>
40	#include <libxml/tree.h>
41	#include <libxml/parser.h>
42	#include <libxml/parserInternals.h>
43	#include <libxml/valid.h>
44	#include <libxml/entities.h>
45	#include <libxml/xmlerror.h>
46	#include <libxml/encoding.h>
47	#include <libxml/valid.h>
48	#include <libxml/xmlIO.h>
49	#include <libxml/uri.h>
50	#include <libxml/dict.h>
51	#include <libxml/SAX.h>
52	#ifdef LIBXML_CATALOG_ENABLED
53	#include <libxml/catalog.h>
54	#endif
55	#include <libxml/globals.h>
56	#include <libxml/chvalid.h>
57
58	/*
59	* Various global defaults for parsing
60	*/
61
62	/**
63	* xmlCheckVersion:
64	* @version: the include version number
65	*
66	* check the compiled lib version against the include one.
67	* This can warn or immediately kill the application
68	*/
69	void
70	xmlCheckVersion(int version) {
71	int myversion = (int) LIBXML_VERSION;
72
73	xmlInitParser();
74
75	if ((myversion / 10000) != (version / 10000)) {
76	xmlGenericError(xmlGenericErrorContext,
77	"Fatal: program compiled against libxml %d using libxml %d\n",
78	(version / 10000), (myversion / 10000));
79	fprintf(stderr,
80	"Fatal: program compiled against libxml %d using libxml %d\n",
81	(version / 10000), (myversion / 10000));
82	}
83	if ((myversion / 100) < (version / 100)) {
84	xmlGenericError(xmlGenericErrorContext,
85	"Warning: program compiled against libxml %d using older %d\n",
86	(version / 100), (myversion / 100));
87	}
88	}
89
90
91	/************************************************************************
92	* *
93	* Some factorized error routines *
94	* *
95	************************************************************************/
96
97
98	/**
99	* xmlErrMemory:
100	* @ctxt: an XML parser context
101	* @extra: extra informations
102	*
103	* Handle a redefinition of attribute error
104	*/
105	void
106	xmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra)
107	{
108	if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
109	(ctxt->instate == XML_PARSER_EOF))
110	return;
111	if (ctxt != NULL) {
112	ctxt->errNo = XML_ERR_NO_MEMORY;
113	ctxt->instate = XML_PARSER_EOF;
114	ctxt->disableSAX = 1;
115	}
116	if (extra)
117	__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
118	XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra,
119	NULL, NULL, 0, 0,
120	"Memory allocation failed : %s\n", extra);
121	else
122	__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
123	XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, NULL,
124	NULL, NULL, 0, 0, "Memory allocation failed\n");
125	}
126
127	/**
128	* __xmlErrEncoding:
129	* @ctxt: an XML parser context
130	* @xmlerr: the error number
131	* @msg: the error message
132	* @str1: an string info
133	* @str2: an string info
134	*
135	* Handle an encoding error
136	*/
137	void
138	__xmlErrEncoding(xmlParserCtxtPtr ctxt, xmlParserErrors xmlerr,
139	const char msg, const xmlChar str1, const xmlChar * str2)
140	{
141	if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
142	(ctxt->instate == XML_PARSER_EOF))
143	return;
144	if (ctxt != NULL)
145	ctxt->errNo = xmlerr;
146	__xmlRaiseError(NULL, NULL, NULL,
147	ctxt, NULL, XML_FROM_PARSER, xmlerr, XML_ERR_FATAL,
148	NULL, 0, (const char ) str1, (const char ) str2,
149	NULL, 0, 0, msg, str1, str2);
150	if (ctxt != NULL) {
151	ctxt->wellFormed = 0;
152	if (ctxt->recovery == 0)
153	ctxt->disableSAX = 1;
154	}
155	}
156
157	/**
158	* xmlErrInternal:
159	* @ctxt: an XML parser context
160	* @msg: the error message
161	* @str: error informations
162	*
163	* Handle an internal error
164	*/
165	static void
166	xmlErrInternal(xmlParserCtxtPtr ctxt, const char msg, const xmlChar str)
167	{
168	if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
169	(ctxt->instate == XML_PARSER_EOF))
170	return;
171	if (ctxt != NULL)
172	ctxt->errNo = XML_ERR_INTERNAL_ERROR;
173	__xmlRaiseError(NULL, NULL, NULL,
174	ctxt, NULL, XML_FROM_PARSER, XML_ERR_INTERNAL_ERROR,
175	XML_ERR_FATAL, NULL, 0, (const char *) str, NULL, NULL,
176	0, 0, msg, str);
177	if (ctxt != NULL) {
178	ctxt->wellFormed = 0;
179	if (ctxt->recovery == 0)
180	ctxt->disableSAX = 1;
181	}
182	}
183
184	/**
185	* xmlErrEncodingInt:
186	* @ctxt: an XML parser context
187	* @error: the error number
188	* @msg: the error message
189	* @val: an integer value
190	*
191	* n encoding error
192	*/
193	static void
194	xmlErrEncodingInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
195	const char *msg, int val)
196	{
197	if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
198	(ctxt->instate == XML_PARSER_EOF))
199	return;
200	if (ctxt != NULL)
201	ctxt->errNo = error;
202	__xmlRaiseError(NULL, NULL, NULL,
203	ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
204	NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
205	if (ctxt != NULL) {
206	ctxt->wellFormed = 0;
207	if (ctxt->recovery == 0)
208	ctxt->disableSAX = 1;
209	}
210	}
211
/**
 * xmlIsLetter:
 * @c:  a Unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [84] Letter ::= BaseChar | Ideographic
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsLetter(int c) {
    return(IS_BASECHAR(c) || IS_IDEOGRAPHIC(c));
}
225
226	/************************************************************************
227	* *
228	* Input handling functions for progressive parsing *
229	* *
230	************************************************************************/
231
232	/* #define DEBUG_INPUT */
233	/* #define DEBUG_STACK */
234	/* #define DEBUG_PUSH */
235
236
237	/* we need to keep enough input to show errors in context */
238	#define LINE_LEN 80
239
#ifdef DEBUG_INPUT
#define CHECK_BUFFER(in) check_buffer(in)

/*
 * Debug-only consistency check of a parser input against its underlying
 * buffer: reports when base does not match the buffer content, when cur
 * lies before base or beyond the used part of the buffer, and then dumps
 * the pointers and sizes.
 */
static
void check_buffer(xmlParserInputPtr in) {
    if (in->base != in->buf->buffer->content) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: base mismatch problem\n");
    }
    if (in->cur < in->base) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: cur < base problem\n");
    }
    if (in->cur > in->base + in->buf->buffer->use) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: cur > base + use problem\n");
    }
    /*
     * Pointers are printed with %p rather than being truncated to int,
     * and every argument is converted to the exact type its conversion
     * specifier expects.
     */
    xmlGenericError(xmlGenericErrorContext,
            "buffer %p : content %p, cur %d, use %u, size %u\n",
            (void *) in, (void *) in->buf->buffer->content,
            (int) (in->cur - in->base),
            (unsigned int) in->buf->buffer->use,
            (unsigned int) in->buf->buffer->size);
}

#else
#define CHECK_BUFFER(in)
#endif
265
266
267	/**
268	* xmlParserInputRead:
269	* @in: an XML parser input
270	* @len: an indicative size for the lookahead
271	*
272	* This function refresh the input for the parser. It doesn't try to
273	* preserve pointers to the input buffer, and discard already read data
274	*
275	* Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
276	* end of this entity
277	*/
278	int
279	xmlParserInputRead(xmlParserInputPtr in, int len) {
280	int ret;
281	int used;
282	int indx;
283
284	if (in == NULL) return(-1);
285	#ifdef DEBUG_INPUT
286	xmlGenericError(xmlGenericErrorContext, "Read\n");
287	#endif
288	if (in->buf == NULL) return(-1);
289	if (in->base == NULL) return(-1);
290	if (in->cur == NULL) return(-1);
291	if (in->buf->buffer == NULL) return(-1);
292	if (in->buf->readcallback == NULL) return(-1);
293
294	CHECK_BUFFER(in);
295
296	used = in->cur - in->buf->buffer->content;
297	ret = xmlBufferShrink(in->buf->buffer, used);
298	if (ret > 0) {
299	in->cur -= ret;
300	in->consumed += ret;
301	}
302	ret = xmlParserInputBufferRead(in->buf, len);
303	if (in->base != in->buf->buffer->content) {
304	/*
305	* the buffer has been reallocated
306	*/
307	indx = in->cur - in->base;
308	in->base = in->buf->buffer->content;
309	in->cur = &in->buf->buffer->content[indx];
310	}
311	in->end = &in->buf->buffer->content[in->buf->buffer->use];
312
313	CHECK_BUFFER(in);
314
315	return(ret);
316	}
317
318	/**
319	* xmlParserInputGrow:
320	* @in: an XML parser input
321	* @len: an indicative size for the lookahead
322	*
323	* This function increase the input for the parser. It tries to
324	* preserve pointers to the input buffer, and keep already read data
325	*
326	* Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
327	* end of this entity
328	*/
329	int
330	xmlParserInputGrow(xmlParserInputPtr in, int len) {
331	int ret;
332	int indx;
333
334	if (in == NULL) return(-1);
335	#ifdef DEBUG_INPUT
336	xmlGenericError(xmlGenericErrorContext, "Grow\n");
337	#endif
338	if (in->buf == NULL) return(-1);
339	if (in->base == NULL) return(-1);
340	if (in->cur == NULL) return(-1);
341	if (in->buf->buffer == NULL) return(-1);
342
343	CHECK_BUFFER(in);
344
345	indx = in->cur - in->base;
346	if (in->buf->buffer->use > (unsigned int) indx + INPUT_CHUNK) {
347
348	CHECK_BUFFER(in);
349
350	return(0);
351	}
352	if (in->buf->readcallback != NULL)
353	ret = xmlParserInputBufferGrow(in->buf, len);
354	else
355	return(0);
356
357	/*
358	* NOTE : in->base may be a "dangling" i.e. freed pointer in this
359	* block, but we use it really as an integer to do some
360	* pointer arithmetic. Insure will raise it as a bug but in
361	* that specific case, that's not !
362	*/
363	if (in->base != in->buf->buffer->content) {
364	/*
365	* the buffer has been reallocated
366	*/
367	indx = in->cur - in->base;
368	in->base = in->buf->buffer->content;
369	in->cur = &in->buf->buffer->content[indx];
370	}
371	in->end = &in->buf->buffer->content[in->buf->buffer->use];
372
373	CHECK_BUFFER(in);
374
375	return(ret);
376	}
377
378	/**
379	* xmlParserInputShrink:
380	* @in: an XML parser input
381	*
382	* This function removes used input for the parser.
383	*/
384	void
385	xmlParserInputShrink(xmlParserInputPtr in) {
386	int used;
387	int ret;
388	int indx;
389
390	#ifdef DEBUG_INPUT
391	xmlGenericError(xmlGenericErrorContext, "Shrink\n");
392	#endif
393	if (in == NULL) return;
394	if (in->buf == NULL) return;
395	if (in->base == NULL) return;
396	if (in->cur == NULL) return;
397	if (in->buf->buffer == NULL) return;
398
399	CHECK_BUFFER(in);
400
401	used = in->cur - in->buf->buffer->content;
402	/*
403	* Do not shrink on large buffers whose only a tiny fraction
404	* was consumed
405	*/
406	if (used > INPUT_CHUNK) {
407	ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
408	if (ret > 0) {
409	in->cur -= ret;
410	in->consumed += ret;
411	}
412	in->end = &in->buf->buffer->content[in->buf->buffer->use];
413	}
414
415	CHECK_BUFFER(in);
416
417	if (in->buf->buffer->use > INPUT_CHUNK) {
418	return;
419	}
420	xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
421	if (in->base != in->buf->buffer->content) {
422	/*
423	* the buffer has been reallocated
424	*/
425	indx = in->cur - in->base;
426	in->base = in->buf->buffer->content;
427	in->cur = &in->buf->buffer->content[indx];
428	}
429	in->end = &in->buf->buffer->content[in->buf->buffer->use];
430
431	CHECK_BUFFER(in);
432	}
433
434	/************************************************************************
435	* *
436	* UTF8 character input and related functions *
437	* *
438	************************************************************************/
439
440	/**
441	* xmlNextChar:
442	* @ctxt: the XML parser context
443	*
444	* Skip to the next char input char.
445	*/
446
447	void
448	xmlNextChar(xmlParserCtxtPtr ctxt)
449	{
450	if ((ctxt == NULL) \|\| (ctxt->instate == XML_PARSER_EOF) \|\|
451	(ctxt->input == NULL))
452	return;
453
454	if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
455	if ((*ctxt->input->cur == 0) &&
456	(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
457	(ctxt->instate != XML_PARSER_COMMENT)) {
458	/*
459	* If we are at the end of the current entity and
460	* the context allows it, we pop consumed entities
461	* automatically.
462	* the auto closing should be blocked in other cases
463	*/
464	xmlPopInput(ctxt);
465	} else {
466	const unsigned char *cur;
467	unsigned char c;
468
469	/*
470	* 2.11 End-of-Line Handling
471	* the literal two-character sequence "#xD#xA" or a standalone
472	* literal #xD, an XML processor must pass to the application
473	* the single character #xA.
474	*/
475	if (*(ctxt->input->cur) == '\n') {
476	ctxt->input->line++; ctxt->input->col = 1;
477	} else
478	ctxt->input->col++;
479
480	/*
481	* We are supposed to handle UTF8, check it's valid
482	* From rfc2044: encoding of the Unicode values on UTF-8:
483	*
484	* UCS-4 range (hex.) UTF-8 octet sequence (binary)
485	* 0000 0000-0000 007F 0xxxxxxx
486	* 0000 0080-0000 07FF 110xxxxx 10xxxxxx
487	* 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
488	*
489	* Check for the 0x110000 limit too
490	*/
491	cur = ctxt->input->cur;
492
493	c = *cur;
494	if (c & 0x80) {
495	if (c == 0xC0)
496	goto encoding_error;
497	if (cur[1] == 0)
498	xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
499	if ((cur[1] & 0xc0) != 0x80)
500	goto encoding_error;
501	if ((c & 0xe0) == 0xe0) {
502	unsigned int val;
503
504	if (cur[2] == 0)
505	xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
506	if ((cur[2] & 0xc0) != 0x80)
507	goto encoding_error;
508	if ((c & 0xf0) == 0xf0) {
509	if (cur[3] == 0)
510	xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
511	if (((c & 0xf8) != 0xf0) \|\|
512	((cur[3] & 0xc0) != 0x80))
513	goto encoding_error;
514	/* 4-byte code */
515	ctxt->input->cur += 4;
516	val = (cur[0] & 0x7) << 18;
517	val \|= (cur[1] & 0x3f) << 12;
518	val \|= (cur[2] & 0x3f) << 6;
519	val \|= cur[3] & 0x3f;
520	} else {
521	/* 3-byte code */
522	ctxt->input->cur += 3;
523	val = (cur[0] & 0xf) << 12;
524	val \|= (cur[1] & 0x3f) << 6;
525	val \|= cur[2] & 0x3f;
526	}
527	if (((val > 0xd7ff) && (val < 0xe000)) \|\|
528	((val > 0xfffd) && (val < 0x10000)) \|\|
529	(val >= 0x110000)) {
530	xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
531	"Char 0x%X out of allowed range\n",
532	val);
533	}
534	} else
535	/* 2-byte code */
536	ctxt->input->cur += 2;
537	} else
538	/* 1-byte code */
539	ctxt->input->cur++;
540
541	ctxt->nbChars++;
542	if (*ctxt->input->cur == 0)
543	xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
544	}
545	} else {
546	/*
547	* Assume it's a fixed length encoding (1) with
548	* a compatible encoding for the ASCII set, since
549	* XML constructs only use < 128 chars
550	*/
551
552	if (*(ctxt->input->cur) == '\n') {
553	ctxt->input->line++; ctxt->input->col = 1;
554	} else
555	ctxt->input->col++;
556	ctxt->input->cur++;
557	ctxt->nbChars++;
558	if (*ctxt->input->cur == 0)
559	xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
560	}
561	if ((*ctxt->input->cur == '%') && (!ctxt->html))
562	xmlParserHandlePEReference(ctxt);
563	if ((*ctxt->input->cur == 0) &&
564	(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
565	xmlPopInput(ctxt);
566	return;
567	encoding_error:
568	/*
569	* If we detect an UTF8 error that probably mean that the
570	* input encoding didn't get properly advertised in the
571	* declaration header. Report the error and switch the encoding
572	* to ISO-Latin-1 (if you don't like this policy, just declare the
573	* encoding !)
574	*/
575	if ((ctxt == NULL) \|\| (ctxt->input == NULL) \|\|
576	(ctxt->input->end - ctxt->input->cur < 4)) {
577	__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
578	"Input is not proper UTF-8, indicate encoding !\n",
579	NULL, NULL);
580	} else {
581	char buffer[150];
582
583	snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
584	ctxt->input->cur[0], ctxt->input->cur[1],
585	ctxt->input->cur[2], ctxt->input->cur[3]);
586	__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
587	"Input is not proper UTF-8, indicate encoding !\n%s",
588	BAD_CAST buffer, NULL);
589	}
590	ctxt->charset = XML_CHAR_ENCODING_8859_1;
591	ctxt->input->cur++;
592	return;
593	}
594
595	/**
596	* xmlCurrentChar:
597	* @ctxt: the XML parser context
598	* @len: pointer to the length of the char read
599	*
600	* The current char value, if using UTF-8 this may actually span multiple
601	* bytes in the input buffer. Implement the end of line normalization:
602	* 2.11 End-of-Line Handling
603	* Wherever an external parsed entity or the literal entity value
604	* of an internal parsed entity contains either the literal two-character
605	* sequence "#xD#xA" or a standalone literal #xD, an XML processor
606	* must pass to the application the single character #xA.
607	* This behavior can conveniently be produced by normalizing all
608	* line breaks to #xA on input, before parsing.)
609	*
610	* Returns the current char value and its length
611	*/
612
613	int
614	xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
615	if ((ctxt == NULL) \|\| (len == NULL) \|\| (ctxt->input == NULL)) return(0);
616	if (ctxt->instate == XML_PARSER_EOF)
617	return(0);
618
619	if ((ctxt->input->cur >= 0x20) && (ctxt->input->cur <= 0x7F)) {
620	*len = 1;
621	return((int) *ctxt->input->cur);
622	}
623	if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
624	/*
625	* We are supposed to handle UTF8, check it's valid
626	* From rfc2044: encoding of the Unicode values on UTF-8:
627	*
628	* UCS-4 range (hex.) UTF-8 octet sequence (binary)
629	* 0000 0000-0000 007F 0xxxxxxx
630	* 0000 0080-0000 07FF 110xxxxx 10xxxxxx
631	* 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
632	*
633	* Check for the 0x110000 limit too
634	*/
635	const unsigned char *cur = ctxt->input->cur;
636	unsigned char c;
637	unsigned int val;
638
639	c = *cur;
640	if (c & 0x80) {
641	if (c == 0xC0)
642	goto encoding_error;
643	if (cur[1] == 0)
644	xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
645	if ((cur[1] & 0xc0) != 0x80)
646	goto encoding_error;
647	if ((c & 0xe0) == 0xe0) {
648
649	if (cur[2] == 0)
650	xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
651	if ((cur[2] & 0xc0) != 0x80)
652	goto encoding_error;
653	if ((c & 0xf0) == 0xf0) {
654	if (cur[3] == 0)
655	xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
656	if (((c & 0xf8) != 0xf0) \|\|
657	((cur[3] & 0xc0) != 0x80))
658	goto encoding_error;
659	/* 4-byte code */
660	*len = 4;
661	val = (cur[0] & 0x7) << 18;
662	val \|= (cur[1] & 0x3f) << 12;
663	val \|= (cur[2] & 0x3f) << 6;
664	val \|= cur[3] & 0x3f;
665	} else {
666	/* 3-byte code */
667	*len = 3;
668	val = (cur[0] & 0xf) << 12;
669	val \|= (cur[1] & 0x3f) << 6;
670	val \|= cur[2] & 0x3f;
671	}
672	} else {
673	/* 2-byte code */
674	*len = 2;
675	val = (cur[0] & 0x1f) << 6;
676	val \|= cur[1] & 0x3f;
677	}
678	if (!IS_CHAR(val)) {
679	xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
680	"Char 0x%X out of allowed range\n", val);
681	}
682	return(val);
683	} else {
684	/* 1-byte code */
685	*len = 1;
686	if (*ctxt->input->cur == 0xD) {
687	if (ctxt->input->cur[1] == 0xA) {
688	ctxt->nbChars++;
689	ctxt->input->cur++;
690	}
691	return(0xA);
692	}
693	return((int) *ctxt->input->cur);
694	}
695	}
696	/*
697	* Assume it's a fixed length encoding (1) with
698	* a compatible encoding for the ASCII set, since
699	* XML constructs only use < 128 chars
700	*/
701	*len = 1;
702	if (*ctxt->input->cur == 0xD) {
703	if (ctxt->input->cur[1] == 0xA) {
704	ctxt->nbChars++;
705	ctxt->input->cur++;
706	}
707	return(0xA);
708	}
709	return((int) *ctxt->input->cur);
710	encoding_error:
711	/*
712	* An encoding problem may arise from a truncated input buffer
713	* splitting a character in the middle. In that case do not raise
714	* an error but return 0 to endicate an end of stream problem
715	*/
716	if (ctxt->input->end - ctxt->input->cur < 4) {
717	*len = 0;
718	return(0);
719	}
720
721	/*
722	* If we detect an UTF8 error that probably mean that the
723	* input encoding didn't get properly advertised in the
724	* declaration header. Report the error and switch the encoding
725	* to ISO-Latin-1 (if you don't like this policy, just declare the
726	* encoding !)
727	*/
728	{
729	char buffer[150];
730
731	snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
732	ctxt->input->cur[0], ctxt->input->cur[1],
733	ctxt->input->cur[2], ctxt->input->cur[3]);
734	__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
735	"Input is not proper UTF-8, indicate encoding !\n%s",
736	BAD_CAST buffer, NULL);
737	}
738	ctxt->charset = XML_CHAR_ENCODING_8859_1;
739	*len = 1;
740	return((int) *ctxt->input->cur);
741	}
742
743	/**
744	* xmlStringCurrentChar:
745	* @ctxt: the XML parser context
746	* @cur: pointer to the beginning of the char
747	* @len: pointer to the length of the char read
748	*
749	* The current char value, if using UTF-8 this may actually span multiple
750	* bytes in the input buffer.
751	*
752	* Returns the current char value and its length
753	*/
754
755	int
756	xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len)
757	{
758	if ((len == NULL) \|\| (cur == NULL)) return(0);
759	if ((ctxt == NULL) \|\| (ctxt->charset == XML_CHAR_ENCODING_UTF8)) {
760	/*
761	* We are supposed to handle UTF8, check it's valid
762	* From rfc2044: encoding of the Unicode values on UTF-8:
763	*
764	* UCS-4 range (hex.) UTF-8 octet sequence (binary)
765	* 0000 0000-0000 007F 0xxxxxxx
766	* 0000 0080-0000 07FF 110xxxxx 10xxxxxx
767	* 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
768	*
769	* Check for the 0x110000 limit too
770	*/
771	unsigned char c;
772	unsigned int val;
773
774	c = *cur;
775	if (c & 0x80) {
776	if ((cur[1] & 0xc0) != 0x80)
777	goto encoding_error;
778	if ((c & 0xe0) == 0xe0) {
779
780	if ((cur[2] & 0xc0) != 0x80)
781	goto encoding_error;
782	if ((c & 0xf0) == 0xf0) {
783	if (((c & 0xf8) != 0xf0) \|\| ((cur[3] & 0xc0) != 0x80))
784	goto encoding_error;
785	/* 4-byte code */
786	*len = 4;
787	val = (cur[0] & 0x7) << 18;
788	val \|= (cur[1] & 0x3f) << 12;
789	val \|= (cur[2] & 0x3f) << 6;
790	val \|= cur[3] & 0x3f;
791	} else {
792	/* 3-byte code */
793	*len = 3;
794	val = (cur[0] & 0xf) << 12;
795	val \|= (cur[1] & 0x3f) << 6;
796	val \|= cur[2] & 0x3f;
797	}
798	} else {
799	/* 2-byte code */
800	*len = 2;
801	val = (cur[0] & 0x1f) << 6;
802	val \|= cur[1] & 0x3f;
803	}
804	if (!IS_CHAR(val)) {
805	xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
806	"Char 0x%X out of allowed range\n", val);
807	}
808	return (val);
809	} else {
810	/* 1-byte code */
811	*len = 1;
812	return ((int) *cur);
813	}
814	}
815	/*
816	* Assume it's a fixed length encoding (1) with
817	* a compatible encoding for the ASCII set, since
818	* XML constructs only use < 128 chars
819	*/
820	*len = 1;
821	return ((int) *cur);
822	encoding_error:
823
824	/*
825	* An encoding problem may arise from a truncated input buffer
826	* splitting a character in the middle. In that case do not raise
827	* an error but return 0 to endicate an end of stream problem
828	*/
829	if ((ctxt == NULL) \|\| (ctxt->input == NULL) \|\|
830	(ctxt->input->end - ctxt->input->cur < 4)) {
831	*len = 0;
832	return(0);
833	}
834	/*
835	* If we detect an UTF8 error that probably mean that the
836	* input encoding didn't get properly advertised in the
837	* declaration header. Report the error and switch the encoding
838	* to ISO-Latin-1 (if you don't like this policy, just declare the
839	* encoding !)
840	*/
841	{
842	char buffer[150];
843
844	snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
845	ctxt->input->cur[0], ctxt->input->cur[1],
846	ctxt->input->cur[2], ctxt->input->cur[3]);
847	__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
848	"Input is not proper UTF-8, indicate encoding !\n%s",
849	BAD_CAST buffer, NULL);
850	}
851	*len = 1;
852	return ((int) *cur);
853	}
854
855	/**
856	* xmlCopyCharMultiByte:
857	* @out: pointer to an array of xmlChar
858	* @val: the char value
859	*
860	* append the char value in the array
861	*
862	* Returns the number of xmlChar written
863	*/
864	int
865	xmlCopyCharMultiByte(xmlChar *out, int val) {
866	if (out == NULL) return(0);
867	/*
868	* We are supposed to handle UTF8, check it's valid
869	* From rfc2044: encoding of the Unicode values on UTF-8:
870	*
871	* UCS-4 range (hex.) UTF-8 octet sequence (binary)
872	* 0000 0000-0000 007F 0xxxxxxx
873	* 0000 0080-0000 07FF 110xxxxx 10xxxxxx
874	* 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
875	*/
876	if (val >= 0x80) {
877	xmlChar *savedout = out;
878	int bits;
879	if (val < 0x800) { *out++= (val >> 6) \| 0xC0; bits= 0; }
880	else if (val < 0x10000) { *out++= (val >> 12) \| 0xE0; bits= 6;}
881	else if (val < 0x110000) { *out++= (val >> 18) \| 0xF0; bits= 12; }
882	else {
883	xmlErrEncodingInt(NULL, XML_ERR_INVALID_CHAR,
884	"Internal error, xmlCopyCharMultiByte 0x%X out of bound\n",
885	val);
886	return(0);
887	}
888	for ( ; bits >= 0; bits-= 6)
889	*out++= ((val >> bits) & 0x3F) \| 0x80 ;
890	return (out - savedout);
891	}
892	*out = (xmlChar) val;
893	return 1;
894	}
895
896	/**
897	* xmlCopyChar:
898	* @len: Ignored, compatibility
899	* @out: pointer to an array of xmlChar
900	* @val: the char value
901	*
902	* append the char value in the array
903	*
904	* Returns the number of xmlChar written
905	*/
906
907	int
908	xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
909	if (out == NULL) return(0);
910	/* the len parameter is ignored */
911	if (val >= 0x80) {
912	return(xmlCopyCharMultiByte (out, val));
913	}
914	*out = (xmlChar) val;
915	return 1;
916	}
917
918	/************************************************************************
919	* *
920	* Commodity functions to switch encodings *
921	* *
922	************************************************************************/
923
924	/**
925	* xmlSwitchEncoding:
926	* @ctxt: the parser context
927	* @enc: the encoding value (number)
928	*
929	* change the input functions when discovering the character encoding
930	* of a given entity.
931	*
932	* Returns 0 in case of success, -1 otherwise
933	*/
934	int
935	xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
936	{
937	xmlCharEncodingHandlerPtr handler;
938
939	if (ctxt == NULL) return(-1);
940	switch (enc) {
941	case XML_CHAR_ENCODING_ERROR:
942	__xmlErrEncoding(ctxt, XML_ERR_UNKNOWN_ENCODING,
943	"encoding unknown\n", NULL, NULL);
944	return(-1);
945	case XML_CHAR_ENCODING_NONE:
946	/* let's assume it's UTF-8 without the XML decl */
947	ctxt->charset = XML_CHAR_ENCODING_UTF8;
948	return(0);
949	case XML_CHAR_ENCODING_UTF8:
950	/* default encoding, no conversion should be needed */
951	ctxt->charset = XML_CHAR_ENCODING_UTF8;
952
953	/*
954	* Errata on XML-1.0 June 20 2001
955	* Specific handling of the Byte Order Mark for
956	* UTF-8
957	*/
958	if ((ctxt->input != NULL) &&
959	(ctxt->input->cur[0] == 0xEF) &&
960	(ctxt->input->cur[1] == 0xBB) &&
961	(ctxt->input->cur[2] == 0xBF)) {
962	ctxt->input->cur += 3;
963	}
964	return(0);
965	case XML_CHAR_ENCODING_UTF16LE:
966	case XML_CHAR_ENCODING_UTF16BE:
967	/*The raw input characters are encoded
968	*in UTF-16. As we expect this function
969	*to be called after xmlCharEncInFunc, we expect
970	*ctxt->input->cur to contain UTF-8 encoded characters.
971	*So the raw UTF16 Byte Order Mark
972	*has also been converted into
973	*an UTF-8 BOM. Let's skip that BOM.
974	*/
975	if ((ctxt->input != NULL) && (ctxt->input->cur != NULL) &&
976	(ctxt->input->cur[0] == 0xEF) &&
977	(ctxt->input->cur[1] == 0xBB) &&
978	(ctxt->input->cur[2] == 0xBF)) {
979	ctxt->input->cur += 3;
980	}
981	break ;
982	default:
983	break;
984	}
985	handler = xmlGetCharEncodingHandler(enc);
986	if (handler == NULL) {
987	/*
988	* Default handlers.
989	*/
990	switch (enc) {
991	case XML_CHAR_ENCODING_ASCII:
992	/* default encoding, no conversion should be needed */
993	ctxt->charset = XML_CHAR_ENCODING_UTF8;
994	return(0);
995	case XML_CHAR_ENCODING_UTF16LE:
996	break;
997	case XML_CHAR_ENCODING_UTF16BE:
998	break;
999	case XML_CHAR_ENCODING_UCS4LE:
1000	__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1001	"encoding not supported %s\n",
1002	BAD_CAST "USC4 little endian", NULL);
1003	break;
1004	case XML_CHAR_ENCODING_UCS4BE:
1005	__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1006	"encoding not supported %s\n",
1007	BAD_CAST "USC4 big endian", NULL);
1008	break;
1009	case XML_CHAR_ENCODING_EBCDIC:
1010	__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1011	"encoding not supported %s\n",
1012	BAD_CAST "EBCDIC", NULL);
1013	break;
1014	case XML_CHAR_ENCODING_UCS4_2143:
1015	__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1016	"encoding not supported %s\n",
1017	BAD_CAST "UCS4 2143", NULL);
1018	break;
1019	case XML_CHAR_ENCODING_UCS4_3412:
1020	__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1021	"encoding not supported %s\n",
1022	BAD_CAST "UCS4 3412", NULL);
1023	break;
1024	case XML_CHAR_ENCODING_UCS2:
1025	__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1026	"encoding not supported %s\n",
1027	BAD_CAST "UCS2", NULL);
1028	break;
1029	case XML_CHAR_ENCODING_8859_1:
1030	case XML_CHAR_ENCODING_8859_2:
1031	case XML_CHAR_ENCODING_8859_3:
1032	case XML_CHAR_ENCODING_8859_4:
1033	case XML_CHAR_ENCODING_8859_5:
1034	case XML_CHAR_ENCODING_8859_6:
1035	case XML_CHAR_ENCODING_8859_7:
1036	case XML_CHAR_ENCODING_8859_8:
1037	case XML_CHAR_ENCODING_8859_9:
1038	/*
1039	* We used to keep the internal content in the
1040	* document encoding however this turns being unmaintainable
1041	* So xmlGetCharEncodingHandler() will return non-null
1042	* values for this now.
1043	*/
1044	if ((ctxt->inputNr == 1) &&
1045	(ctxt->encoding == NULL) &&
1046	(ctxt->input != NULL) &&
1047	(ctxt->input->encoding != NULL)) {
1048	ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1049	}
1050	ctxt->charset = enc;
1051	return(0);
1052	case XML_CHAR_ENCODING_2022_JP:
1053	__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1054	"encoding not supported %s\n",
1055	BAD_CAST "ISO-2022-JP", NULL);
1056	break;
1057	case XML_CHAR_ENCODING_SHIFT_JIS:
1058	__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1059	"encoding not supported %s\n",
1060	BAD_CAST "Shift_JIS", NULL);
1061	break;
1062	case XML_CHAR_ENCODING_EUC_JP:
1063	__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1064	"encoding not supported %s\n",
1065	BAD_CAST "EUC-JP", NULL);
1066	break;
1067	default:
1068	break;
1069	}
1070	}
1071	if (handler == NULL)
1072	return(-1);
1073	ctxt->charset = XML_CHAR_ENCODING_UTF8;
1074	return(xmlSwitchToEncoding(ctxt, handler));
1075	}
1076
1077	/**
1078	* xmlSwitchInputEncoding:
1079	* @ctxt: the parser context
1080	* @input: the input stream
1081	* @handler: the encoding handler
1082	*
1083	* change the input functions when discovering the character encoding
1084	* of a given entity.
1085	*
1086	* Returns 0 in case of success, -1 otherwise
1087	*/
1088	int
1089	xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
1090	xmlCharEncodingHandlerPtr handler)
1091	{
1092	int nbchars;
1093
1094	if (handler == NULL)
1095	return (-1);
1096	if (input == NULL)
1097	return (-1);
1098	if (input->buf != NULL) {
1099	if (input->buf->encoder != NULL) {
1100	/*
1101	* Check in case the auto encoding detetection triggered
1102	* in already.
1103	*/
1104	if (input->buf->encoder == handler)
1105	return (0);
1106
1107	/*
1108	* "UTF-16" can be used for both LE and BE
1109	if ((!xmlStrncmp(BAD_CAST input->buf->encoder->name,
1110	BAD_CAST "UTF-16", 6)) &&
1111	(!xmlStrncmp(BAD_CAST handler->name,
1112	BAD_CAST "UTF-16", 6))) {
1113	return(0);
1114	}
1115	*/
1116
1117	/*
1118	* Note: this is a bit dangerous, but that's what it
1119	* takes to use nearly compatible signature for different
1120	* encodings.
1121	*/
1122	xmlCharEncCloseFunc(input->buf->encoder);
1123	input->buf->encoder = handler;
1124	return (0);
1125	}
1126	input->buf->encoder = handler;
1127
1128	/*
1129	* Is there already some content down the pipe to convert ?
1130	*/
1131	if ((input->buf->buffer != NULL) && (input->buf->buffer->use > 0)) {
1132	int processed;
1133	unsigned int use;
1134
1135	/*
1136	* Specific handling of the Byte Order Mark for
1137	* UTF-16
1138	*/
1139	if ((handler->name != NULL) &&
1140	(!strcmp(handler->name, "UTF-16LE") \|\|
1141	!strcmp(handler->name, "UTF-16")) &&
1142	(input->cur[0] == 0xFF) && (input->cur[1] == 0xFE)) {
1143	input->cur += 2;
1144	}
1145	if ((handler->name != NULL) &&
1146	(!strcmp(handler->name, "UTF-16BE")) &&
1147	(input->cur[0] == 0xFE) && (input->cur[1] == 0xFF)) {
1148	input->cur += 2;
1149	}
1150	/*
1151	* Errata on XML-1.0 June 20 2001
1152	* Specific handling of the Byte Order Mark for
1153	* UTF-8
1154	*/
1155	if ((handler->name != NULL) &&
1156	(!strcmp(handler->name, "UTF-8")) &&
1157	(input->cur[0] == 0xEF) &&
1158	(input->cur[1] == 0xBB) && (input->cur[2] == 0xBF)) {
1159	input->cur += 3;
1160	}
1161
1162	/*
1163	* Shrink the current input buffer.
1164	* Move it as the raw buffer and create a new input buffer
1165	*/
1166	processed = input->cur - input->base;
1167	xmlBufferShrink(input->buf->buffer, processed);
1168	input->buf->raw = input->buf->buffer;
1169	input->buf->buffer = xmlBufferCreate();
1170	input->buf->rawconsumed = processed;
1171	use = input->buf->raw->use;
1172
1173	if (ctxt->html) {
1174	/*
1175	* convert as much as possible of the buffer
1176	*/
1177	nbchars = xmlCharEncInFunc(input->buf->encoder,
1178	input->buf->buffer,
1179	input->buf->raw);
1180	} else {
1181	/*
1182	* convert just enough to get
1183	* '<?xml version="1.0" encoding="xxx"?>'
1184	* parsed with the autodetected encoding
1185	* into the parser reading buffer.
1186	*/
1187	nbchars = xmlCharEncFirstLine(input->buf->encoder,
1188	input->buf->buffer,
1189	input->buf->raw);
1190	}
1191	if (nbchars < 0) {
1192	xmlErrInternal(ctxt,
1193	"switching encoding: encoder error\n",
1194	NULL);
1195	return (-1);
1196	}
1197	input->buf->rawconsumed += use - input->buf->raw->use;
1198	input->base = input->cur = input->buf->buffer->content;
1199	input->end = &input->base[input->buf->buffer->use];
1200
1201	}
1202	return (0);
1203	} else if (input->length == 0) {
1204	/*
1205	* When parsing a static memory array one must know the
1206	* size to be able to convert the buffer.
1207	*/
1208	xmlErrInternal(ctxt, "switching encoding : no input\n", NULL);
1209	return (-1);
1210	}
1211	return (0);
1212	}
1213
1214	/**
1215	* xmlSwitchToEncoding:
1216	* @ctxt: the parser context
1217	* @handler: the encoding handler
1218	*
1219	* change the input functions when discovering the character encoding
1220	* of a given entity.
1221	*
1222	* Returns 0 in case of success, -1 otherwise
1223	*/
1224	int
1225	xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1226	{
1227	int ret = 0;
1228
1229	if (handler != NULL) {
1230	if (ctxt->input != NULL) {
1231	ret = xmlSwitchInputEncoding(ctxt, ctxt->input, handler);
1232	} else {
1233	xmlErrInternal(ctxt, "xmlSwitchToEncoding : no input\n",
1234	NULL);
1235	return(-1);
1236	}
1237	/*
1238	* The parsing is now done in UTF8 natively
1239	*/
1240	ctxt->charset = XML_CHAR_ENCODING_UTF8;
1241	} else
1242	return(-1);
1243	return(ret);
1244	}
1245
1246	/************************************************************************
1247	* *
1248	* Commodity functions to handle entities processing *
1249	* *
1250	************************************************************************/
1251
1252	/**
1253	* xmlFreeInputStream:
1254	* @input: an xmlParserInputPtr
1255	*
1256	* Free up an input stream.
1257	*/
1258	void
1259	xmlFreeInputStream(xmlParserInputPtr input) {
1260	if (input == NULL) return;
1261
1262	if (input->filename != NULL) xmlFree((char *) input->filename);
1263	if (input->directory != NULL) xmlFree((char *) input->directory);
1264	if (input->encoding != NULL) xmlFree((char *) input->encoding);
1265	if (input->version != NULL) xmlFree((char *) input->version);
1266	if ((input->free != NULL) && (input->base != NULL))
1267	input->free((xmlChar *) input->base);
1268	if (input->buf != NULL)
1269	xmlFreeParserInputBuffer(input->buf);
1270	xmlFree(input);
1271	}
1272
1273	/**
1274	* xmlNewInputStream:
1275	* @ctxt: an XML parser context
1276	*
1277	* Create a new input stream structure
1278	* Returns the new input stream or NULL
1279	*/
1280	xmlParserInputPtr
1281	xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1282	xmlParserInputPtr input;
1283	static int id = 0;
1284
1285	input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1286	if (input == NULL) {
1287	xmlErrMemory(ctxt, "couldn't allocate a new input stream\n");
1288	return(NULL);
1289	}
1290	memset(input, 0, sizeof(xmlParserInput));
1291	input->line = 1;
1292	input->col = 1;
1293	input->standalone = -1;
1294	/*
1295	* we don't care about thread reentrancy unicity for a single
1296	* parser context (and hence thread) is sufficient.
1297	*/
1298	input->id = id++;
1299	return(input);
1300	}
1301
1302	/**
1303	* xmlNewIOInputStream:
1304	* @ctxt: an XML parser context
1305	* @input: an I/O Input
1306	* @enc: the charset encoding if known
1307	*
1308	* Create a new input stream structure encapsulating the @input into
1309	* a stream suitable for the parser.
1310	*
1311	* Returns the new input stream or NULL
1312	*/
1313	xmlParserInputPtr
1314	xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
1315	xmlCharEncoding enc) {
1316	xmlParserInputPtr inputStream;
1317
1318	if (input == NULL) return(NULL);
1319	if (xmlParserDebugEntities)
1320	xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
1321	inputStream = xmlNewInputStream(ctxt);
1322	if (inputStream == NULL) {
1323	return(NULL);
1324	}
1325	inputStream->filename = NULL;
1326	inputStream->buf = input;
1327	inputStream->base = inputStream->buf->buffer->content;
1328	inputStream->cur = inputStream->buf->buffer->content;
1329	inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
1330	if (enc != XML_CHAR_ENCODING_NONE) {
1331	xmlSwitchEncoding(ctxt, enc);
1332	}
1333
1334	return(inputStream);
1335	}
1336
1337	/**
1338	* xmlNewEntityInputStream:
1339	* @ctxt: an XML parser context
1340	* @entity: an Entity pointer
1341	*
1342	* Create a new input stream based on an xmlEntityPtr
1343	*
1344	* Returns the new input stream or NULL
1345	*/
1346	xmlParserInputPtr
1347	xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1348	xmlParserInputPtr input;
1349
1350	if (entity == NULL) {
1351	xmlErrInternal(ctxt, "xmlNewEntityInputStream entity = NULL\n",
1352	NULL);
1353	return(NULL);
1354	}
1355	if (xmlParserDebugEntities)
1356	xmlGenericError(xmlGenericErrorContext,
1357	"new input from entity: %s\n", entity->name);
1358	if (entity->content == NULL) {
1359	switch (entity->etype) {
1360	case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1361	xmlErrInternal(ctxt, "Cannot parse entity %s\n",
1362	entity->name);
1363	break;
1364	case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
1365	case XML_EXTERNAL_PARAMETER_ENTITY:
1366	return(xmlLoadExternalEntity((char *) entity->URI,
1367	(char *) entity->ExternalID, ctxt));
1368	case XML_INTERNAL_GENERAL_ENTITY:
1369	xmlErrInternal(ctxt,
1370	"Internal entity %s without content !\n",
1371	entity->name);
1372	break;
1373	case XML_INTERNAL_PARAMETER_ENTITY:
1374	xmlErrInternal(ctxt,
1375	"Internal parameter entity %s without content !\n",
1376	entity->name);
1377	break;
1378	case XML_INTERNAL_PREDEFINED_ENTITY:
1379	xmlErrInternal(ctxt,
1380	"Predefined entity %s without content !\n",
1381	entity->name);
1382	break;
1383	}
1384	return(NULL);
1385	}
1386	input = xmlNewInputStream(ctxt);
1387	if (input == NULL) {
1388	return(NULL);
1389	}
1390	input->filename = (char *) entity->URI;
1391	input->base = entity->content;
1392	input->cur = entity->content;
1393	input->length = entity->length;
1394	input->end = &entity->content[input->length];
1395	return(input);
1396	}
1397
1398	/**
1399	* xmlNewStringInputStream:
1400	* @ctxt: an XML parser context
1401	* @buffer: an memory buffer
1402	*
1403	* Create a new input stream based on a memory buffer.
1404	* Returns the new input stream
1405	*/
1406	xmlParserInputPtr
1407	xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
1408	xmlParserInputPtr input;
1409
1410	if (buffer == NULL) {
1411	xmlErrInternal(ctxt, "xmlNewStringInputStream string = NULL\n",
1412	NULL);
1413	return(NULL);
1414	}
1415	if (xmlParserDebugEntities)
1416	xmlGenericError(xmlGenericErrorContext,
1417	"new fixed input: %.30s\n", buffer);
1418	input = xmlNewInputStream(ctxt);
1419	if (input == NULL) {
1420	xmlErrMemory(ctxt, "couldn't allocate a new input stream\n");
1421	return(NULL);
1422	}
1423	input->base = buffer;
1424	input->cur = buffer;
1425	input->length = xmlStrlen(buffer);
1426	input->end = &buffer[input->length];
1427	return(input);
1428	}
1429
1430	/**
1431	* xmlNewInputFromFile:
1432	* @ctxt: an XML parser context
1433	* @filename: the filename to use as entity
1434	*
1435	* Create a new input stream based on a file or an URL.
1436	*
1437	* Returns the new input stream or NULL in case of error
1438	*/
1439	xmlParserInputPtr
1440	xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
1441	xmlParserInputBufferPtr buf;
1442	xmlParserInputPtr inputStream;
1443	char *directory = NULL;
1444	xmlChar *URI = NULL;
1445
1446	if (xmlParserDebugEntities)
1447	xmlGenericError(xmlGenericErrorContext,
1448	"new input from file: %s\n", filename);
1449	if (ctxt == NULL) return(NULL);
1450	buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
1451	if (buf == NULL) {
1452	if (filename == NULL)
1453	__xmlLoaderErr(ctxt,
1454	"failed to load external entity: NULL filename \n",
1455	NULL);
1456	else
1457	__xmlLoaderErr(ctxt, "failed to load external entity \"%s\"\n",
1458	(const char *) filename);
1459	return(NULL);
1460	}
1461
1462	inputStream = xmlNewInputStream(ctxt);
1463	if (inputStream == NULL)
1464	return(NULL);
1465
1466	inputStream->buf = buf;
1467	inputStream = xmlCheckHTTPInput(ctxt, inputStream);
1468	if (inputStream == NULL)
1469	return(NULL);
1470
1471	if (inputStream->filename == NULL)
1472	URI = xmlStrdup((xmlChar *) filename);
1473	else
1474	URI = xmlStrdup((xmlChar *) inputStream->filename);
1475	directory = xmlParserGetDirectory((const char *) URI);
1476	if (inputStream->filename != NULL) xmlFree((char *)inputStream->filename);
1477	inputStream->filename = (char ) xmlCanonicPath((const xmlChar ) URI);
1478	if (URI != NULL) xmlFree((char *) URI);
1479	inputStream->directory = directory;
1480
1481	inputStream->base = inputStream->buf->buffer->content;
1482	inputStream->cur = inputStream->buf->buffer->content;
1483	inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
1484	if ((ctxt->directory == NULL) && (directory != NULL))
1485	ctxt->directory = (char ) xmlStrdup((const xmlChar ) directory);
1486	return(inputStream);
1487	}
1488
1489	/************************************************************************
1490	* *
1491	* Commodity functions to handle parser contexts *
1492	* *
1493	************************************************************************/
1494
1495	/**
1496	* xmlInitParserCtxt:
1497	* @ctxt: an XML parser context
1498	*
1499	* Initialize a parser context
1500	*
1501	* Returns 0 in case of success and -1 in case of error
1502	*/
1503
1504	int
1505	xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
1506	{
1507	xmlParserInputPtr input;
1508
1509	if(ctxt==NULL) {
1510	xmlErrInternal(NULL, "Got NULL parser context\n", NULL);
1511	return(-1);
1512	}
1513
1514	xmlDefaultSAXHandlerInit();
1515
1516	if (ctxt->dict == NULL)
1517	ctxt->dict = xmlDictCreate();
1518	if (ctxt->dict == NULL) {
1519	xmlErrMemory(NULL, "cannot initialize parser context\n");
1520	return(-1);
1521	}
1522	if (ctxt->sax == NULL)
1523	ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
1524	if (ctxt->sax == NULL) {
1525	xmlErrMemory(NULL, "cannot initialize parser context\n");
1526	return(-1);
1527	}
1528	else
1529	xmlSAXVersion(ctxt->sax, 2);
1530
1531	ctxt->maxatts = 0;
1532	ctxt->atts = NULL;
1533	/* Allocate the Input stack */
1534	if (ctxt->inputTab == NULL) {
1535	ctxt->inputTab = (xmlParserInputPtr *)
1536	xmlMalloc(5 * sizeof(xmlParserInputPtr));
1537	ctxt->inputMax = 5;
1538	}
1539	if (ctxt->inputTab == NULL) {
1540	xmlErrMemory(NULL, "cannot initialize parser context\n");
1541	ctxt->inputNr = 0;
1542	ctxt->inputMax = 0;
1543	ctxt->input = NULL;
1544	return(-1);
1545	}
1546	while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
1547	xmlFreeInputStream(input);
1548	}
1549	ctxt->inputNr = 0;
1550	ctxt->input = NULL;
1551
1552	ctxt->version = NULL;
1553	ctxt->encoding = NULL;
1554	ctxt->standalone = -1;
1555	ctxt->hasExternalSubset = 0;
1556	ctxt->hasPErefs = 0;
1557	ctxt->html = 0;
1558	ctxt->external = 0;
1559	ctxt->instate = XML_PARSER_START;
1560	ctxt->token = 0;
1561	ctxt->directory = NULL;
1562
1563	/* Allocate the Node stack */
1564	if (ctxt->nodeTab == NULL) {
1565	ctxt->nodeTab = (xmlNodePtr ) xmlMalloc(10 sizeof(xmlNodePtr));
1566	ctxt->nodeMax = 10;
1567	}
1568	if (ctxt->nodeTab == NULL) {
1569	xmlErrMemory(NULL, "cannot initialize parser context\n");
1570	ctxt->nodeNr = 0;
1571	ctxt->nodeMax = 0;
1572	ctxt->node = NULL;
1573	ctxt->inputNr = 0;
1574	ctxt->inputMax = 0;
1575	ctxt->input = NULL;
1576	return(-1);
1577	}
1578	ctxt->nodeNr = 0;
1579	ctxt->node = NULL;
1580
1581	/* Allocate the Name stack */
1582	if (ctxt->nameTab == NULL) {
1583	ctxt->nameTab = (const xmlChar *) xmlMalloc(10 sizeof(xmlChar *));
1584	ctxt->nameMax = 10;
1585	}
1586	if (ctxt->nameTab == NULL) {
1587	xmlErrMemory(NULL, "cannot initialize parser context\n");
1588	ctxt->nodeNr = 0;
1589	ctxt->nodeMax = 0;
1590	ctxt->node = NULL;
1591	ctxt->inputNr = 0;
1592	ctxt->inputMax = 0;
1593	ctxt->input = NULL;
1594	ctxt->nameNr = 0;
1595	ctxt->nameMax = 0;
1596	ctxt->name = NULL;
1597	return(-1);
1598	}
1599	ctxt->nameNr = 0;
1600	ctxt->name = NULL;
1601
1602	/* Allocate the space stack */
1603	if (ctxt->spaceTab == NULL) {
1604	ctxt->spaceTab = (int ) xmlMalloc(10 sizeof(int));
1605	ctxt->spaceMax = 10;
1606	}
1607	if (ctxt->spaceTab == NULL) {
1608	xmlErrMemory(NULL, "cannot initialize parser context\n");
1609	ctxt->nodeNr = 0;
1610	ctxt->nodeMax = 0;
1611	ctxt->node = NULL;
1612	ctxt->inputNr = 0;
1613	ctxt->inputMax = 0;
1614	ctxt->input = NULL;
1615	ctxt->nameNr = 0;
1616	ctxt->nameMax = 0;
1617	ctxt->name = NULL;
1618	ctxt->spaceNr = 0;
1619	ctxt->spaceMax = 0;
1620	ctxt->space = NULL;
1621	return(-1);
1622	}
1623	ctxt->spaceNr = 1;
1624	ctxt->spaceMax = 10;
1625	ctxt->spaceTab[0] = -1;
1626	ctxt->space = &ctxt->spaceTab[0];
1627	ctxt->userData = ctxt;
1628	ctxt->myDoc = NULL;
1629	ctxt->wellFormed = 1;
1630	ctxt->nsWellFormed = 1;
1631	ctxt->valid = 1;
1632	ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
1633	ctxt->validate = xmlDoValidityCheckingDefaultValue;
1634	ctxt->pedantic = xmlPedanticParserDefaultValue;
1635	ctxt->linenumbers = xmlLineNumbersDefaultValue;
1636	ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
1637	if (ctxt->keepBlanks == 0)
1638	ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
1639
1640	ctxt->vctxt.finishDtd = XML_CTXT_FINISH_DTD_0;
1641	ctxt->vctxt.userData = ctxt;
1642	ctxt->vctxt.error = xmlParserValidityError;
1643	ctxt->vctxt.warning = xmlParserValidityWarning;
1644	if (ctxt->validate) {
1645	if (xmlGetWarningsDefaultValue == 0)
1646	ctxt->vctxt.warning = NULL;
1647	else
1648	ctxt->vctxt.warning = xmlParserValidityWarning;
1649	ctxt->vctxt.nodeMax = 0;
1650	}
1651	ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
1652	ctxt->record_info = 0;
1653	ctxt->nbChars = 0;
1654	ctxt->checkIndex = 0;
1655	ctxt->inSubset = 0;
1656	ctxt->errNo = XML_ERR_OK;
1657	ctxt->depth = 0;
1658	ctxt->charset = XML_CHAR_ENCODING_UTF8;
1659	ctxt->catalogs = NULL;
1660	xmlInitNodeInfoSeq(&ctxt->node_seq);
1661	return(0);
1662	}
1663
1664	/**
1665	* xmlFreeParserCtxt:
1666	* @ctxt: an XML parser context
1667	*
1668	* Free all the memory used by a parser context. However the parsed
1669	* document in ctxt->myDoc is not freed.
1670	*/
1671
1672	void
1673	xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
1674	{
1675	xmlParserInputPtr input;
1676
1677	if (ctxt == NULL) return;
1678
1679	while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
1680	xmlFreeInputStream(input);
1681	}
1682	if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
1683	if (ctxt->nameTab != NULL) xmlFree((xmlChar * *)ctxt->nameTab);
1684	if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
1685	if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
1686	if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
1687	if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
1688	if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
1689	if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
1690	#ifdef LIBXML_SAX1_ENABLED
1691	if ((ctxt->sax != NULL) &&
1692	(ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler))
1693	#else
1694	if (ctxt->sax != NULL)
1695	#endif /* LIBXML_SAX1_ENABLED */
1696	xmlFree(ctxt->sax);
1697	if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
1698	if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
1699	if (ctxt->atts != NULL) xmlFree((xmlChar * *)ctxt->atts);
1700	if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
1701	if (ctxt->nsTab != NULL) xmlFree((char *) ctxt->nsTab);
1702	if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab);
1703	if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs);
1704	if (ctxt->attsDefault != NULL)
1705	xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
1706	if (ctxt->attsSpecial != NULL)
1707	xmlHashFree(ctxt->attsSpecial, NULL);
1708	if (ctxt->freeElems != NULL) {
1709	xmlNodePtr cur, next;
1710
1711	cur = ctxt->freeElems;
1712	while (cur != NULL) {
1713	next = cur->next;
1714	xmlFree(cur);
1715	cur = next;
1716	}
1717	}
1718	if (ctxt->freeAttrs != NULL) {
1719	xmlAttrPtr cur, next;
1720
1721	cur = ctxt->freeAttrs;
1722	while (cur != NULL) {
1723	next = cur->next;
1724	xmlFree(cur);
1725	cur = next;
1726	}
1727	}
1728	/*
1729	* cleanup the error strings
1730	*/
1731	if (ctxt->lastError.message != NULL)
1732	xmlFree(ctxt->lastError.message);
1733	if (ctxt->lastError.file != NULL)
1734	xmlFree(ctxt->lastError.file);
1735	if (ctxt->lastError.str1 != NULL)
1736	xmlFree(ctxt->lastError.str1);
1737	if (ctxt->lastError.str2 != NULL)
1738	xmlFree(ctxt->lastError.str2);
1739	if (ctxt->lastError.str3 != NULL)
1740	xmlFree(ctxt->lastError.str3);
1741
1742	#ifdef LIBXML_CATALOG_ENABLED
1743	if (ctxt->catalogs != NULL)
1744	xmlCatalogFreeLocal(ctxt->catalogs);
1745	#endif
1746	xmlFree(ctxt);
1747	}
1748
1749	/**
1750	* xmlNewParserCtxt:
1751	*
1752	* Allocate and initialize a new parser context.
1753	*
1754	* Returns the xmlParserCtxtPtr or NULL
1755	*/
1756
1757	xmlParserCtxtPtr
1758	xmlNewParserCtxt(void)
1759	{
1760	xmlParserCtxtPtr ctxt;
1761
1762	ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
1763	if (ctxt == NULL) {
1764	xmlErrMemory(NULL, "cannot allocate parser context\n");
1765	return(NULL);
1766	}
1767	memset(ctxt, 0, sizeof(xmlParserCtxt));
1768	if (xmlInitParserCtxt(ctxt) < 0) {
1769	xmlFreeParserCtxt(ctxt);
1770	return(NULL);
1771	}
1772	return(ctxt);
1773	}
1774
1775	/************************************************************************
1776	* *
1777	* Handling of node informations *
1778	* *
1779	************************************************************************/
1780
1781	/**
1782	* xmlClearParserCtxt:
1783	* @ctxt: an XML parser context
1784	*
1785	* Clear (release owned resources) and reinitialize a parser context
1786	*/
1787
1788	void
1789	xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
1790	{
1791	if (ctxt==NULL)
1792	return;
1793	xmlClearNodeInfoSeq(&ctxt->node_seq);
1794	xmlCtxtReset(ctxt);
1795	}
1796
1797
1798	/**
1799	* xmlParserFindNodeInfo:
1800	* @ctx: an XML parser context
1801	* @node: an XML node within the tree
1802	*
1803	* Find the parser node info struct for a given node
1804	*
1805	* Returns an xmlParserNodeInfo block pointer or NULL
1806	*/
1807	const xmlParserNodeInfo *
1808	xmlParserFindNodeInfo(const xmlParserCtxtPtr ctx, const xmlNodePtr node)
1809	{
1810	unsigned long pos;
1811
1812	if ((ctx == NULL) \|\| (node == NULL))
1813	return (NULL);
1814	/* Find position where node should be at */
1815	pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
1816	if (pos < ctx->node_seq.length
1817	&& ctx->node_seq.buffer[pos].node == node)
1818	return &ctx->node_seq.buffer[pos];
1819	else
1820	return NULL;
1821	}
1822
1823
1824	/**
1825	* xmlInitNodeInfoSeq:
1826	* @seq: a node info sequence pointer
1827	*
1828	* -- Initialize (set to initial state) node info sequence
1829	*/
1830	void
1831	xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1832	{
1833	if (seq == NULL)
1834	return;
1835	seq->length = 0;
1836	seq->maximum = 0;
1837	seq->buffer = NULL;
1838	}
1839
1840	/**
1841	* xmlClearNodeInfoSeq:
1842	* @seq: a node info sequence pointer
1843	*
1844	* -- Clear (release memory and reinitialize) node
1845	* info sequence
1846	*/
1847	void
1848	xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1849	{
1850	if (seq == NULL)
1851	return;
1852	if (seq->buffer != NULL)
1853	xmlFree(seq->buffer);
1854	xmlInitNodeInfoSeq(seq);
1855	}
1856
1857	/**
1858	* xmlParserFindNodeInfoIndex:
1859	* @seq: a node info sequence pointer
1860	* @node: an XML node pointer
1861	*
1862	*
1863	* xmlParserFindNodeInfoIndex : Find the index that the info record for
1864	* the given node is or should be at in a sorted sequence
1865	*
1866	* Returns a long indicating the position of the record
1867	*/
1868	unsigned long
1869	xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq,
1870	const xmlNodePtr node)
1871	{
1872	unsigned long upper, lower, middle;
1873	int found = 0;
1874
1875	if ((seq == NULL) \|\| (node == NULL))
1876	return ((unsigned long) -1);
1877
1878	/* Do a binary search for the key */
1879	lower = 1;
1880	upper = seq->length;
1881	middle = 0;
1882	while (lower <= upper && !found) {
1883	middle = lower + (upper - lower) / 2;
1884	if (node == seq->buffer[middle - 1].node)
1885	found = 1;
1886	else if (node < seq->buffer[middle - 1].node)
1887	upper = middle - 1;
1888	else
1889	lower = middle + 1;
1890	}
1891
1892	/* Return position */
1893	if (middle == 0 \|\| seq->buffer[middle - 1].node < node)
1894	return middle;
1895	else
1896	return middle - 1;
1897	}
1898
1899
1900	/**
1901	* xmlParserAddNodeInfo:
1902	* @ctxt: an XML parser context
1903	* @info: a node info sequence pointer
1904	*
1905	* Insert node info record into the sorted sequence
1906	*/
1907	void
1908	xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
1909	const xmlParserNodeInfoPtr info)
1910	{
1911	unsigned long pos;
1912
1913	if ((ctxt == NULL) \|\| (info == NULL)) return;
1914
1915	/* Find pos and check to see if node is already in the sequence */
1916	pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr)
1917	info->node);
1918
1919	if ((pos < ctxt->node_seq.length) &&
1920	(ctxt->node_seq.buffer != NULL) &&
1921	(ctxt->node_seq.buffer[pos].node == info->node)) {
1922	ctxt->node_seq.buffer[pos] = *info;
1923	}
1924
1925	/* Otherwise, we need to add new node to buffer */
1926	else {
1927	if (ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) {
1928	xmlParserNodeInfo *tmp_buffer;
1929	unsigned int byte_size;
1930
1931	if (ctxt->node_seq.maximum == 0)
1932	ctxt->node_seq.maximum = 2;
1933	byte_size = (sizeof(ctxt->node_seq.buffer)
1934	(2 * ctxt->node_seq.maximum));
1935
1936	if (ctxt->node_seq.buffer == NULL)
1937	tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size);
1938	else
1939	tmp_buffer =
1940	(xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer,
1941	byte_size);
1942
1943	if (tmp_buffer == NULL) {
1944	xmlErrMemory(ctxt, "failed to allocate buffer\n");
1945	return;
1946	}
1947	ctxt->node_seq.buffer = tmp_buffer;
1948	ctxt->node_seq.maximum *= 2;
1949	}
1950
1951	/* If position is not at end, move elements out of the way */
1952	if (pos != ctxt->node_seq.length) {
1953	unsigned long i;
1954
1955	for (i = ctxt->node_seq.length; i > pos; i--)
1956	ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
1957	}
1958
1959	/* Copy element and increase length */
1960	ctxt->node_seq.buffer[pos] = *info;
1961	ctxt->node_seq.length++;
1962	}
1963	}
1964
1965	/************************************************************************
1966	* *
1967	* Defaults settings *
1968	* *
1969	************************************************************************/
1970	/**
1971	* xmlPedanticParserDefault:
1972	* @val: int 0 or 1
1973	*
1974	* Set and return the previous value for enabling pedantic warnings.
1975	*
1976	* Returns the last value for 0 for no substitution, 1 for substitution.
1977	*/
1978
1979	int
1980	xmlPedanticParserDefault(int val) {
1981	int old = xmlPedanticParserDefaultValue;
1982
1983	xmlPedanticParserDefaultValue = val;
1984	return(old);
1985	}
1986
1987	/**
1988	* xmlLineNumbersDefault:
1989	* @val: int 0 or 1
1990	*
1991	* Set and return the previous value for enabling line numbers in elements
1992	* contents. This may break on old application and is turned off by default.
1993	*
1994	* Returns the last value for 0 for no substitution, 1 for substitution.
1995	*/
1996
1997	int
1998	xmlLineNumbersDefault(int val) {
1999	int old = xmlLineNumbersDefaultValue;
2000
2001	xmlLineNumbersDefaultValue = val;
2002	return(old);
2003	}
2004
2005	/**
2006	* xmlSubstituteEntitiesDefault:
2007	* @val: int 0 or 1
2008	*
2009	* Set and return the previous value for default entity support.
2010	* Initially the parser always keep entity references instead of substituting
2011	* entity values in the output. This function has to be used to change the
2012	* default parser behavior
2013	* SAX::substituteEntities() has to be used for changing that on a file by
2014	* file basis.
2015	*
2016	* Returns the last value for 0 for no substitution, 1 for substitution.
2017	*/
2018
2019	int
2020	xmlSubstituteEntitiesDefault(int val) {
2021	int old = xmlSubstituteEntitiesDefaultValue;
2022
2023	xmlSubstituteEntitiesDefaultValue = val;
2024	return(old);
2025	}
2026
2027	/**
2028	* xmlKeepBlanksDefault:
2029	* @val: int 0 or 1
2030	*
2031	* Set and return the previous value for default blanks text nodes support.
2032	* The 1.x version of the parser used an heuristic to try to detect
2033	* ignorable white spaces. As a result the SAX callback was generating
2034	* xmlSAX2IgnorableWhitespace() callbacks instead of characters() one, and when
2035	* using the DOM output text nodes containing those blanks were not generated.
2036	* The 2.x and later version will switch to the XML standard way and
2037	* ignorableWhitespace() are only generated when running the parser in
2038	* validating mode and when the current element doesn't allow CDATA or
2039	* mixed content.
2040	* This function is provided as a way to force the standard behavior
2041	* on 1.X libs and to switch back to the old mode for compatibility when
2042	* running 1.X client code on 2.X . Upgrade of 1.X code should be done
2043	* by using xmlIsBlankNode() commodity function to detect the "empty"
2044	* nodes generated.
2045	* This value also affect autogeneration of indentation when saving code
2046	* if blanks sections are kept, indentation is not generated.
2047	*
2048	* Returns the last value for 0 for no substitution, 1 for substitution.
2049	*/
2050
2051	int
2052	xmlKeepBlanksDefault(int val) {
2053	int old = xmlKeepBlanksDefaultValue;
2054
2055	xmlKeepBlanksDefaultValue = val;
2056	xmlIndentTreeOutput = !val;
2057	return(old);
2058	}
2059
2060	#define bottom_parserInternals
2061	#include "elfgcchack.h"

Note: See TracBrowser for help on using the repository browser.

source: vbox/trunk/src/libs/libxml2-2.6.30/parserInternals.c@ 15236

Download in other formats: