parserInternals.c@ 93943

Last change on this file since 93943 was 65950, checked in by vboxsync, 8 years ago
libxml 2.9.4: fix export
Property svn:eol-style set to `native`
File size: 61.2 KB

Line
1	/*
2	* parserInternals.c : Internal routines (and obsolete ones) needed for the
3	* XML and HTML parsers.
4	*
5	* See Copyright for the status of this software.
6	*
7	* daniel@veillard.com
8	*/
9
10	#define IN_LIBXML
11	#include "libxml.h"
12
13	#if defined(WIN32) && !defined (__CYGWIN__)
14	#define XML_DIR_SEP '\\'
15	#else
16	#define XML_DIR_SEP '/'
17	#endif
18
19	#include <string.h>
20	#ifdef HAVE_CTYPE_H
21	#include <ctype.h>
22	#endif
23	#ifdef HAVE_STDLIB_H
24	#include <stdlib.h>
25	#endif
26	#ifdef HAVE_SYS_STAT_H
27	#include <sys/stat.h>
28	#endif
29	#ifdef HAVE_FCNTL_H
30	#include <fcntl.h>
31	#endif
32	#ifdef HAVE_UNISTD_H
33	#include <unistd.h>
34	#endif
35	#ifdef HAVE_ZLIB_H
36	#include <zlib.h>
37	#endif
38
39	#include <libxml/xmlmemory.h>
40	#include <libxml/tree.h>
41	#include <libxml/parser.h>
42	#include <libxml/parserInternals.h>
43	#include <libxml/valid.h>
44	#include <libxml/entities.h>
45	#include <libxml/xmlerror.h>
46	#include <libxml/encoding.h>
47	#include <libxml/valid.h>
48	#include <libxml/xmlIO.h>
49	#include <libxml/uri.h>
50	#include <libxml/dict.h>
51	#include <libxml/SAX.h>
52	#ifdef LIBXML_CATALOG_ENABLED
53	#include <libxml/catalog.h>
54	#endif
55	#include <libxml/globals.h>
56	#include <libxml/chvalid.h>
57
/*
 * Shortcuts to the current read position and to the end of the active
 * input of a parser context. The argument and the whole expansion are
 * parenthesized so the macros expand correctly for any expression
 * (e.g. CUR(&ctx)) and inside larger expressions.
 */
#define CUR(ctxt) ((ctxt)->input->cur)
#define END(ctxt) ((ctxt)->input->end)
/* Non-zero while the read position has not run past the end of the input. */
#define VALID_CTXT(ctxt) (CUR(ctxt) <= END(ctxt))
61
62	#include "buf.h"
63	#include "enc.h"
64
65	/*
66	* Various global defaults for parsing
67	*/
68
69	/**
70	* xmlCheckVersion:
71	* @version: the include version number
72	*
73	* check the compiled lib version against the include one.
74	* This can warn or immediately kill the application
75	*/
76	void
77	xmlCheckVersion(int version) {
78	int myversion = (int) LIBXML_VERSION;
79
80	xmlInitParser();
81
82	if ((myversion / 10000) != (version / 10000)) {
83	xmlGenericError(xmlGenericErrorContext,
84	"Fatal: program compiled against libxml %d using libxml %d\n",
85	(version / 10000), (myversion / 10000));
86	fprintf(stderr,
87	"Fatal: program compiled against libxml %d using libxml %d\n",
88	(version / 10000), (myversion / 10000));
89	}
90	if ((myversion / 100) < (version / 100)) {
91	xmlGenericError(xmlGenericErrorContext,
92	"Warning: program compiled against libxml %d using older %d\n",
93	(version / 100), (myversion / 100));
94	}
95	}
96
97
98	/************************************************************************
99	* *
100	* Some factorized error routines *
101	* *
102	************************************************************************/
103
104
105	/**
106	* xmlErrMemory:
107	* @ctxt: an XML parser context
108	* @extra: extra informations
109	*
110	* Handle a redefinition of attribute error
111	*/
112	void
113	xmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra)
114	{
115	if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
116	(ctxt->instate == XML_PARSER_EOF))
117	return;
118	if (ctxt != NULL) {
119	ctxt->errNo = XML_ERR_NO_MEMORY;
120	ctxt->instate = XML_PARSER_EOF;
121	ctxt->disableSAX = 1;
122	}
123	if (extra)
124	__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
125	XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra,
126	NULL, NULL, 0, 0,
127	"Memory allocation failed : %s\n", extra);
128	else
129	__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
130	XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, NULL,
131	NULL, NULL, 0, 0, "Memory allocation failed\n");
132	}
133
134	/**
135	* __xmlErrEncoding:
136	* @ctxt: an XML parser context
137	* @xmlerr: the error number
138	* @msg: the error message
139	* @str1: an string info
140	* @str2: an string info
141	*
142	* Handle an encoding error
143	*/
144	void
145	__xmlErrEncoding(xmlParserCtxtPtr ctxt, xmlParserErrors xmlerr,
146	const char msg, const xmlChar str1, const xmlChar * str2)
147	{
148	if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
149	(ctxt->instate == XML_PARSER_EOF))
150	return;
151	if (ctxt != NULL)
152	ctxt->errNo = xmlerr;
153	__xmlRaiseError(NULL, NULL, NULL,
154	ctxt, NULL, XML_FROM_PARSER, xmlerr, XML_ERR_FATAL,
155	NULL, 0, (const char ) str1, (const char ) str2,
156	NULL, 0, 0, msg, str1, str2);
157	if (ctxt != NULL) {
158	ctxt->wellFormed = 0;
159	if (ctxt->recovery == 0)
160	ctxt->disableSAX = 1;
161	}
162	}
163
164	/**
165	* xmlErrInternal:
166	* @ctxt: an XML parser context
167	* @msg: the error message
168	* @str: error informations
169	*
170	* Handle an internal error
171	*/
172	static void LIBXML_ATTR_FORMAT(2,0)
173	xmlErrInternal(xmlParserCtxtPtr ctxt, const char msg, const xmlChar str)
174	{
175	if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
176	(ctxt->instate == XML_PARSER_EOF))
177	return;
178	if (ctxt != NULL)
179	ctxt->errNo = XML_ERR_INTERNAL_ERROR;
180	__xmlRaiseError(NULL, NULL, NULL,
181	ctxt, NULL, XML_FROM_PARSER, XML_ERR_INTERNAL_ERROR,
182	XML_ERR_FATAL, NULL, 0, (const char *) str, NULL, NULL,
183	0, 0, msg, str);
184	if (ctxt != NULL) {
185	ctxt->wellFormed = 0;
186	if (ctxt->recovery == 0)
187	ctxt->disableSAX = 1;
188	}
189	}
190
191	/**
192	* xmlErrEncodingInt:
193	* @ctxt: an XML parser context
194	* @error: the error number
195	* @msg: the error message
196	* @val: an integer value
197	*
198	* n encoding error
199	*/
200	static void LIBXML_ATTR_FORMAT(3,0)
201	xmlErrEncodingInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
202	const char *msg, int val)
203	{
204	if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
205	(ctxt->instate == XML_PARSER_EOF))
206	return;
207	if (ctxt != NULL)
208	ctxt->errNo = error;
209	__xmlRaiseError(NULL, NULL, NULL,
210	ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
211	NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
212	if (ctxt != NULL) {
213	ctxt->wellFormed = 0;
214	if (ctxt->recovery == 0)
215	ctxt->disableSAX = 1;
216	}
217	}
218
/**
 * xmlIsLetter:
 * @c:  a unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [84] Letter ::= BaseChar | Ideographic
 *
 * Returns 1 if @c is a base character or an ideographic one, 0 otherwise
 */
int
xmlIsLetter(int c) {
    if (IS_BASECHAR(c))
        return(1);
    if (IS_IDEOGRAPHIC(c))
        return(1);
    return(0);
}
232
233	/************************************************************************
234	* *
235	* Input handling functions for progressive parsing *
236	* *
237	************************************************************************/
238
239	/* #define DEBUG_INPUT */
240	/* #define DEBUG_STACK */
241	/* #define DEBUG_PUSH */
242
243
/* we need to keep enough input to show errors in context */
#define LINE_LEN 80

#ifdef DEBUG_INPUT
#define CHECK_BUFFER(in) check_buffer(in)

/*
 * check_buffer:
 * @in:  the parser input to verify
 *
 * Debug-only consistency check, compiled when DEBUG_INPUT is defined.
 * Reports through the generic error handler when the input's base does
 * not match the buffer content or when cur lies outside the used part of
 * the buffer, then dumps the buffer state.
 */
static void
check_buffer(xmlParserInputPtr in) {
    if (in->base != xmlBufContent(in->buf->buffer)) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: base mismatch problem\n");
    }
    if (in->cur < in->base) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: cur < base problem\n");
    }
    if (in->cur > in->base + xmlBufUse(in->buf->buffer)) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: cur > base + use problem\n");
    }
    /*
     * Pointers are printed with %p and the offset/size are widened to
     * long types: casting a pointer to int truncates it on 64-bit
     * platforms and %d does not match a pointer difference or a size.
     */
    xmlGenericError(xmlGenericErrorContext,
            "buffer %p : content %p, cur %ld, use %lu\n",
            (const void *) in,
            (const void *) xmlBufContent(in->buf->buffer),
            (long) (in->cur - in->base),
            (unsigned long) xmlBufUse(in->buf->buffer));
}

#else
#define CHECK_BUFFER(in)
#endif
272
273
274	/**
275	* xmlParserInputRead:
276	* @in: an XML parser input
277	* @len: an indicative size for the lookahead
278	*
279	* This function was internal and is deprecated.
280	*
281	* Returns -1 as this is an error to use it.
282	*/
283	int
284	xmlParserInputRead(xmlParserInputPtr in ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED) {
285	return(-1);
286	}
287
288	/**
289	* xmlParserInputGrow:
290	* @in: an XML parser input
291	* @len: an indicative size for the lookahead
292	*
293	* This function increase the input for the parser. It tries to
294	* preserve pointers to the input buffer, and keep already read data
295	*
296	* Returns the amount of char read, or -1 in case of error, 0 indicate the
297	* end of this entity
298	*/
299	int
300	xmlParserInputGrow(xmlParserInputPtr in, int len) {
301	int ret;
302	size_t indx;
303	const xmlChar *content;
304
305	if ((in == NULL) \|\| (len < 0)) return(-1);
306	#ifdef DEBUG_INPUT
307	xmlGenericError(xmlGenericErrorContext, "Grow\n");
308	#endif
309	if (in->buf == NULL) return(-1);
310	if (in->base == NULL) return(-1);
311	if (in->cur == NULL) return(-1);
312	if (in->buf->buffer == NULL) return(-1);
313
314	CHECK_BUFFER(in);
315
316	indx = in->cur - in->base;
317	if (xmlBufUse(in->buf->buffer) > (unsigned int) indx + INPUT_CHUNK) {
318
319	CHECK_BUFFER(in);
320
321	return(0);
322	}
323	if (in->buf->readcallback != NULL) {
324	ret = xmlParserInputBufferGrow(in->buf, len);
325	} else
326	return(0);
327
328	/*
329	* NOTE : in->base may be a "dangling" i.e. freed pointer in this
330	* block, but we use it really as an integer to do some
331	* pointer arithmetic. Insure will raise it as a bug but in
332	* that specific case, that's not !
333	*/
334
335	content = xmlBufContent(in->buf->buffer);
336	if (in->base != content) {
337	/*
338	* the buffer has been reallocated
339	*/
340	indx = in->cur - in->base;
341	in->base = content;
342	in->cur = &content[indx];
343	}
344	in->end = xmlBufEnd(in->buf->buffer);
345
346	CHECK_BUFFER(in);
347
348	return(ret);
349	}
350
351	/**
352	* xmlParserInputShrink:
353	* @in: an XML parser input
354	*
355	* This function removes used input for the parser.
356	*/
357	void
358	xmlParserInputShrink(xmlParserInputPtr in) {
359	size_t used;
360	size_t ret;
361	size_t indx;
362	const xmlChar *content;
363
364	#ifdef DEBUG_INPUT
365	xmlGenericError(xmlGenericErrorContext, "Shrink\n");
366	#endif
367	if (in == NULL) return;
368	if (in->buf == NULL) return;
369	if (in->base == NULL) return;
370	if (in->cur == NULL) return;
371	if (in->buf->buffer == NULL) return;
372
373	CHECK_BUFFER(in);
374
375	used = in->cur - xmlBufContent(in->buf->buffer);
376	/*
377	* Do not shrink on large buffers whose only a tiny fraction
378	* was consumed
379	*/
380	if (used > INPUT_CHUNK) {
381	ret = xmlBufShrink(in->buf->buffer, used - LINE_LEN);
382	if (ret > 0) {
383	in->cur -= ret;
384	in->consumed += ret;
385	}
386	in->end = xmlBufEnd(in->buf->buffer);
387	}
388
389	CHECK_BUFFER(in);
390
391	if (xmlBufUse(in->buf->buffer) > INPUT_CHUNK) {
392	return;
393	}
394	xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
395	content = xmlBufContent(in->buf->buffer);
396	if (in->base != content) {
397	/*
398	* the buffer has been reallocated
399	*/
400	indx = in->cur - in->base;
401	in->base = content;
402	in->cur = &content[indx];
403	}
404	in->end = xmlBufEnd(in->buf->buffer);
405
406	CHECK_BUFFER(in);
407	}
408
409	/************************************************************************
410	* *
411	* UTF8 character input and related functions *
412	* *
413	************************************************************************/
414
415	/**
416	* xmlNextChar:
417	* @ctxt: the XML parser context
418	*
419	* Skip to the next char input char.
420	*/
421
422	void
423	xmlNextChar(xmlParserCtxtPtr ctxt)
424	{
425	if ((ctxt == NULL) \|\| (ctxt->instate == XML_PARSER_EOF) \|\|
426	(ctxt->input == NULL))
427	return;
428
429	if (!(VALID_CTXT(ctxt))) {
430	xmlErrInternal(ctxt, "Parser input data memory error\n", NULL);
431	ctxt->errNo = XML_ERR_INTERNAL_ERROR;
432	xmlStopParser(ctxt);
433	return;
434	}
435
436	if ((*ctxt->input->cur == 0) &&
437	(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) {
438	if ((ctxt->instate != XML_PARSER_COMMENT))
439	xmlPopInput(ctxt);
440	return;
441	}
442
443	if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
444	const unsigned char *cur;
445	unsigned char c;
446
447	/*
448	* 2.11 End-of-Line Handling
449	* the literal two-character sequence "#xD#xA" or a standalone
450	* literal #xD, an XML processor must pass to the application
451	* the single character #xA.
452	*/
453	if (*(ctxt->input->cur) == '\n') {
454	ctxt->input->line++; ctxt->input->col = 1;
455	} else
456	ctxt->input->col++;
457
458	/*
459	* We are supposed to handle UTF8, check it's valid
460	* From rfc2044: encoding of the Unicode values on UTF-8:
461	*
462	* UCS-4 range (hex.) UTF-8 octet sequence (binary)
463	* 0000 0000-0000 007F 0xxxxxxx
464	* 0000 0080-0000 07FF 110xxxxx 10xxxxxx
465	* 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
466	*
467	* Check for the 0x110000 limit too
468	*/
469	cur = ctxt->input->cur;
470
471	c = *cur;
472	if (c & 0x80) {
473	if (c == 0xC0)
474	goto encoding_error;
475	if (cur[1] == 0) {
476	xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
477	cur = ctxt->input->cur;
478	}
479	if ((cur[1] & 0xc0) != 0x80)
480	goto encoding_error;
481	if ((c & 0xe0) == 0xe0) {
482	unsigned int val;
483
484	if (cur[2] == 0) {
485	xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
486	cur = ctxt->input->cur;
487	}
488	if ((cur[2] & 0xc0) != 0x80)
489	goto encoding_error;
490	if ((c & 0xf0) == 0xf0) {
491	if (cur[3] == 0) {
492	xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
493	cur = ctxt->input->cur;
494	}
495	if (((c & 0xf8) != 0xf0) \|\|
496	((cur[3] & 0xc0) != 0x80))
497	goto encoding_error;
498	/* 4-byte code */
499	ctxt->input->cur += 4;
500	val = (cur[0] & 0x7) << 18;
501	val \|= (cur[1] & 0x3f) << 12;
502	val \|= (cur[2] & 0x3f) << 6;
503	val \|= cur[3] & 0x3f;
504	} else {
505	/* 3-byte code */
506	ctxt->input->cur += 3;
507	val = (cur[0] & 0xf) << 12;
508	val \|= (cur[1] & 0x3f) << 6;
509	val \|= cur[2] & 0x3f;
510	}
511	if (((val > 0xd7ff) && (val < 0xe000)) \|\|
512	((val > 0xfffd) && (val < 0x10000)) \|\|
513	(val >= 0x110000)) {
514	xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
515	"Char 0x%X out of allowed range\n",
516	val);
517	}
518	} else
519	/* 2-byte code */
520	ctxt->input->cur += 2;
521	} else
522	/* 1-byte code */
523	ctxt->input->cur++;
524
525	ctxt->nbChars++;
526	if (*ctxt->input->cur == 0)
527	xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
528	} else {
529	/*
530	* Assume it's a fixed length encoding (1) with
531	* a compatible encoding for the ASCII set, since
532	* XML constructs only use < 128 chars
533	*/
534
535	if (*(ctxt->input->cur) == '\n') {
536	ctxt->input->line++; ctxt->input->col = 1;
537	} else
538	ctxt->input->col++;
539	ctxt->input->cur++;
540	ctxt->nbChars++;
541	if (*ctxt->input->cur == 0)
542	xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
543	}
544	if ((*ctxt->input->cur == '%') && (!ctxt->html))
545	xmlParserHandlePEReference(ctxt);
546	if ((*ctxt->input->cur == 0) &&
547	(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
548	xmlPopInput(ctxt);
549	return;
550	encoding_error:
551	/*
552	* If we detect an UTF8 error that probably mean that the
553	* input encoding didn't get properly advertised in the
554	* declaration header. Report the error and switch the encoding
555	* to ISO-Latin-1 (if you don't like this policy, just declare the
556	* encoding !)
557	*/
558	if ((ctxt == NULL) \|\| (ctxt->input == NULL) \|\|
559	(ctxt->input->end - ctxt->input->cur < 4)) {
560	__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
561	"Input is not proper UTF-8, indicate encoding !\n",
562	NULL, NULL);
563	} else {
564	char buffer[150];
565
566	snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
567	ctxt->input->cur[0], ctxt->input->cur[1],
568	ctxt->input->cur[2], ctxt->input->cur[3]);
569	__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
570	"Input is not proper UTF-8, indicate encoding !\n%s",
571	BAD_CAST buffer, NULL);
572	}
573	ctxt->charset = XML_CHAR_ENCODING_8859_1;
574	ctxt->input->cur++;
575	return;
576	}
577
578	/**
579	* xmlCurrentChar:
580	* @ctxt: the XML parser context
581	* @len: pointer to the length of the char read
582	*
583	* The current char value, if using UTF-8 this may actually span multiple
584	* bytes in the input buffer. Implement the end of line normalization:
585	* 2.11 End-of-Line Handling
586	* Wherever an external parsed entity or the literal entity value
587	* of an internal parsed entity contains either the literal two-character
588	* sequence "#xD#xA" or a standalone literal #xD, an XML processor
589	* must pass to the application the single character #xA.
590	* This behavior can conveniently be produced by normalizing all
591	* line breaks to #xA on input, before parsing.)
592	*
593	* Returns the current char value and its length
594	*/
595
596	int
597	xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
598	if ((ctxt == NULL) \|\| (len == NULL) \|\| (ctxt->input == NULL)) return(0);
599	if (ctxt->instate == XML_PARSER_EOF)
600	return(0);
601
602	if ((ctxt->input->cur >= 0x20) && (ctxt->input->cur <= 0x7F)) {
603	*len = 1;
604	return((int) *ctxt->input->cur);
605	}
606	if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
607	/*
608	* We are supposed to handle UTF8, check it's valid
609	* From rfc2044: encoding of the Unicode values on UTF-8:
610	*
611	* UCS-4 range (hex.) UTF-8 octet sequence (binary)
612	* 0000 0000-0000 007F 0xxxxxxx
613	* 0000 0080-0000 07FF 110xxxxx 10xxxxxx
614	* 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
615	*
616	* Check for the 0x110000 limit too
617	*/
618	const unsigned char *cur = ctxt->input->cur;
619	unsigned char c;
620	unsigned int val;
621
622	c = *cur;
623	if (c & 0x80) {
624	if (((c & 0x40) == 0) \|\| (c == 0xC0))
625	goto encoding_error;
626	if (cur[1] == 0) {
627	xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
628	cur = ctxt->input->cur;
629	}
630	if ((cur[1] & 0xc0) != 0x80)
631	goto encoding_error;
632	if ((c & 0xe0) == 0xe0) {
633	if (cur[2] == 0) {
634	xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
635	cur = ctxt->input->cur;
636	}
637	if ((cur[2] & 0xc0) != 0x80)
638	goto encoding_error;
639	if ((c & 0xf0) == 0xf0) {
640	if (cur[3] == 0) {
641	xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
642	cur = ctxt->input->cur;
643	}
644	if (((c & 0xf8) != 0xf0) \|\|
645	((cur[3] & 0xc0) != 0x80))
646	goto encoding_error;
647	/* 4-byte code */
648	*len = 4;
649	val = (cur[0] & 0x7) << 18;
650	val \|= (cur[1] & 0x3f) << 12;
651	val \|= (cur[2] & 0x3f) << 6;
652	val \|= cur[3] & 0x3f;
653	if (val < 0x10000)
654	goto encoding_error;
655	} else {
656	/* 3-byte code */
657	*len = 3;
658	val = (cur[0] & 0xf) << 12;
659	val \|= (cur[1] & 0x3f) << 6;
660	val \|= cur[2] & 0x3f;
661	if (val < 0x800)
662	goto encoding_error;
663	}
664	} else {
665	/* 2-byte code */
666	*len = 2;
667	val = (cur[0] & 0x1f) << 6;
668	val \|= cur[1] & 0x3f;
669	if (val < 0x80)
670	goto encoding_error;
671	}
672	if (!IS_CHAR(val)) {
673	xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
674	"Char 0x%X out of allowed range\n", val);
675	}
676	return(val);
677	} else {
678	/* 1-byte code */
679	*len = 1;
680	if (*ctxt->input->cur == 0)
681	xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
682	if ((*ctxt->input->cur == 0) &&
683	(ctxt->input->end > ctxt->input->cur)) {
684	xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
685	"Char 0x0 out of allowed range\n", 0);
686	}
687	if (*ctxt->input->cur == 0xD) {
688	if (ctxt->input->cur[1] == 0xA) {
689	ctxt->nbChars++;
690	ctxt->input->cur++;
691	}
692	return(0xA);
693	}
694	return((int) *ctxt->input->cur);
695	}
696	}
697	/*
698	* Assume it's a fixed length encoding (1) with
699	* a compatible encoding for the ASCII set, since
700	* XML constructs only use < 128 chars
701	*/
702	*len = 1;
703	if (*ctxt->input->cur == 0xD) {
704	if (ctxt->input->cur[1] == 0xA) {
705	ctxt->nbChars++;
706	ctxt->input->cur++;
707	}
708	return(0xA);
709	}
710	return((int) *ctxt->input->cur);
711	encoding_error:
712	/*
713	* An encoding problem may arise from a truncated input buffer
714	* splitting a character in the middle. In that case do not raise
715	* an error but return 0 to endicate an end of stream problem
716	*/
717	if (ctxt->input->end - ctxt->input->cur < 4) {
718	*len = 0;
719	return(0);
720	}
721
722	/*
723	* If we detect an UTF8 error that probably mean that the
724	* input encoding didn't get properly advertised in the
725	* declaration header. Report the error and switch the encoding
726	* to ISO-Latin-1 (if you don't like this policy, just declare the
727	* encoding !)
728	*/
729	{
730	char buffer[150];
731
732	snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
733	ctxt->input->cur[0], ctxt->input->cur[1],
734	ctxt->input->cur[2], ctxt->input->cur[3]);
735	__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
736	"Input is not proper UTF-8, indicate encoding !\n%s",
737	BAD_CAST buffer, NULL);
738	}
739	ctxt->charset = XML_CHAR_ENCODING_8859_1;
740	*len = 1;
741	return((int) *ctxt->input->cur);
742	}
743
744	/**
745	* xmlStringCurrentChar:
746	* @ctxt: the XML parser context
747	* @cur: pointer to the beginning of the char
748	* @len: pointer to the length of the char read
749	*
750	* The current char value, if using UTF-8 this may actually span multiple
751	* bytes in the input buffer.
752	*
753	* Returns the current char value and its length
754	*/
755
756	int
757	xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len)
758	{
759	if ((len == NULL) \|\| (cur == NULL)) return(0);
760	if ((ctxt == NULL) \|\| (ctxt->charset == XML_CHAR_ENCODING_UTF8)) {
761	/*
762	* We are supposed to handle UTF8, check it's valid
763	* From rfc2044: encoding of the Unicode values on UTF-8:
764	*
765	* UCS-4 range (hex.) UTF-8 octet sequence (binary)
766	* 0000 0000-0000 007F 0xxxxxxx
767	* 0000 0080-0000 07FF 110xxxxx 10xxxxxx
768	* 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
769	*
770	* Check for the 0x110000 limit too
771	*/
772	unsigned char c;
773	unsigned int val;
774
775	c = *cur;
776	if (c & 0x80) {
777	if ((cur[1] & 0xc0) != 0x80)
778	goto encoding_error;
779	if ((c & 0xe0) == 0xe0) {
780
781	if ((cur[2] & 0xc0) != 0x80)
782	goto encoding_error;
783	if ((c & 0xf0) == 0xf0) {
784	if (((c & 0xf8) != 0xf0) \|\| ((cur[3] & 0xc0) != 0x80))
785	goto encoding_error;
786	/* 4-byte code */
787	*len = 4;
788	val = (cur[0] & 0x7) << 18;
789	val \|= (cur[1] & 0x3f) << 12;
790	val \|= (cur[2] & 0x3f) << 6;
791	val \|= cur[3] & 0x3f;
792	} else {
793	/* 3-byte code */
794	*len = 3;
795	val = (cur[0] & 0xf) << 12;
796	val \|= (cur[1] & 0x3f) << 6;
797	val \|= cur[2] & 0x3f;
798	}
799	} else {
800	/* 2-byte code */
801	*len = 2;
802	val = (cur[0] & 0x1f) << 6;
803	val \|= cur[1] & 0x3f;
804	}
805	if (!IS_CHAR(val)) {
806	xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
807	"Char 0x%X out of allowed range\n", val);
808	}
809	return (val);
810	} else {
811	/* 1-byte code */
812	*len = 1;
813	return ((int) *cur);
814	}
815	}
816	/*
817	* Assume it's a fixed length encoding (1) with
818	* a compatible encoding for the ASCII set, since
819	* XML constructs only use < 128 chars
820	*/
821	*len = 1;
822	return ((int) *cur);
823	encoding_error:
824
825	/*
826	* An encoding problem may arise from a truncated input buffer
827	* splitting a character in the middle. In that case do not raise
828	* an error but return 0 to endicate an end of stream problem
829	*/
830	if ((ctxt == NULL) \|\| (ctxt->input == NULL) \|\|
831	(ctxt->input->end - ctxt->input->cur < 4)) {
832	*len = 0;
833	return(0);
834	}
835	/*
836	* If we detect an UTF8 error that probably mean that the
837	* input encoding didn't get properly advertised in the
838	* declaration header. Report the error and switch the encoding
839	* to ISO-Latin-1 (if you don't like this policy, just declare the
840	* encoding !)
841	*/
842	{
843	char buffer[150];
844
845	snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
846	ctxt->input->cur[0], ctxt->input->cur[1],
847	ctxt->input->cur[2], ctxt->input->cur[3]);
848	__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
849	"Input is not proper UTF-8, indicate encoding !\n%s",
850	BAD_CAST buffer, NULL);
851	}
852	*len = 1;
853	return ((int) *cur);
854	}
855
856	/**
857	* xmlCopyCharMultiByte:
858	* @out: pointer to an array of xmlChar
859	* @val: the char value
860	*
861	* append the char value in the array
862	*
863	* Returns the number of xmlChar written
864	*/
865	int
866	xmlCopyCharMultiByte(xmlChar *out, int val) {
867	if (out == NULL) return(0);
868	/*
869	* We are supposed to handle UTF8, check it's valid
870	* From rfc2044: encoding of the Unicode values on UTF-8:
871	*
872	* UCS-4 range (hex.) UTF-8 octet sequence (binary)
873	* 0000 0000-0000 007F 0xxxxxxx
874	* 0000 0080-0000 07FF 110xxxxx 10xxxxxx
875	* 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
876	*/
877	if (val >= 0x80) {
878	xmlChar *savedout = out;
879	int bits;
880	if (val < 0x800) { *out++= (val >> 6) \| 0xC0; bits= 0; }
881	else if (val < 0x10000) { *out++= (val >> 12) \| 0xE0; bits= 6;}
882	else if (val < 0x110000) { *out++= (val >> 18) \| 0xF0; bits= 12; }
883	else {
884	xmlErrEncodingInt(NULL, XML_ERR_INVALID_CHAR,
885	"Internal error, xmlCopyCharMultiByte 0x%X out of bound\n",
886	val);
887	return(0);
888	}
889	for ( ; bits >= 0; bits-= 6)
890	*out++= ((val >> bits) & 0x3F) \| 0x80 ;
891	return (out - savedout);
892	}
893	*out = (xmlChar) val;
894	return 1;
895	}
896
897	/**
898	* xmlCopyChar:
899	* @len: Ignored, compatibility
900	* @out: pointer to an array of xmlChar
901	* @val: the char value
902	*
903	* append the char value in the array
904	*
905	* Returns the number of xmlChar written
906	*/
907
908	int
909	xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
910	if (out == NULL) return(0);
911	/* the len parameter is ignored */
912	if (val >= 0x80) {
913	return(xmlCopyCharMultiByte (out, val));
914	}
915	*out = (xmlChar) val;
916	return 1;
917	}
918
919	/************************************************************************
920	* *
921	* Commodity functions to switch encodings *
922	* *
923	************************************************************************/
924
925	static int
926	xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt,
927	xmlCharEncodingHandlerPtr handler, int len);
928	static int
929	xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
930	xmlCharEncodingHandlerPtr handler, int len);
931	/**
932	* xmlSwitchEncoding:
933	* @ctxt: the parser context
934	* @enc: the encoding value (number)
935	*
936	* change the input functions when discovering the character encoding
937	* of a given entity.
938	*
939	* Returns 0 in case of success, -1 otherwise
940	*/
941	int
942	xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
943	{
944	xmlCharEncodingHandlerPtr handler;
945	int len = -1;
946	int ret;
947
948	if (ctxt == NULL) return(-1);
949	switch (enc) {
950	case XML_CHAR_ENCODING_ERROR:
951	__xmlErrEncoding(ctxt, XML_ERR_UNKNOWN_ENCODING,
952	"encoding unknown\n", NULL, NULL);
953	return(-1);
954	case XML_CHAR_ENCODING_NONE:
955	/* let's assume it's UTF-8 without the XML decl */
956	ctxt->charset = XML_CHAR_ENCODING_UTF8;
957	return(0);
958	case XML_CHAR_ENCODING_UTF8:
959	/* default encoding, no conversion should be needed */
960	ctxt->charset = XML_CHAR_ENCODING_UTF8;
961
962	/*
963	* Errata on XML-1.0 June 20 2001
964	* Specific handling of the Byte Order Mark for
965	* UTF-8
966	*/
967	if ((ctxt->input != NULL) &&
968	(ctxt->input->cur[0] == 0xEF) &&
969	(ctxt->input->cur[1] == 0xBB) &&
970	(ctxt->input->cur[2] == 0xBF)) {
971	ctxt->input->cur += 3;
972	}
973	return(0);
974	case XML_CHAR_ENCODING_UTF16LE:
975	case XML_CHAR_ENCODING_UTF16BE:
976	/*The raw input characters are encoded
977	*in UTF-16. As we expect this function
978	*to be called after xmlCharEncInFunc, we expect
979	*ctxt->input->cur to contain UTF-8 encoded characters.
980	*So the raw UTF16 Byte Order Mark
981	*has also been converted into
982	*an UTF-8 BOM. Let's skip that BOM.
983	*/
984	if ((ctxt->input != NULL) && (ctxt->input->cur != NULL) &&
985	(ctxt->input->cur[0] == 0xEF) &&
986	(ctxt->input->cur[1] == 0xBB) &&
987	(ctxt->input->cur[2] == 0xBF)) {
988	ctxt->input->cur += 3;
989	}
990	len = 90;
991	break;
992	case XML_CHAR_ENCODING_UCS2:
993	len = 90;
994	break;
995	case XML_CHAR_ENCODING_UCS4BE:
996	case XML_CHAR_ENCODING_UCS4LE:
997	case XML_CHAR_ENCODING_UCS4_2143:
998	case XML_CHAR_ENCODING_UCS4_3412:
999	len = 180;
1000	break;
1001	case XML_CHAR_ENCODING_EBCDIC:
1002	case XML_CHAR_ENCODING_8859_1:
1003	case XML_CHAR_ENCODING_8859_2:
1004	case XML_CHAR_ENCODING_8859_3:
1005	case XML_CHAR_ENCODING_8859_4:
1006	case XML_CHAR_ENCODING_8859_5:
1007	case XML_CHAR_ENCODING_8859_6:
1008	case XML_CHAR_ENCODING_8859_7:
1009	case XML_CHAR_ENCODING_8859_8:
1010	case XML_CHAR_ENCODING_8859_9:
1011	case XML_CHAR_ENCODING_ASCII:
1012	case XML_CHAR_ENCODING_2022_JP:
1013	case XML_CHAR_ENCODING_SHIFT_JIS:
1014	case XML_CHAR_ENCODING_EUC_JP:
1015	len = 45;
1016	break;
1017	}
1018	handler = xmlGetCharEncodingHandler(enc);
1019	if (handler == NULL) {
1020	/*
1021	* Default handlers.
1022	*/
1023	switch (enc) {
1024	case XML_CHAR_ENCODING_ASCII:
1025	/* default encoding, no conversion should be needed */
1026	ctxt->charset = XML_CHAR_ENCODING_UTF8;
1027	return(0);
1028	case XML_CHAR_ENCODING_UTF16LE:
1029	break;
1030	case XML_CHAR_ENCODING_UTF16BE:
1031	break;
1032	case XML_CHAR_ENCODING_UCS4LE:
1033	__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1034	"encoding not supported %s\n",
1035	BAD_CAST "USC4 little endian", NULL);
1036	break;
1037	case XML_CHAR_ENCODING_UCS4BE:
1038	__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1039	"encoding not supported %s\n",
1040	BAD_CAST "USC4 big endian", NULL);
1041	break;
1042	case XML_CHAR_ENCODING_EBCDIC:
1043	__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1044	"encoding not supported %s\n",
1045	BAD_CAST "EBCDIC", NULL);
1046	break;
1047	case XML_CHAR_ENCODING_UCS4_2143:
1048	__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1049	"encoding not supported %s\n",
1050	BAD_CAST "UCS4 2143", NULL);
1051	break;
1052	case XML_CHAR_ENCODING_UCS4_3412:
1053	__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1054	"encoding not supported %s\n",
1055	BAD_CAST "UCS4 3412", NULL);
1056	break;
1057	case XML_CHAR_ENCODING_UCS2:
1058	__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1059	"encoding not supported %s\n",
1060	BAD_CAST "UCS2", NULL);
1061	break;
1062	case XML_CHAR_ENCODING_8859_1:
1063	case XML_CHAR_ENCODING_8859_2:
1064	case XML_CHAR_ENCODING_8859_3:
1065	case XML_CHAR_ENCODING_8859_4:
1066	case XML_CHAR_ENCODING_8859_5:
1067	case XML_CHAR_ENCODING_8859_6:
1068	case XML_CHAR_ENCODING_8859_7:
1069	case XML_CHAR_ENCODING_8859_8:
1070	case XML_CHAR_ENCODING_8859_9:
1071	/*
1072	* We used to keep the internal content in the
1073	* document encoding however this turns being unmaintainable
1074	* So xmlGetCharEncodingHandler() will return non-null
1075	* values for this now.
1076	*/
1077	if ((ctxt->inputNr == 1) &&
1078	(ctxt->encoding == NULL) &&
1079	(ctxt->input != NULL) &&
1080	(ctxt->input->encoding != NULL)) {
1081	ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1082	}
1083	ctxt->charset = enc;
1084	return(0);
1085	case XML_CHAR_ENCODING_2022_JP:
1086	__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1087	"encoding not supported %s\n",
1088	BAD_CAST "ISO-2022-JP", NULL);
1089	break;
1090	case XML_CHAR_ENCODING_SHIFT_JIS:
1091	__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1092	"encoding not supported %s\n",
1093	BAD_CAST "Shift_JIS", NULL);
1094	break;
1095	case XML_CHAR_ENCODING_EUC_JP:
1096	__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1097	"encoding not supported %s\n",
1098	BAD_CAST "EUC-JP", NULL);
1099	break;
1100	default:
1101	break;
1102	}
1103	}
1104	if (handler == NULL)
1105	return(-1);
1106	ctxt->charset = XML_CHAR_ENCODING_UTF8;
1107	ret = xmlSwitchToEncodingInt(ctxt, handler, len);
1108	if ((ret < 0) \|\| (ctxt->errNo == XML_I18N_CONV_FAILED)) {
1109	/*
1110	* on encoding conversion errors, stop the parser
1111	*/
1112	xmlStopParser(ctxt);
1113	ctxt->errNo = XML_I18N_CONV_FAILED;
1114	}
1115	return(ret);
1116	}
1117
1118	/**
1119	* xmlSwitchInputEncoding:
1120	* @ctxt: the parser context
1121	* @input: the input stream
1122	* @handler: the encoding handler
1123	* @len: the number of bytes to convert for the first line or -1
1124	*
1125	* change the input functions when discovering the character encoding
1126	* of a given entity.
1127	*
1128	* Returns 0 in case of success, -1 otherwise
1129	*/
1130	static int
1131	xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
1132	xmlCharEncodingHandlerPtr handler, int len)
1133	{
1134	int nbchars;
1135
1136	if (handler == NULL)
1137	return (-1);
1138	if (input == NULL)
1139	return (-1);
1140	if (input->buf != NULL) {
1141	if (input->buf->encoder != NULL) {
1142	/*
1143	* Check in case the auto encoding detetection triggered
1144	* in already.
1145	*/
1146	if (input->buf->encoder == handler)
1147	return (0);
1148
1149	/*
1150	* "UTF-16" can be used for both LE and BE
1151	if ((!xmlStrncmp(BAD_CAST input->buf->encoder->name,
1152	BAD_CAST "UTF-16", 6)) &&
1153	(!xmlStrncmp(BAD_CAST handler->name,
1154	BAD_CAST "UTF-16", 6))) {
1155	return(0);
1156	}
1157	*/
1158
1159	/*
1160	* Note: this is a bit dangerous, but that's what it
1161	* takes to use nearly compatible signature for different
1162	* encodings.
1163	*/
1164	xmlCharEncCloseFunc(input->buf->encoder);
1165	input->buf->encoder = handler;
1166	return (0);
1167	}
1168	input->buf->encoder = handler;
1169
1170	/*
1171	* Is there already some content down the pipe to convert ?
1172	*/
1173	if (xmlBufIsEmpty(input->buf->buffer) == 0) {
1174	int processed;
1175	unsigned int use;
1176
1177	/*
1178	* Specific handling of the Byte Order Mark for
1179	* UTF-16
1180	*/
1181	if ((handler->name != NULL) &&
1182	(!strcmp(handler->name, "UTF-16LE") \|\|
1183	!strcmp(handler->name, "UTF-16")) &&
1184	(input->cur[0] == 0xFF) && (input->cur[1] == 0xFE)) {
1185	input->cur += 2;
1186	}
1187	if ((handler->name != NULL) &&
1188	(!strcmp(handler->name, "UTF-16BE")) &&
1189	(input->cur[0] == 0xFE) && (input->cur[1] == 0xFF)) {
1190	input->cur += 2;
1191	}
1192	/*
1193	* Errata on XML-1.0 June 20 2001
1194	* Specific handling of the Byte Order Mark for
1195	* UTF-8
1196	*/
1197	if ((handler->name != NULL) &&
1198	(!strcmp(handler->name, "UTF-8")) &&
1199	(input->cur[0] == 0xEF) &&
1200	(input->cur[1] == 0xBB) && (input->cur[2] == 0xBF)) {
1201	input->cur += 3;
1202	}
1203
1204	/*
1205	* Shrink the current input buffer.
1206	* Move it as the raw buffer and create a new input buffer
1207	*/
1208	processed = input->cur - input->base;
1209	xmlBufShrink(input->buf->buffer, processed);
1210	input->buf->raw = input->buf->buffer;
1211	input->buf->buffer = xmlBufCreate();
1212	input->buf->rawconsumed = processed;
1213	use = xmlBufUse(input->buf->raw);
1214
1215	if (ctxt->html) {
1216	/*
1217	* convert as much as possible of the buffer
1218	*/
1219	nbchars = xmlCharEncInput(input->buf, 1);
1220	} else {
1221	/*
1222	* convert just enough to get
1223	* '<?xml version="1.0" encoding="xxx"?>'
1224	* parsed with the autodetected encoding
1225	* into the parser reading buffer.
1226	*/
1227	nbchars = xmlCharEncFirstLineInput(input->buf, len);
1228	}
1229	if (nbchars < 0) {
1230	xmlErrInternal(ctxt,
1231	"switching encoding: encoder error\n",
1232	NULL);
1233	return (-1);
1234	}
1235	input->buf->rawconsumed += use - xmlBufUse(input->buf->raw);
1236	xmlBufResetInput(input->buf->buffer, input);
1237	}
1238	return (0);
1239	} else if (input->length == 0) {
1240	/*
1241	* When parsing a static memory array one must know the
1242	* size to be able to convert the buffer.
1243	*/
1244	xmlErrInternal(ctxt, "switching encoding : no input\n", NULL);
1245	return (-1);
1246	}
1247	return (0);
1248	}
1249
1250	/**
1251	* xmlSwitchInputEncoding:
1252	* @ctxt: the parser context
1253	* @input: the input stream
1254	* @handler: the encoding handler
1255	*
1256	* change the input functions when discovering the character encoding
1257	* of a given entity.
1258	*
1259	* Returns 0 in case of success, -1 otherwise
1260	*/
1261	int
1262	xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
1263	xmlCharEncodingHandlerPtr handler) {
1264	return(xmlSwitchInputEncodingInt(ctxt, input, handler, -1));
1265	}
1266
1267	/**
1268	* xmlSwitchToEncodingInt:
1269	* @ctxt: the parser context
1270	* @handler: the encoding handler
1271	* @len: the length to convert or -1
1272	*
1273	* change the input functions when discovering the character encoding
1274	* of a given entity, and convert only @len bytes of the output, this
1275	* is needed on auto detect to allows any declared encoding later to
1276	* convert the actual content after the xmlDecl
1277	*
1278	* Returns 0 in case of success, -1 otherwise
1279	*/
1280	static int
1281	xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt,
1282	xmlCharEncodingHandlerPtr handler, int len) {
1283	int ret = 0;
1284
1285	if (handler != NULL) {
1286	if (ctxt->input != NULL) {
1287	ret = xmlSwitchInputEncodingInt(ctxt, ctxt->input, handler, len);
1288	} else {
1289	xmlErrInternal(ctxt, "xmlSwitchToEncoding : no input\n",
1290	NULL);
1291	return(-1);
1292	}
1293	/*
1294	* The parsing is now done in UTF8 natively
1295	*/
1296	ctxt->charset = XML_CHAR_ENCODING_UTF8;
1297	} else
1298	return(-1);
1299	return(ret);
1300	}
1301
1302	/**
1303	* xmlSwitchToEncoding:
1304	* @ctxt: the parser context
1305	* @handler: the encoding handler
1306	*
1307	* change the input functions when discovering the character encoding
1308	* of a given entity.
1309	*
1310	* Returns 0 in case of success, -1 otherwise
1311	*/
1312	int
1313	xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1314	{
1315	return (xmlSwitchToEncodingInt(ctxt, handler, -1));
1316	}
1317
1318	/************************************************************************
1319	* *
1320	* Commodity functions to handle entities processing *
1321	* *
1322	************************************************************************/
1323
1324	/**
1325	* xmlFreeInputStream:
1326	* @input: an xmlParserInputPtr
1327	*
1328	* Free up an input stream.
1329	*/
1330	void
1331	xmlFreeInputStream(xmlParserInputPtr input) {
1332	if (input == NULL) return;
1333
1334	if (input->filename != NULL) xmlFree((char *) input->filename);
1335	if (input->directory != NULL) xmlFree((char *) input->directory);
1336	if (input->encoding != NULL) xmlFree((char *) input->encoding);
1337	if (input->version != NULL) xmlFree((char *) input->version);
1338	if ((input->free != NULL) && (input->base != NULL))
1339	input->free((xmlChar *) input->base);
1340	if (input->buf != NULL)
1341	xmlFreeParserInputBuffer(input->buf);
1342	xmlFree(input);
1343	}
1344
1345	/**
1346	* xmlNewInputStream:
1347	* @ctxt: an XML parser context
1348	*
1349	* Create a new input stream structure.
1350	*
1351	* Returns the new input stream or NULL
1352	*/
1353	xmlParserInputPtr
1354	xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1355	xmlParserInputPtr input;
1356
1357	input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1358	if (input == NULL) {
1359	xmlErrMemory(ctxt, "couldn't allocate a new input stream\n");
1360	return(NULL);
1361	}
1362	memset(input, 0, sizeof(xmlParserInput));
1363	input->line = 1;
1364	input->col = 1;
1365	input->standalone = -1;
1366
1367	/*
1368	* If the context is NULL the id cannot be initialized, but that
1369	* should not happen while parsing which is the situation where
1370	* the id is actually needed.
1371	*/
1372	if (ctxt != NULL)
1373	input->id = ctxt->input_id++;
1374
1375	return(input);
1376	}
1377
1378	/**
1379	* xmlNewIOInputStream:
1380	* @ctxt: an XML parser context
1381	* @input: an I/O Input
1382	* @enc: the charset encoding if known
1383	*
1384	* Create a new input stream structure encapsulating the @input into
1385	* a stream suitable for the parser.
1386	*
1387	* Returns the new input stream or NULL
1388	*/
1389	xmlParserInputPtr
1390	xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
1391	xmlCharEncoding enc) {
1392	xmlParserInputPtr inputStream;
1393
1394	if (input == NULL) return(NULL);
1395	if (xmlParserDebugEntities)
1396	xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
1397	inputStream = xmlNewInputStream(ctxt);
1398	if (inputStream == NULL) {
1399	return(NULL);
1400	}
1401	inputStream->filename = NULL;
1402	inputStream->buf = input;
1403	xmlBufResetInput(inputStream->buf->buffer, inputStream);
1404
1405	if (enc != XML_CHAR_ENCODING_NONE) {
1406	xmlSwitchEncoding(ctxt, enc);
1407	}
1408
1409	return(inputStream);
1410	}
1411
1412	/**
1413	* xmlNewEntityInputStream:
1414	* @ctxt: an XML parser context
1415	* @entity: an Entity pointer
1416	*
1417	* Create a new input stream based on an xmlEntityPtr
1418	*
1419	* Returns the new input stream or NULL
1420	*/
1421	xmlParserInputPtr
1422	xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1423	xmlParserInputPtr input;
1424
1425	if (entity == NULL) {
1426	xmlErrInternal(ctxt, "xmlNewEntityInputStream entity = NULL\n",
1427	NULL);
1428	return(NULL);
1429	}
1430	if (xmlParserDebugEntities)
1431	xmlGenericError(xmlGenericErrorContext,
1432	"new input from entity: %s\n", entity->name);
1433	if (entity->content == NULL) {
1434	switch (entity->etype) {
1435	case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1436	xmlErrInternal(ctxt, "Cannot parse entity %s\n",
1437	entity->name);
1438	break;
1439	case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
1440	case XML_EXTERNAL_PARAMETER_ENTITY:
1441	return(xmlLoadExternalEntity((char *) entity->URI,
1442	(char *) entity->ExternalID, ctxt));
1443	case XML_INTERNAL_GENERAL_ENTITY:
1444	xmlErrInternal(ctxt,
1445	"Internal entity %s without content !\n",
1446	entity->name);
1447	break;
1448	case XML_INTERNAL_PARAMETER_ENTITY:
1449	xmlErrInternal(ctxt,
1450	"Internal parameter entity %s without content !\n",
1451	entity->name);
1452	break;
1453	case XML_INTERNAL_PREDEFINED_ENTITY:
1454	xmlErrInternal(ctxt,
1455	"Predefined entity %s without content !\n",
1456	entity->name);
1457	break;
1458	}
1459	return(NULL);
1460	}
1461	input = xmlNewInputStream(ctxt);
1462	if (input == NULL) {
1463	return(NULL);
1464	}
1465	if (entity->URI != NULL)
1466	input->filename = (char ) xmlStrdup((xmlChar ) entity->URI);
1467	input->base = entity->content;
1468	if (entity->length == 0)
1469	entity->length = xmlStrlen(entity->content);
1470	input->cur = entity->content;
1471	input->length = entity->length;
1472	input->end = &entity->content[input->length];
1473	return(input);
1474	}
1475
1476	/**
1477	* xmlNewStringInputStream:
1478	* @ctxt: an XML parser context
1479	* @buffer: an memory buffer
1480	*
1481	* Create a new input stream based on a memory buffer.
1482	* Returns the new input stream
1483	*/
1484	xmlParserInputPtr
1485	xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
1486	xmlParserInputPtr input;
1487
1488	if (buffer == NULL) {
1489	xmlErrInternal(ctxt, "xmlNewStringInputStream string = NULL\n",
1490	NULL);
1491	return(NULL);
1492	}
1493	if (xmlParserDebugEntities)
1494	xmlGenericError(xmlGenericErrorContext,
1495	"new fixed input: %.30s\n", buffer);
1496	input = xmlNewInputStream(ctxt);
1497	if (input == NULL) {
1498	xmlErrMemory(ctxt, "couldn't allocate a new input stream\n");
1499	return(NULL);
1500	}
1501	input->base = buffer;
1502	input->cur = buffer;
1503	input->length = xmlStrlen(buffer);
1504	input->end = &buffer[input->length];
1505	return(input);
1506	}
1507
1508	/**
1509	* xmlNewInputFromFile:
1510	* @ctxt: an XML parser context
1511	* @filename: the filename to use as entity
1512	*
1513	* Create a new input stream based on a file or an URL.
1514	*
1515	* Returns the new input stream or NULL in case of error
1516	*/
1517	xmlParserInputPtr
1518	xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
1519	xmlParserInputBufferPtr buf;
1520	xmlParserInputPtr inputStream;
1521	char *directory = NULL;
1522	xmlChar *URI = NULL;
1523
1524	if (xmlParserDebugEntities)
1525	xmlGenericError(xmlGenericErrorContext,
1526	"new input from file: %s\n", filename);
1527	if (ctxt == NULL) return(NULL);
1528	buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
1529	if (buf == NULL) {
1530	if (filename == NULL)
1531	__xmlLoaderErr(ctxt,
1532	"failed to load external entity: NULL filename \n",
1533	NULL);
1534	else
1535	__xmlLoaderErr(ctxt, "failed to load external entity \"%s\"\n",
1536	(const char *) filename);
1537	return(NULL);
1538	}
1539
1540	inputStream = xmlNewInputStream(ctxt);
1541	if (inputStream == NULL)
1542	return(NULL);
1543
1544	inputStream->buf = buf;
1545	inputStream = xmlCheckHTTPInput(ctxt, inputStream);
1546	if (inputStream == NULL)
1547	return(NULL);
1548
1549	if (inputStream->filename == NULL)
1550	URI = xmlStrdup((xmlChar *) filename);
1551	else
1552	URI = xmlStrdup((xmlChar *) inputStream->filename);
1553	directory = xmlParserGetDirectory((const char *) URI);
1554	if (inputStream->filename != NULL) xmlFree((char *)inputStream->filename);
1555	inputStream->filename = (char ) xmlCanonicPath((const xmlChar ) URI);
1556	if (URI != NULL) xmlFree((char *) URI);
1557	inputStream->directory = directory;
1558
1559	xmlBufResetInput(inputStream->buf->buffer, inputStream);
1560	if ((ctxt->directory == NULL) && (directory != NULL))
1561	ctxt->directory = (char ) xmlStrdup((const xmlChar ) directory);
1562	return(inputStream);
1563	}
1564
1565	/************************************************************************
1566	* *
1567	* Commodity functions to handle parser contexts *
1568	* *
1569	************************************************************************/
1570
1571	/**
1572	* xmlInitParserCtxt:
1573	* @ctxt: an XML parser context
1574	*
1575	* Initialize a parser context
1576	*
1577	* Returns 0 in case of success and -1 in case of error
1578	*/
1579
1580	int
1581	xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
1582	{
1583	xmlParserInputPtr input;
1584
1585	if(ctxt==NULL) {
1586	xmlErrInternal(NULL, "Got NULL parser context\n", NULL);
1587	return(-1);
1588	}
1589
1590	xmlDefaultSAXHandlerInit();
1591
1592	if (ctxt->dict == NULL)
1593	ctxt->dict = xmlDictCreate();
1594	if (ctxt->dict == NULL) {
1595	xmlErrMemory(NULL, "cannot initialize parser context\n");
1596	return(-1);
1597	}
1598	xmlDictSetLimit(ctxt->dict, XML_MAX_DICTIONARY_LIMIT);
1599
1600	if (ctxt->sax == NULL)
1601	ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
1602	if (ctxt->sax == NULL) {
1603	xmlErrMemory(NULL, "cannot initialize parser context\n");
1604	return(-1);
1605	}
1606	else
1607	xmlSAXVersion(ctxt->sax, 2);
1608
1609	ctxt->maxatts = 0;
1610	ctxt->atts = NULL;
1611	/* Allocate the Input stack */
1612	if (ctxt->inputTab == NULL) {
1613	ctxt->inputTab = (xmlParserInputPtr *)
1614	xmlMalloc(5 * sizeof(xmlParserInputPtr));
1615	ctxt->inputMax = 5;
1616	}
1617	if (ctxt->inputTab == NULL) {
1618	xmlErrMemory(NULL, "cannot initialize parser context\n");
1619	ctxt->inputNr = 0;
1620	ctxt->inputMax = 0;
1621	ctxt->input = NULL;
1622	return(-1);
1623	}
1624	while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
1625	xmlFreeInputStream(input);
1626	}
1627	ctxt->inputNr = 0;
1628	ctxt->input = NULL;
1629
1630	ctxt->version = NULL;
1631	ctxt->encoding = NULL;
1632	ctxt->standalone = -1;
1633	ctxt->hasExternalSubset = 0;
1634	ctxt->hasPErefs = 0;
1635	ctxt->html = 0;
1636	ctxt->external = 0;
1637	ctxt->instate = XML_PARSER_START;
1638	ctxt->token = 0;
1639	ctxt->directory = NULL;
1640
1641	/* Allocate the Node stack */
1642	if (ctxt->nodeTab == NULL) {
1643	ctxt->nodeTab = (xmlNodePtr ) xmlMalloc(10 sizeof(xmlNodePtr));
1644	ctxt->nodeMax = 10;
1645	}
1646	if (ctxt->nodeTab == NULL) {
1647	xmlErrMemory(NULL, "cannot initialize parser context\n");
1648	ctxt->nodeNr = 0;
1649	ctxt->nodeMax = 0;
1650	ctxt->node = NULL;
1651	ctxt->inputNr = 0;
1652	ctxt->inputMax = 0;
1653	ctxt->input = NULL;
1654	return(-1);
1655	}
1656	ctxt->nodeNr = 0;
1657	ctxt->node = NULL;
1658
1659	/* Allocate the Name stack */
1660	if (ctxt->nameTab == NULL) {
1661	ctxt->nameTab = (const xmlChar *) xmlMalloc(10 sizeof(xmlChar *));
1662	ctxt->nameMax = 10;
1663	}
1664	if (ctxt->nameTab == NULL) {
1665	xmlErrMemory(NULL, "cannot initialize parser context\n");
1666	ctxt->nodeNr = 0;
1667	ctxt->nodeMax = 0;
1668	ctxt->node = NULL;
1669	ctxt->inputNr = 0;
1670	ctxt->inputMax = 0;
1671	ctxt->input = NULL;
1672	ctxt->nameNr = 0;
1673	ctxt->nameMax = 0;
1674	ctxt->name = NULL;
1675	return(-1);
1676	}
1677	ctxt->nameNr = 0;
1678	ctxt->name = NULL;
1679
1680	/* Allocate the space stack */
1681	if (ctxt->spaceTab == NULL) {
1682	ctxt->spaceTab = (int ) xmlMalloc(10 sizeof(int));
1683	ctxt->spaceMax = 10;
1684	}
1685	if (ctxt->spaceTab == NULL) {
1686	xmlErrMemory(NULL, "cannot initialize parser context\n");
1687	ctxt->nodeNr = 0;
1688	ctxt->nodeMax = 0;
1689	ctxt->node = NULL;
1690	ctxt->inputNr = 0;
1691	ctxt->inputMax = 0;
1692	ctxt->input = NULL;
1693	ctxt->nameNr = 0;
1694	ctxt->nameMax = 0;
1695	ctxt->name = NULL;
1696	ctxt->spaceNr = 0;
1697	ctxt->spaceMax = 0;
1698	ctxt->space = NULL;
1699	return(-1);
1700	}
1701	ctxt->spaceNr = 1;
1702	ctxt->spaceMax = 10;
1703	ctxt->spaceTab[0] = -1;
1704	ctxt->space = &ctxt->spaceTab[0];
1705	ctxt->userData = ctxt;
1706	ctxt->myDoc = NULL;
1707	ctxt->wellFormed = 1;
1708	ctxt->nsWellFormed = 1;
1709	ctxt->valid = 1;
1710	ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
1711	if (ctxt->loadsubset) {
1712	ctxt->options \|= XML_PARSE_DTDLOAD;
1713	}
1714	ctxt->validate = xmlDoValidityCheckingDefaultValue;
1715	ctxt->pedantic = xmlPedanticParserDefaultValue;
1716	if (ctxt->pedantic) {
1717	ctxt->options \|= XML_PARSE_PEDANTIC;
1718	}
1719	ctxt->linenumbers = xmlLineNumbersDefaultValue;
1720	ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
1721	if (ctxt->keepBlanks == 0) {
1722	ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
1723	ctxt->options \|= XML_PARSE_NOBLANKS;
1724	}
1725
1726	ctxt->vctxt.finishDtd = XML_CTXT_FINISH_DTD_0;
1727	ctxt->vctxt.userData = ctxt;
1728	ctxt->vctxt.error = xmlParserValidityError;
1729	ctxt->vctxt.warning = xmlParserValidityWarning;
1730	if (ctxt->validate) {
1731	if (xmlGetWarningsDefaultValue == 0)
1732	ctxt->vctxt.warning = NULL;
1733	else
1734	ctxt->vctxt.warning = xmlParserValidityWarning;
1735	ctxt->vctxt.nodeMax = 0;
1736	ctxt->options \|= XML_PARSE_DTDVALID;
1737	}
1738	ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
1739	if (ctxt->replaceEntities) {
1740	ctxt->options \|= XML_PARSE_NOENT;
1741	}
1742	ctxt->record_info = 0;
1743	ctxt->nbChars = 0;
1744	ctxt->checkIndex = 0;
1745	ctxt->inSubset = 0;
1746	ctxt->errNo = XML_ERR_OK;
1747	ctxt->depth = 0;
1748	ctxt->charset = XML_CHAR_ENCODING_UTF8;
1749	ctxt->catalogs = NULL;
1750	ctxt->nbentities = 0;
1751	ctxt->sizeentities = 0;
1752	ctxt->sizeentcopy = 0;
1753	ctxt->input_id = 1;
1754	xmlInitNodeInfoSeq(&ctxt->node_seq);
1755	return(0);
1756	}
1757
1758	/**
1759	* xmlFreeParserCtxt:
1760	* @ctxt: an XML parser context
1761	*
1762	* Free all the memory used by a parser context. However the parsed
1763	* document in ctxt->myDoc is not freed.
1764	*/
1765
1766	void
1767	xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
1768	{
1769	xmlParserInputPtr input;
1770
1771	if (ctxt == NULL) return;
1772
1773	while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
1774	xmlFreeInputStream(input);
1775	}
1776	if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
1777	if (ctxt->nameTab != NULL) xmlFree((xmlChar * *)ctxt->nameTab);
1778	if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
1779	if (ctxt->nodeInfoTab != NULL) xmlFree(ctxt->nodeInfoTab);
1780	if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
1781	if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
1782	if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
1783	if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
1784	if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
1785	#ifdef LIBXML_SAX1_ENABLED
1786	if ((ctxt->sax != NULL) &&
1787	(ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler))
1788	#else
1789	if (ctxt->sax != NULL)
1790	#endif /* LIBXML_SAX1_ENABLED */
1791	xmlFree(ctxt->sax);
1792	if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
1793	if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
1794	if (ctxt->atts != NULL) xmlFree((xmlChar * *)ctxt->atts);
1795	if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
1796	if (ctxt->nsTab != NULL) xmlFree((char *) ctxt->nsTab);
1797	if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab);
1798	if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs);
1799	if (ctxt->attsDefault != NULL)
1800	xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
1801	if (ctxt->attsSpecial != NULL)
1802	xmlHashFree(ctxt->attsSpecial, NULL);
1803	if (ctxt->freeElems != NULL) {
1804	xmlNodePtr cur, next;
1805
1806	cur = ctxt->freeElems;
1807	while (cur != NULL) {
1808	next = cur->next;
1809	xmlFree(cur);
1810	cur = next;
1811	}
1812	}
1813	if (ctxt->freeAttrs != NULL) {
1814	xmlAttrPtr cur, next;
1815
1816	cur = ctxt->freeAttrs;
1817	while (cur != NULL) {
1818	next = cur->next;
1819	xmlFree(cur);
1820	cur = next;
1821	}
1822	}
1823	/*
1824	* cleanup the error strings
1825	*/
1826	if (ctxt->lastError.message != NULL)
1827	xmlFree(ctxt->lastError.message);
1828	if (ctxt->lastError.file != NULL)
1829	xmlFree(ctxt->lastError.file);
1830	if (ctxt->lastError.str1 != NULL)
1831	xmlFree(ctxt->lastError.str1);
1832	if (ctxt->lastError.str2 != NULL)
1833	xmlFree(ctxt->lastError.str2);
1834	if (ctxt->lastError.str3 != NULL)
1835	xmlFree(ctxt->lastError.str3);
1836
1837	#ifdef LIBXML_CATALOG_ENABLED
1838	if (ctxt->catalogs != NULL)
1839	xmlCatalogFreeLocal(ctxt->catalogs);
1840	#endif
1841	xmlFree(ctxt);
1842	}
1843
1844	/**
1845	* xmlNewParserCtxt:
1846	*
1847	* Allocate and initialize a new parser context.
1848	*
1849	* Returns the xmlParserCtxtPtr or NULL
1850	*/
1851
1852	xmlParserCtxtPtr
1853	xmlNewParserCtxt(void)
1854	{
1855	xmlParserCtxtPtr ctxt;
1856
1857	ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
1858	if (ctxt == NULL) {
1859	xmlErrMemory(NULL, "cannot allocate parser context\n");
1860	return(NULL);
1861	}
1862	memset(ctxt, 0, sizeof(xmlParserCtxt));
1863	if (xmlInitParserCtxt(ctxt) < 0) {
1864	xmlFreeParserCtxt(ctxt);
1865	return(NULL);
1866	}
1867	return(ctxt);
1868	}
1869
1870	/************************************************************************
1871	* *
1872	* Handling of node informations *
1873	* *
1874	************************************************************************/
1875
1876	/**
1877	* xmlClearParserCtxt:
1878	* @ctxt: an XML parser context
1879	*
1880	* Clear (release owned resources) and reinitialize a parser context
1881	*/
1882
1883	void
1884	xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
1885	{
1886	if (ctxt==NULL)
1887	return;
1888	xmlClearNodeInfoSeq(&ctxt->node_seq);
1889	xmlCtxtReset(ctxt);
1890	}
1891
1892
1893	/**
1894	* xmlParserFindNodeInfo:
1895	* @ctx: an XML parser context
1896	* @node: an XML node within the tree
1897	*
1898	* Find the parser node info struct for a given node
1899	*
1900	* Returns an xmlParserNodeInfo block pointer or NULL
1901	*/
1902	const xmlParserNodeInfo *
1903	xmlParserFindNodeInfo(const xmlParserCtxtPtr ctx, const xmlNodePtr node)
1904	{
1905	unsigned long pos;
1906
1907	if ((ctx == NULL) \|\| (node == NULL))
1908	return (NULL);
1909	/* Find position where node should be at */
1910	pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
1911	if (pos < ctx->node_seq.length
1912	&& ctx->node_seq.buffer[pos].node == node)
1913	return &ctx->node_seq.buffer[pos];
1914	else
1915	return NULL;
1916	}
1917
1918
1919	/**
1920	* xmlInitNodeInfoSeq:
1921	* @seq: a node info sequence pointer
1922	*
1923	* -- Initialize (set to initial state) node info sequence
1924	*/
1925	void
1926	xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1927	{
1928	if (seq == NULL)
1929	return;
1930	seq->length = 0;
1931	seq->maximum = 0;
1932	seq->buffer = NULL;
1933	}
1934
1935	/**
1936	* xmlClearNodeInfoSeq:
1937	* @seq: a node info sequence pointer
1938	*
1939	* -- Clear (release memory and reinitialize) node
1940	* info sequence
1941	*/
1942	void
1943	xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1944	{
1945	if (seq == NULL)
1946	return;
1947	if (seq->buffer != NULL)
1948	xmlFree(seq->buffer);
1949	xmlInitNodeInfoSeq(seq);
1950	}
1951
1952	/**
1953	* xmlParserFindNodeInfoIndex:
1954	* @seq: a node info sequence pointer
1955	* @node: an XML node pointer
1956	*
1957	*
1958	* xmlParserFindNodeInfoIndex : Find the index that the info record for
1959	* the given node is or should be at in a sorted sequence
1960	*
1961	* Returns a long indicating the position of the record
1962	*/
1963	unsigned long
1964	xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq,
1965	const xmlNodePtr node)
1966	{
1967	unsigned long upper, lower, middle;
1968	int found = 0;
1969
1970	if ((seq == NULL) \|\| (node == NULL))
1971	return ((unsigned long) -1);
1972
1973	/* Do a binary search for the key */
1974	lower = 1;
1975	upper = seq->length;
1976	middle = 0;
1977	while (lower <= upper && !found) {
1978	middle = lower + (upper - lower) / 2;
1979	if (node == seq->buffer[middle - 1].node)
1980	found = 1;
1981	else if (node < seq->buffer[middle - 1].node)
1982	upper = middle - 1;
1983	else
1984	lower = middle + 1;
1985	}
1986
1987	/* Return position */
1988	if (middle == 0 \|\| seq->buffer[middle - 1].node < node)
1989	return middle;
1990	else
1991	return middle - 1;
1992	}
1993
1994
1995	/**
1996	* xmlParserAddNodeInfo:
1997	* @ctxt: an XML parser context
1998	* @info: a node info sequence pointer
1999	*
2000	* Insert node info record into the sorted sequence
2001	*/
2002	void
2003	xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
2004	const xmlParserNodeInfoPtr info)
2005	{
2006	unsigned long pos;
2007
2008	if ((ctxt == NULL) \|\| (info == NULL)) return;
2009
2010	/* Find pos and check to see if node is already in the sequence */
2011	pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr)
2012	info->node);
2013
2014	if ((pos < ctxt->node_seq.length) &&
2015	(ctxt->node_seq.buffer != NULL) &&
2016	(ctxt->node_seq.buffer[pos].node == info->node)) {
2017	ctxt->node_seq.buffer[pos] = *info;
2018	}
2019
2020	/* Otherwise, we need to add new node to buffer */
2021	else {
2022	if ((ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) \|\|
2023	(ctxt->node_seq.buffer == NULL)) {
2024	xmlParserNodeInfo *tmp_buffer;
2025	unsigned int byte_size;
2026
2027	if (ctxt->node_seq.maximum == 0)
2028	ctxt->node_seq.maximum = 2;
2029	byte_size = (sizeof(ctxt->node_seq.buffer)
2030	(2 * ctxt->node_seq.maximum));
2031
2032	if (ctxt->node_seq.buffer == NULL)
2033	tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size);
2034	else
2035	tmp_buffer =
2036	(xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer,
2037	byte_size);
2038
2039	if (tmp_buffer == NULL) {
2040	xmlErrMemory(ctxt, "failed to allocate buffer\n");
2041	return;
2042	}
2043	ctxt->node_seq.buffer = tmp_buffer;
2044	ctxt->node_seq.maximum *= 2;
2045	}
2046
2047	/* If position is not at end, move elements out of the way */
2048	if (pos != ctxt->node_seq.length) {
2049	unsigned long i;
2050
2051	for (i = ctxt->node_seq.length; i > pos; i--)
2052	ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
2053	}
2054
2055	/* Copy element and increase length */
2056	ctxt->node_seq.buffer[pos] = *info;
2057	ctxt->node_seq.length++;
2058	}
2059	}
2060
2061	/************************************************************************
2062	* *
2063	* Defaults settings *
2064	* *
2065	************************************************************************/
2066	/**
2067	* xmlPedanticParserDefault:
2068	* @val: int 0 or 1
2069	*
2070	* Set and return the previous value for enabling pedantic warnings.
2071	*
2072	* Returns the last value for 0 for no substitution, 1 for substitution.
2073	*/
2074
2075	int
2076	xmlPedanticParserDefault(int val) {
2077	int old = xmlPedanticParserDefaultValue;
2078
2079	xmlPedanticParserDefaultValue = val;
2080	return(old);
2081	}
2082
2083	/**
2084	* xmlLineNumbersDefault:
2085	* @val: int 0 or 1
2086	*
2087	* Set and return the previous value for enabling line numbers in elements
2088	* contents. This may break on old application and is turned off by default.
2089	*
2090	* Returns the last value for 0 for no substitution, 1 for substitution.
2091	*/
2092
2093	int
2094	xmlLineNumbersDefault(int val) {
2095	int old = xmlLineNumbersDefaultValue;
2096
2097	xmlLineNumbersDefaultValue = val;
2098	return(old);
2099	}
2100
2101	/**
2102	* xmlSubstituteEntitiesDefault:
2103	* @val: int 0 or 1
2104	*
2105	* Set and return the previous value for default entity support.
2106	* Initially the parser always keep entity references instead of substituting
2107	* entity values in the output. This function has to be used to change the
2108	* default parser behavior
2109	* SAX::substituteEntities() has to be used for changing that on a file by
2110	* file basis.
2111	*
2112	* Returns the last value for 0 for no substitution, 1 for substitution.
2113	*/
2114
2115	int
2116	xmlSubstituteEntitiesDefault(int val) {
2117	int old = xmlSubstituteEntitiesDefaultValue;
2118
2119	xmlSubstituteEntitiesDefaultValue = val;
2120	return(old);
2121	}
2122
2123	/**
2124	* xmlKeepBlanksDefault:
2125	* @val: int 0 or 1
2126	*
2127	* Set and return the previous value for default blanks text nodes support.
2128	* The 1.x version of the parser used an heuristic to try to detect
2129	* ignorable white spaces. As a result the SAX callback was generating
2130	* xmlSAX2IgnorableWhitespace() callbacks instead of characters() one, and when
2131	* using the DOM output text nodes containing those blanks were not generated.
2132	* The 2.x and later version will switch to the XML standard way and
2133	* ignorableWhitespace() are only generated when running the parser in
2134	* validating mode and when the current element doesn't allow CDATA or
2135	* mixed content.
2136	* This function is provided as a way to force the standard behavior
2137	* on 1.X libs and to switch back to the old mode for compatibility when
2138	* running 1.X client code on 2.X . Upgrade of 1.X code should be done
2139	* by using xmlIsBlankNode() commodity function to detect the "empty"
2140	* nodes generated.
2141	* This value also affect autogeneration of indentation when saving code
2142	* if blanks sections are kept, indentation is not generated.
2143	*
2144	* Returns the last value for 0 for no substitution, 1 for substitution.
2145	*/
2146
2147	int
2148	xmlKeepBlanksDefault(int val) {
2149	int old = xmlKeepBlanksDefaultValue;
2150
2151	xmlKeepBlanksDefaultValue = val;
2152	if (!val) xmlIndentTreeOutput = 1;
2153	return(old);
2154	}
2155
2156	#define bottom_parserInternals
2157	#include "elfgcchack.h"

Note: See TracBrowser for help on using the repository browser.

source: vbox/trunk/src/libs/libxml2-2.9.4/parserInternals.c@ 93943

Download in other formats: