xmlstring.c@ 93943

Last change on this file since 93943 was 65950, checked in by vboxsync, 8 years ago
libxml 2.9.4: fix export
Property svn:eol-style set to `native`
File size: 25.9 KB

Line
1	/*
2	* string.c : an XML string utilities module
3	*
4	* This module provides various utility functions for manipulating
5	* the xmlChar* type. All functions named xmlStr* have been moved here
6	* from the parser.c file (their original home).
7	*
8	* See Copyright for the status of this software.
9	*
10	* UTF8 string routines from:
11	* William Brack <wbrack@mmm.com.hk>
12	*
13	* daniel@veillard.com
14	*/
15
16	#define IN_LIBXML
17	#include "libxml.h"
18
19	#include <stdlib.h>
20	#include <string.h>
21	#include <libxml/xmlmemory.h>
22	#include <libxml/parserInternals.h>
23	#include <libxml/xmlstring.h>
24
/************************************************************************
 *                                                                      *
 *              Commodity functions to handle xmlChars                  *
 *                                                                      *
 ************************************************************************/
30
31	/**
32	* xmlStrndup:
33	* @cur: the input xmlChar *
34	* @len: the len of @cur
35	*
36	* a strndup for array of xmlChar's
37	*
38	* Returns a new xmlChar * or NULL
39	*/
40	xmlChar *
41	xmlStrndup(const xmlChar *cur, int len) {
42	xmlChar *ret;
43
44	if ((cur == NULL) \|\| (len < 0)) return(NULL);
45	ret = (xmlChar ) xmlMallocAtomic((len + 1) sizeof(xmlChar));
46	if (ret == NULL) {
47	xmlErrMemory(NULL, NULL);
48	return(NULL);
49	}
50	memcpy(ret, cur, len * sizeof(xmlChar));
51	ret[len] = 0;
52	return(ret);
53	}
54
55	/**
56	* xmlStrdup:
57	* @cur: the input xmlChar *
58	*
59	* a strdup for array of xmlChar's. Since they are supposed to be
60	* encoded in UTF-8 or an encoding with 8bit based chars, we assume
61	* a termination mark of '0'.
62	*
63	* Returns a new xmlChar * or NULL
64	*/
65	xmlChar *
66	xmlStrdup(const xmlChar *cur) {
67	const xmlChar *p = cur;
68
69	if (cur == NULL) return(NULL);
70	while (p != 0) p++; / non input consuming */
71	return(xmlStrndup(cur, p - cur));
72	}
73
74	/**
75	* xmlCharStrndup:
76	* @cur: the input char *
77	* @len: the len of @cur
78	*
79	* a strndup for char's to xmlChar's
80	*
81	* Returns a new xmlChar * or NULL
82	*/
83
84	xmlChar *
85	xmlCharStrndup(const char *cur, int len) {
86	int i;
87	xmlChar *ret;
88
89	if ((cur == NULL) \|\| (len < 0)) return(NULL);
90	ret = (xmlChar ) xmlMallocAtomic((len + 1) sizeof(xmlChar));
91	if (ret == NULL) {
92	xmlErrMemory(NULL, NULL);
93	return(NULL);
94	}
95	for (i = 0;i < len;i++) {
96	ret[i] = (xmlChar) cur[i];
97	if (ret[i] == 0) return(ret);
98	}
99	ret[len] = 0;
100	return(ret);
101	}
102
103	/**
104	* xmlCharStrdup:
105	* @cur: the input char *
106	*
107	* a strdup for char's to xmlChar's
108	*
109	* Returns a new xmlChar * or NULL
110	*/
111
112	xmlChar *
113	xmlCharStrdup(const char *cur) {
114	const char *p = cur;
115
116	if (cur == NULL) return(NULL);
117	while (p != '\0') p++; / non input consuming */
118	return(xmlCharStrndup(cur, p - cur));
119	}
120
121	/**
122	* xmlStrcmp:
123	* @str1: the first xmlChar *
124	* @str2: the second xmlChar *
125	*
126	* a strcmp for xmlChar's
127	*
128	* Returns the integer result of the comparison
129	*/
130
131	int
132	xmlStrcmp(const xmlChar str1, const xmlChar str2) {
133	register int tmp;
134
135	if (str1 == str2) return(0);
136	if (str1 == NULL) return(-1);
137	if (str2 == NULL) return(1);
138	do {
139	tmp = str1++ - str2;
140	if (tmp != 0) return(tmp);
141	} while (*str2++ != 0);
142	return 0;
143	}
144
145	/**
146	* xmlStrEqual:
147	* @str1: the first xmlChar *
148	* @str2: the second xmlChar *
149	*
150	* Check if both strings are equal of have same content.
151	* Should be a bit more readable and faster than xmlStrcmp()
152	*
153	* Returns 1 if they are equal, 0 if they are different
154	*/
155
156	int
157	xmlStrEqual(const xmlChar str1, const xmlChar str2) {
158	if (str1 == str2) return(1);
159	if (str1 == NULL) return(0);
160	if (str2 == NULL) return(0);
161	do {
162	if (str1++ != str2) return(0);
163	} while (*str2++);
164	return(1);
165	}
166
167	/**
168	* xmlStrQEqual:
169	* @pref: the prefix of the QName
170	* @name: the localname of the QName
171	* @str: the second xmlChar *
172	*
173	* Check if a QName is Equal to a given string
174	*
175	* Returns 1 if they are equal, 0 if they are different
176	*/
177
178	int
179	xmlStrQEqual(const xmlChar pref, const xmlChar name, const xmlChar *str) {
180	if (pref == NULL) return(xmlStrEqual(name, str));
181	if (name == NULL) return(0);
182	if (str == NULL) return(0);
183
184	do {
185	if (pref++ != str) return(0);
186	} while ((str++) && (pref));
187	if (*str++ != ':') return(0);
188	do {
189	if (name++ != str) return(0);
190	} while (*str++);
191	return(1);
192	}
193
194	/**
195	* xmlStrncmp:
196	* @str1: the first xmlChar *
197	* @str2: the second xmlChar *
198	* @len: the max comparison length
199	*
200	* a strncmp for xmlChar's
201	*
202	* Returns the integer result of the comparison
203	*/
204
205	int
206	xmlStrncmp(const xmlChar str1, const xmlChar str2, int len) {
207	register int tmp;
208
209	if (len <= 0) return(0);
210	if (str1 == str2) return(0);
211	if (str1 == NULL) return(-1);
212	if (str2 == NULL) return(1);
213	#ifdef __GNUC__
214	tmp = strncmp((const char )str1, (const char )str2, len);
215	return tmp;
216	#else
217	do {
218	tmp = str1++ - str2;
219	if (tmp != 0 \|\| --len == 0) return(tmp);
220	} while (*str2++ != 0);
221	return 0;
222	#endif
223	}
224
225	static const xmlChar casemap[256] = {
226	0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
227	0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
228	0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
229	0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
230	0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
231	0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
232	0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
233	0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
234	0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
235	0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
236	0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
237	0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
238	0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
239	0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
240	0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
241	0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
242	0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
243	0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
244	0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
245	0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
246	0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
247	0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
248	0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
249	0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
250	0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
251	0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
252	0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
253	0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
254	0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
255	0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
256	0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
257	0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
258	};
259
260	/**
261	* xmlStrcasecmp:
262	* @str1: the first xmlChar *
263	* @str2: the second xmlChar *
264	*
265	* a strcasecmp for xmlChar's
266	*
267	* Returns the integer result of the comparison
268	*/
269
270	int
271	xmlStrcasecmp(const xmlChar str1, const xmlChar str2) {
272	register int tmp;
273
274	if (str1 == str2) return(0);
275	if (str1 == NULL) return(-1);
276	if (str2 == NULL) return(1);
277	do {
278	tmp = casemap[str1++] - casemap[str2];
279	if (tmp != 0) return(tmp);
280	} while (*str2++ != 0);
281	return 0;
282	}
283
284	/**
285	* xmlStrncasecmp:
286	* @str1: the first xmlChar *
287	* @str2: the second xmlChar *
288	* @len: the max comparison length
289	*
290	* a strncasecmp for xmlChar's
291	*
292	* Returns the integer result of the comparison
293	*/
294
295	int
296	xmlStrncasecmp(const xmlChar str1, const xmlChar str2, int len) {
297	register int tmp;
298
299	if (len <= 0) return(0);
300	if (str1 == str2) return(0);
301	if (str1 == NULL) return(-1);
302	if (str2 == NULL) return(1);
303	do {
304	tmp = casemap[str1++] - casemap[str2];
305	if (tmp != 0 \|\| --len == 0) return(tmp);
306	} while (*str2++ != 0);
307	return 0;
308	}
309
310	/**
311	* xmlStrchr:
312	* @str: the xmlChar * array
313	* @val: the xmlChar to search
314	*
315	* a strchr for xmlChar's
316	*
317	* Returns the xmlChar * for the first occurrence or NULL.
318	*/
319
320	const xmlChar *
321	xmlStrchr(const xmlChar *str, xmlChar val) {
322	if (str == NULL) return(NULL);
323	while (str != 0) { / non input consuming */
324	if (str == val) return((xmlChar ) str);
325	str++;
326	}
327	return(NULL);
328	}
329
330	/**
331	* xmlStrstr:
332	* @str: the xmlChar * array (haystack)
333	* @val: the xmlChar to search (needle)
334	*
335	* a strstr for xmlChar's
336	*
337	* Returns the xmlChar * for the first occurrence or NULL.
338	*/
339
340	const xmlChar *
341	xmlStrstr(const xmlChar str, const xmlChar val) {
342	int n;
343
344	if (str == NULL) return(NULL);
345	if (val == NULL) return(NULL);
346	n = xmlStrlen(val);
347
348	if (n == 0) return(str);
349	while (str != 0) { / non input consuming */
350	if (str == val) {
351	if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
352	}
353	str++;
354	}
355	return(NULL);
356	}
357
358	/**
359	* xmlStrcasestr:
360	* @str: the xmlChar * array (haystack)
361	* @val: the xmlChar to search (needle)
362	*
363	* a case-ignoring strstr for xmlChar's
364	*
365	* Returns the xmlChar * for the first occurrence or NULL.
366	*/
367
368	const xmlChar *
369	xmlStrcasestr(const xmlChar str, const xmlChar val) {
370	int n;
371
372	if (str == NULL) return(NULL);
373	if (val == NULL) return(NULL);
374	n = xmlStrlen(val);
375
376	if (n == 0) return(str);
377	while (str != 0) { / non input consuming */
378	if (casemap[str] == casemap[val])
379	if (!xmlStrncasecmp(str, val, n)) return(str);
380	str++;
381	}
382	return(NULL);
383	}
384
385	/**
386	* xmlStrsub:
387	* @str: the xmlChar * array (haystack)
388	* @start: the index of the first char (zero based)
389	* @len: the length of the substring
390	*
391	* Extract a substring of a given string
392	*
393	* Returns the xmlChar * for the first occurrence or NULL.
394	*/
395
396	xmlChar *
397	xmlStrsub(const xmlChar *str, int start, int len) {
398	int i;
399
400	if (str == NULL) return(NULL);
401	if (start < 0) return(NULL);
402	if (len < 0) return(NULL);
403
404	for (i = 0;i < start;i++) {
405	if (*str == 0) return(NULL);
406	str++;
407	}
408	if (*str == 0) return(NULL);
409	return(xmlStrndup(str, len));
410	}
411
412	/**
413	* xmlStrlen:
414	* @str: the xmlChar * array
415	*
416	* length of a xmlChar's string
417	*
418	* Returns the number of xmlChar contained in the ARRAY.
419	*/
420
421	int
422	xmlStrlen(const xmlChar *str) {
423	int len = 0;
424
425	if (str == NULL) return(0);
426	while (str != 0) { / non input consuming */
427	str++;
428	len++;
429	}
430	return(len);
431	}
432
433	/**
434	* xmlStrncat:
435	* @cur: the original xmlChar * array
436	* @add: the xmlChar * array added
437	* @len: the length of @add
438	*
439	* a strncat for array of xmlChar's, it will extend @cur with the len
440	* first bytes of @add. Note that if @len < 0 then this is an API error
441	* and NULL will be returned.
442	*
443	* Returns a new xmlChar *, the original @cur is reallocated if needed
444	* and should not be freed
445	*/
446
447	xmlChar *
448	xmlStrncat(xmlChar cur, const xmlChar add, int len) {
449	int size;
450	xmlChar *ret;
451
452	if ((add == NULL) \|\| (len == 0))
453	return(cur);
454	if (len < 0)
455	return(NULL);
456	if (cur == NULL)
457	return(xmlStrndup(add, len));
458
459	size = xmlStrlen(cur);
460	if (size < 0)
461	return(NULL);
462	ret = (xmlChar ) xmlRealloc(cur, (size + len + 1) sizeof(xmlChar));
463	if (ret == NULL) {
464	xmlErrMemory(NULL, NULL);
465	return(cur);
466	}
467	memcpy(&ret[size], add, len * sizeof(xmlChar));
468	ret[size + len] = 0;
469	return(ret);
470	}
471
472	/**
473	* xmlStrncatNew:
474	* @str1: first xmlChar string
475	* @str2: second xmlChar string
476	* @len: the len of @str2 or < 0
477	*
478	* same as xmlStrncat, but creates a new string. The original
479	* two strings are not freed. If @len is < 0 then the length
480	* will be calculated automatically.
481	*
482	* Returns a new xmlChar * or NULL
483	*/
484	xmlChar *
485	xmlStrncatNew(const xmlChar str1, const xmlChar str2, int len) {
486	int size;
487	xmlChar *ret;
488
489	if (len < 0) {
490	len = xmlStrlen(str2);
491	if (len < 0)
492	return(NULL);
493	}
494	if ((str2 == NULL) \|\| (len == 0))
495	return(xmlStrdup(str1));
496	if (str1 == NULL)
497	return(xmlStrndup(str2, len));
498
499	size = xmlStrlen(str1);
500	if (size < 0)
501	return(NULL);
502	ret = (xmlChar ) xmlMalloc((size + len + 1) sizeof(xmlChar));
503	if (ret == NULL) {
504	xmlErrMemory(NULL, NULL);
505	return(xmlStrndup(str1, size));
506	}
507	memcpy(ret, str1, size * sizeof(xmlChar));
508	memcpy(&ret[size], str2, len * sizeof(xmlChar));
509	ret[size + len] = 0;
510	return(ret);
511	}
512
513	/**
514	* xmlStrcat:
515	* @cur: the original xmlChar * array
516	* @add: the xmlChar * array added
517	*
518	* a strcat for array of xmlChar's. Since they are supposed to be
519	* encoded in UTF-8 or an encoding with 8bit based chars, we assume
520	* a termination mark of '0'.
521	*
522	* Returns a new xmlChar * containing the concatenated string.
523	*/
524	xmlChar *
525	xmlStrcat(xmlChar cur, const xmlChar add) {
526	const xmlChar *p = add;
527
528	if (add == NULL) return(cur);
529	if (cur == NULL)
530	return(xmlStrdup(add));
531
532	while (p != 0) p++; / non input consuming */
533	return(xmlStrncat(cur, add, p - add));
534	}
535
536	/**
537	* xmlStrPrintf:
538	* @buf: the result buffer.
539	* @len: the result buffer length.
540	* @msg: the message with printf formatting.
541	* @...: extra parameters for the message.
542	*
543	* Formats @msg and places result into @buf.
544	*
545	* Returns the number of characters written to @buf or -1 if an error occurs.
546	*/
547	int XMLCDECL
548	xmlStrPrintf(xmlChar buf, int len, const char msg, ...) {
549	va_list args;
550	int ret;
551
552	if((buf == NULL) \|\| (msg == NULL)) {
553	return(-1);
554	}
555
556	va_start(args, msg);
557	ret = vsnprintf((char ) buf, len, (const char ) msg, args);
558	va_end(args);
559	buf[len - 1] = 0; /* be safe ! */
560
561	return(ret);
562	}
563
564	/**
565	* xmlStrVPrintf:
566	* @buf: the result buffer.
567	* @len: the result buffer length.
568	* @msg: the message with printf formatting.
569	* @ap: extra parameters for the message.
570	*
571	* Formats @msg and places result into @buf.
572	*
573	* Returns the number of characters written to @buf or -1 if an error occurs.
574	*/
575	int
576	xmlStrVPrintf(xmlChar buf, int len, const char msg, va_list ap) {
577	int ret;
578
579	if((buf == NULL) \|\| (msg == NULL)) {
580	return(-1);
581	}
582
583	ret = vsnprintf((char ) buf, len, (const char ) msg, ap);
584	buf[len - 1] = 0; /* be safe ! */
585
586	return(ret);
587	}
588
/************************************************************************
 *                                                                      *
 *              Generic UTF8 handling routines                          *
 *                                                                      *
 * From rfc2044: encoding of the Unicode values on UTF-8:               *
 *                                                                      *
 * UCS-4 range (hex.)           UTF-8 octet sequence (binary)           *
 * 0000 0000-0000 007F          0xxxxxxx                                *
 * 0000 0080-0000 07FF          110xxxxx 10xxxxxx                       *
 * 0000 0800-0000 FFFF          1110xxxx 10xxxxxx 10xxxxxx              *
 *                                                                      *
 * I hope we won't use values > 0xFFFF anytime soon !                   *
 * (Several routines below nevertheless accept 4-byte sequences of      *
 * the form 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.)                       *
 *                                                                      *
 ************************************************************************/
603
604
605	/**
606	* xmlUTF8Size:
607	* @utf: pointer to the UTF8 character
608	*
609	* calculates the internal size of a UTF8 character
610	*
611	* returns the numbers of bytes in the character, -1 on format error
612	*/
613	int
614	xmlUTF8Size(const xmlChar *utf) {
615	xmlChar mask;
616	int len;
617
618	if (utf == NULL)
619	return -1;
620	if (*utf < 0x80)
621	return 1;
622	/* check valid UTF8 character */
623	if (!(*utf & 0x40))
624	return -1;
625	/* determine number of bytes in char */
626	len = 2;
627	for (mask=0x20; mask != 0; mask>>=1) {
628	if (!(*utf & mask))
629	return len;
630	len++;
631	}
632	return -1;
633	}
634
635	/**
636	* xmlUTF8Charcmp:
637	* @utf1: pointer to first UTF8 char
638	* @utf2: pointer to second UTF8 char
639	*
640	* compares the two UCS4 values
641	*
642	* returns result of the compare as with xmlStrncmp
643	*/
644	int
645	xmlUTF8Charcmp(const xmlChar utf1, const xmlChar utf2) {
646
647	if (utf1 == NULL ) {
648	if (utf2 == NULL)
649	return 0;
650	return -1;
651	}
652	return xmlStrncmp(utf1, utf2, xmlUTF8Size(utf1));
653	}
654
655	/**
656	* xmlUTF8Strlen:
657	* @utf: a sequence of UTF-8 encoded bytes
658	*
659	* compute the length of an UTF8 string, it doesn't do a full UTF8
660	* checking of the content of the string.
661	*
662	* Returns the number of characters in the string or -1 in case of error
663	*/
664	int
665	xmlUTF8Strlen(const xmlChar *utf) {
666	int ret = 0;
667
668	if (utf == NULL)
669	return(-1);
670
671	while (*utf != 0) {
672	if (utf[0] & 0x80) {
673	if ((utf[1] & 0xc0) != 0x80)
674	return(-1);
675	if ((utf[0] & 0xe0) == 0xe0) {
676	if ((utf[2] & 0xc0) != 0x80)
677	return(-1);
678	if ((utf[0] & 0xf0) == 0xf0) {
679	if ((utf[0] & 0xf8) != 0xf0 \|\| (utf[3] & 0xc0) != 0x80)
680	return(-1);
681	utf += 4;
682	} else {
683	utf += 3;
684	}
685	} else {
686	utf += 2;
687	}
688	} else {
689	utf++;
690	}
691	ret++;
692	}
693	return(ret);
694	}
695
/**
 * xmlGetUTF8Char:
 * @utf:  a sequence of UTF-8 encoded bytes
 * @len:  a pointer to the minimum number of bytes present in
 *        the sequence.  This is used to assure the next character
 *        is completely contained within the sequence.
 *
 * Read the first UTF8 character from @utf
 *
 * Returns the char value or -1 in case of error, and sets *len to
 *        the actual number of bytes consumed (0 in case of error)
 */
int
xmlGetUTF8Char(const unsigned char *utf, int *len) {
    unsigned int c;

    if (utf == NULL)
        goto error;
    if (len == NULL)
        goto error;
    if (*len < 1)
        goto error;

    c = utf[0];
    if (c & 0x80) {
        if (*len < 2)
            goto error;
        if ((utf[1] & 0xc0) != 0x80)
            goto error;
        if ((c & 0xe0) == 0xe0) {
            if (*len < 3)
                goto error;
            if ((utf[2] & 0xc0) != 0x80)
                goto error;
            if ((c & 0xf0) == 0xf0) {
                if (*len < 4)
                    goto error;
                /* lead bytes announcing more than 4 bytes are rejected */
                if ((c & 0xf8) != 0xf0 || (utf[3] & 0xc0) != 0x80)
                    goto error;
                *len = 4;
                /* 4-byte code */
                c = (utf[0] & 0x7) << 18;
                c |= (utf[1] & 0x3f) << 12;
                c |= (utf[2] & 0x3f) << 6;
                c |= utf[3] & 0x3f;
            } else {
                /* 3-byte code */
                *len = 3;
                c = (utf[0] & 0xf) << 12;
                c |= (utf[1] & 0x3f) << 6;
                c |= utf[2] & 0x3f;
            }
        } else {
            /* 2-byte code */
            *len = 2;
            c = (utf[0] & 0x1f) << 6;
            c |= utf[1] & 0x3f;
        }
    } else {
        /* 1-byte code */
        *len = 1;
    }
    return(c);

error:
    if (len != NULL)
        *len = 0;
    return(-1);
}
765
/**
 * xmlCheckUTF8:
 * @utf: Pointer to putative UTF-8 encoded string.
 *
 * Checks @utf for being valid UTF-8. @utf is assumed to be
 * null-terminated. This function is not super-strict, as it will
 * allow longer UTF-8 sequences than necessary. Note that Java is
 * capable of producing these sequences if provoked. Also note, this
 * routine checks for the 4-byte maximum size, but does not check for
 * 0x10ffff maximum value.
 *
 * Return value: true if @utf is valid.
 **/
int
xmlCheckUTF8(const unsigned char *utf)
{
    int ix;
    unsigned char c;

    if (utf == NULL)
        return(0);
    /*
     * utf is a string of 1, 2, 3 or 4 bytes.  The valid strings
     * are as follows (in "bit format"):
     *    0xxxxxxx                                      valid 1-byte
     *    110xxxxx 10xxxxxx                             valid 2-byte
     *    1110xxxx 10xxxxxx 10xxxxxx                    valid 3-byte
     *    11110xxx 10xxxxxx 10xxxxxx 10xxxxxx           valid 4-byte
     */
    for (ix = 0; (c = utf[ix]);) {      /* string is 0-terminated */
        if ((c & 0x80) == 0x00) {       /* 1-byte code, starts with 0 */
            ix++;
        } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
            if ((utf[ix+1] & 0xc0 ) != 0x80)
                return 0;
            ix += 2;
        } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
            if (((utf[ix+1] & 0xc0) != 0x80) ||
                ((utf[ix+2] & 0xc0) != 0x80))
                    return 0;
            ix += 3;
        } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
            if (((utf[ix+1] & 0xc0) != 0x80) ||
                ((utf[ix+2] & 0xc0) != 0x80) ||
                ((utf[ix+3] & 0xc0) != 0x80))
                    return 0;
            ix += 4;
        } else                          /* unknown encoding */
            return 0;
    }
    return(1);
}
818
819	/**
820	* xmlUTF8Strsize:
821	* @utf: a sequence of UTF-8 encoded bytes
822	* @len: the number of characters in the array
823	*
824	* storage size of an UTF8 string
825	* the behaviour is not garanteed if the input string is not UTF-8
826	*
827	* Returns the storage size of
828	* the first 'len' characters of ARRAY
829	*/
830
831	int
832	xmlUTF8Strsize(const xmlChar *utf, int len) {
833	const xmlChar *ptr=utf;
834	xmlChar ch;
835
836	if (utf == NULL)
837	return(0);
838
839	if (len <= 0)
840	return(0);
841
842	while ( len-- > 0) {
843	if ( !*ptr )
844	break;
845	if ( (ch = *ptr++) & 0x80)
846	while ((ch<<=1) & 0x80 ) {
847	if (*ptr == 0) break;
848	ptr++;
849	}
850	}
851	return (ptr - utf);
852	}
853
854
855	/**
856	* xmlUTF8Strndup:
857	* @utf: the input UTF8 *
858	* @len: the len of @utf (in chars)
859	*
860	* a strndup for array of UTF8's
861	*
862	* Returns a new UTF8 * or NULL
863	*/
864	xmlChar *
865	xmlUTF8Strndup(const xmlChar *utf, int len) {
866	xmlChar *ret;
867	int i;
868
869	if ((utf == NULL) \|\| (len < 0)) return(NULL);
870	i = xmlUTF8Strsize(utf, len);
871	ret = (xmlChar ) xmlMallocAtomic((i + 1) sizeof(xmlChar));
872	if (ret == NULL) {
873	xmlGenericError(xmlGenericErrorContext,
874	"malloc of %ld byte failed\n",
875	(len + 1) * (long)sizeof(xmlChar));
876	return(NULL);
877	}
878	memcpy(ret, utf, i * sizeof(xmlChar));
879	ret[i] = 0;
880	return(ret);
881	}
882
883	/**
884	* xmlUTF8Strpos:
885	* @utf: the input UTF8 *
886	* @pos: the position of the desired UTF8 char (in chars)
887	*
888	* a function to provide the equivalent of fetching a
889	* character from a string array
890	*
891	* Returns a pointer to the UTF8 character or NULL
892	*/
893	const xmlChar *
894	xmlUTF8Strpos(const xmlChar *utf, int pos) {
895	xmlChar ch;
896
897	if (utf == NULL) return(NULL);
898	if (pos < 0)
899	return(NULL);
900	while (pos--) {
901	if ((ch=*utf++) == 0) return(NULL);
902	if ( ch & 0x80 ) {
903	/* if not simple ascii, verify proper format */
904	if ( (ch & 0xc0) != 0xc0 )
905	return(NULL);
906	/* then skip over remaining bytes for this char */
907	while ( (ch <<= 1) & 0x80 )
908	if ( (*utf++ & 0xc0) != 0x80 )
909	return(NULL);
910	}
911	}
912	return((xmlChar *)utf);
913	}
914
915	/**
916	* xmlUTF8Strloc:
917	* @utf: the input UTF8 *
918	* @utfchar: the UTF8 character to be found
919	*
920	* a function to provide the relative location of a UTF8 char
921	*
922	* Returns the relative character position of the desired char
923	* or -1 if not found
924	*/
925	int
926	xmlUTF8Strloc(const xmlChar utf, const xmlChar utfchar) {
927	int i, size;
928	xmlChar ch;
929
930	if (utf==NULL \|\| utfchar==NULL) return -1;
931	size = xmlUTF8Strsize(utfchar, 1);
932	for(i=0; (ch=*utf) != 0; i++) {
933	if (xmlStrncmp(utf, utfchar, size)==0)
934	return(i);
935	utf++;
936	if ( ch & 0x80 ) {
937	/* if not simple ascii, verify proper format */
938	if ( (ch & 0xc0) != 0xc0 )
939	return(-1);
940	/* then skip over remaining bytes for this char */
941	while ( (ch <<= 1) & 0x80 )
942	if ( (*utf++ & 0xc0) != 0x80 )
943	return(-1);
944	}
945	}
946
947	return(-1);
948	}
949	/**
950	* xmlUTF8Strsub:
951	* @utf: a sequence of UTF-8 encoded bytes
952	* @start: relative pos of first char
953	* @len: total number to copy
954	*
955	* Create a substring from a given UTF-8 string
956	* Note: positions are given in units of UTF-8 chars
957	*
958	* Returns a pointer to a newly created string
959	* or NULL if any problem
960	*/
961
962	xmlChar *
963	xmlUTF8Strsub(const xmlChar *utf, int start, int len) {
964	int i;
965	xmlChar ch;
966
967	if (utf == NULL) return(NULL);
968	if (start < 0) return(NULL);
969	if (len < 0) return(NULL);
970
971	/*
972	* Skip over any leading chars
973	*/
974	for (i = 0;i < start;i++) {
975	if ((ch=*utf++) == 0) return(NULL);
976	if ( ch & 0x80 ) {
977	/* if not simple ascii, verify proper format */
978	if ( (ch & 0xc0) != 0xc0 )
979	return(NULL);
980	/* then skip over remaining bytes for this char */
981	while ( (ch <<= 1) & 0x80 )
982	if ( (*utf++ & 0xc0) != 0x80 )
983	return(NULL);
984	}
985	}
986
987	return(xmlUTF8Strndup(utf, len));
988	}
989
990	/**
991	* xmlEscapeFormatString:
992	* @msg: a pointer to the string in which to escape '%' characters.
993	* Must be a heap-allocated buffer created by libxml2 that may be
994	* returned, or that may be freed and replaced.
995	*
996	* Replaces the string pointed to by 'msg' with an escaped string.
997	* Returns the same string with all '%' characters escaped.
998	*/
999	xmlChar *
1000	xmlEscapeFormatString(xmlChar **msg)
1001	{
1002	xmlChar *msgPtr = NULL;
1003	xmlChar *result = NULL;
1004	xmlChar *resultPtr = NULL;
1005	size_t count = 0;
1006	size_t msgLen = 0;
1007	size_t resultLen = 0;
1008
1009	if (!msg \|\| !*msg)
1010	return(NULL);
1011
1012	for (msgPtr = msg; msgPtr != '\0'; ++msgPtr) {
1013	++msgLen;
1014	if (*msgPtr == '%')
1015	++count;
1016	}
1017
1018	if (count == 0)
1019	return(*msg);
1020
1021	resultLen = msgLen + count + 1;
1022	result = (xmlChar ) xmlMallocAtomic(resultLen sizeof(xmlChar));
1023	if (result == NULL) {
1024	/* Clear *msg to prevent format string vulnerabilities in
1025	out-of-memory situations. */
1026	xmlFree(*msg);
1027	*msg = NULL;
1028	xmlErrMemory(NULL, NULL);
1029	return(NULL);
1030	}
1031
1032	for (msgPtr = msg, resultPtr = result; msgPtr != '\0'; ++msgPtr, ++resultPtr) {
1033	resultPtr = msgPtr;
1034	if (*msgPtr == '%')
1035	*(++resultPtr) = '%';
1036	}
1037	result[resultLen - 1] = '\0';
1038
1039	xmlFree(*msg);
1040	*msg = result;
1041
1042	return *msg;
1043	}
1044
1045	#define bottom_xmlstring
1046	#include "elfgcchack.h"

Note: See TracBrowser for help on using the repository browser.

source: vbox/trunk/src/libs/libxml2-2.9.4/xmlstring.c@ 93943

Download in other formats: