xmlstring.c@ 102654

Last change on this file since 102654 was 95312, checked in by vboxsync, 2 years ago
libs/{curl,libxml2}: OSE export fixes, bugref:8515
Property svn:eol-style set to `native`
File size: 26.4 KB

Line
1	/*
2	* string.c : an XML string utilities module
3	*
4	* This module provides various utility functions for manipulating
5	* the xmlChar* type. All functions named xmlStr* have been moved here
6	* from the parser.c file (their original home).
7	*
8	* See Copyright for the status of this software.
9	*
10	* UTF8 string routines from:
11	* William Brack <wbrack@mmm.com.hk>
12	*
13	* daniel@veillard.com
14	*/
15
16	#define IN_LIBXML
17	#include "libxml.h"
18
19	#include <stdlib.h>
20	#include <string.h>
21	#include <limits.h>
22	#include <libxml/xmlmemory.h>
23	#include <libxml/parserInternals.h>
24	#include <libxml/xmlstring.h>
25
/************************************************************************
 *                                                                      *
 *        Commodity functions to handle xmlChars                        *
 *                                                                      *
 ************************************************************************/
31
32	/**
33	* xmlStrndup:
34	* @cur: the input xmlChar *
35	* @len: the len of @cur
36	*
37	* a strndup for array of xmlChar's
38	*
39	* Returns a new xmlChar * or NULL
40	*/
41	xmlChar *
42	xmlStrndup(const xmlChar *cur, int len) {
43	xmlChar *ret;
44
45	if ((cur == NULL) \|\| (len < 0)) return(NULL);
46	ret = (xmlChar ) xmlMallocAtomic(((size_t) len + 1) sizeof(xmlChar));
47	if (ret == NULL) {
48	xmlErrMemory(NULL, NULL);
49	return(NULL);
50	}
51	memcpy(ret, cur, len * sizeof(xmlChar));
52	ret[len] = 0;
53	return(ret);
54	}
55
56	/**
57	* xmlStrdup:
58	* @cur: the input xmlChar *
59	*
60	* a strdup for array of xmlChar's. Since they are supposed to be
61	* encoded in UTF-8 or an encoding with 8bit based chars, we assume
62	* a termination mark of '0'.
63	*
64	* Returns a new xmlChar * or NULL
65	*/
66	xmlChar *
67	xmlStrdup(const xmlChar *cur) {
68	const xmlChar *p = cur;
69
70	if (cur == NULL) return(NULL);
71	while (p != 0) p++; / non input consuming */
72	return(xmlStrndup(cur, p - cur));
73	}
74
75	/**
76	* xmlCharStrndup:
77	* @cur: the input char *
78	* @len: the len of @cur
79	*
80	* a strndup for char's to xmlChar's
81	*
82	* Returns a new xmlChar * or NULL
83	*/
84
85	xmlChar *
86	xmlCharStrndup(const char *cur, int len) {
87	int i;
88	xmlChar *ret;
89
90	if ((cur == NULL) \|\| (len < 0)) return(NULL);
91	ret = (xmlChar ) xmlMallocAtomic(((size_t) len + 1) sizeof(xmlChar));
92	if (ret == NULL) {
93	xmlErrMemory(NULL, NULL);
94	return(NULL);
95	}
96	for (i = 0;i < len;i++) {
97	ret[i] = (xmlChar) cur[i];
98	if (ret[i] == 0) return(ret);
99	}
100	ret[len] = 0;
101	return(ret);
102	}
103
104	/**
105	* xmlCharStrdup:
106	* @cur: the input char *
107	*
108	* a strdup for char's to xmlChar's
109	*
110	* Returns a new xmlChar * or NULL
111	*/
112
113	xmlChar *
114	xmlCharStrdup(const char *cur) {
115	const char *p = cur;
116
117	if (cur == NULL) return(NULL);
118	while (p != '\0') p++; / non input consuming */
119	return(xmlCharStrndup(cur, p - cur));
120	}
121
122	/**
123	* xmlStrcmp:
124	* @str1: the first xmlChar *
125	* @str2: the second xmlChar *
126	*
127	* a strcmp for xmlChar's
128	*
129	* Returns the integer result of the comparison
130	*/
131
132	int
133	xmlStrcmp(const xmlChar str1, const xmlChar str2) {
134	if (str1 == str2) return(0);
135	if (str1 == NULL) return(-1);
136	if (str2 == NULL) return(1);
137	#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
138	return(strcmp((const char )str1, (const char )str2));
139	#else
140	do {
141	int tmp = str1++ - str2;
142	if (tmp != 0) return(tmp);
143	} while (*str2++ != 0);
144	return 0;
145	#endif
146	}
147
148	/**
149	* xmlStrEqual:
150	* @str1: the first xmlChar *
151	* @str2: the second xmlChar *
152	*
153	* Check if both strings are equal of have same content.
154	* Should be a bit more readable and faster than xmlStrcmp()
155	*
156	* Returns 1 if they are equal, 0 if they are different
157	*/
158
159	int
160	xmlStrEqual(const xmlChar str1, const xmlChar str2) {
161	if (str1 == str2) return(1);
162	if (str1 == NULL) return(0);
163	if (str2 == NULL) return(0);
164	#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
165	return(strcmp((const char )str1, (const char )str2) == 0);
166	#else
167	do {
168	if (str1++ != str2) return(0);
169	} while (*str2++);
170	return(1);
171	#endif
172	}
173
174	/**
175	* xmlStrQEqual:
176	* @pref: the prefix of the QName
177	* @name: the localname of the QName
178	* @str: the second xmlChar *
179	*
180	* Check if a QName is Equal to a given string
181	*
182	* Returns 1 if they are equal, 0 if they are different
183	*/
184
185	int
186	xmlStrQEqual(const xmlChar pref, const xmlChar name, const xmlChar *str) {
187	if (pref == NULL) return(xmlStrEqual(name, str));
188	if (name == NULL) return(0);
189	if (str == NULL) return(0);
190
191	do {
192	if (pref++ != str) return(0);
193	} while ((str++) && (pref));
194	if (*str++ != ':') return(0);
195	do {
196	if (name++ != str) return(0);
197	} while (*str++);
198	return(1);
199	}
200
201	/**
202	* xmlStrncmp:
203	* @str1: the first xmlChar *
204	* @str2: the second xmlChar *
205	* @len: the max comparison length
206	*
207	* a strncmp for xmlChar's
208	*
209	* Returns the integer result of the comparison
210	*/
211
212	int
213	xmlStrncmp(const xmlChar str1, const xmlChar str2, int len) {
214	if (len <= 0) return(0);
215	if (str1 == str2) return(0);
216	if (str1 == NULL) return(-1);
217	if (str2 == NULL) return(1);
218	#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
219	return(strncmp((const char )str1, (const char )str2, len));
220	#else
221	do {
222	int tmp = str1++ - str2;
223	if (tmp != 0 \|\| --len == 0) return(tmp);
224	} while (*str2++ != 0);
225	return 0;
226	#endif
227	}
228
229	static const xmlChar casemap[256] = {
230	0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
231	0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
232	0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
233	0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
234	0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
235	0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
236	0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
237	0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
238	0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
239	0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
240	0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
241	0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
242	0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
243	0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
244	0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
245	0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
246	0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
247	0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
248	0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
249	0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
250	0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
251	0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
252	0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
253	0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
254	0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
255	0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
256	0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
257	0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
258	0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
259	0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
260	0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
261	0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
262	};
263
264	/**
265	* xmlStrcasecmp:
266	* @str1: the first xmlChar *
267	* @str2: the second xmlChar *
268	*
269	* a strcasecmp for xmlChar's
270	*
271	* Returns the integer result of the comparison
272	*/
273
274	int
275	xmlStrcasecmp(const xmlChar str1, const xmlChar str2) {
276	register int tmp;
277
278	if (str1 == str2) return(0);
279	if (str1 == NULL) return(-1);
280	if (str2 == NULL) return(1);
281	do {
282	tmp = casemap[str1++] - casemap[str2];
283	if (tmp != 0) return(tmp);
284	} while (*str2++ != 0);
285	return 0;
286	}
287
288	/**
289	* xmlStrncasecmp:
290	* @str1: the first xmlChar *
291	* @str2: the second xmlChar *
292	* @len: the max comparison length
293	*
294	* a strncasecmp for xmlChar's
295	*
296	* Returns the integer result of the comparison
297	*/
298
299	int
300	xmlStrncasecmp(const xmlChar str1, const xmlChar str2, int len) {
301	register int tmp;
302
303	if (len <= 0) return(0);
304	if (str1 == str2) return(0);
305	if (str1 == NULL) return(-1);
306	if (str2 == NULL) return(1);
307	do {
308	tmp = casemap[str1++] - casemap[str2];
309	if (tmp != 0 \|\| --len == 0) return(tmp);
310	} while (*str2++ != 0);
311	return 0;
312	}
313
314	/**
315	* xmlStrchr:
316	* @str: the xmlChar * array
317	* @val: the xmlChar to search
318	*
319	* a strchr for xmlChar's
320	*
321	* Returns the xmlChar * for the first occurrence or NULL.
322	*/
323
324	const xmlChar *
325	xmlStrchr(const xmlChar *str, xmlChar val) {
326	if (str == NULL) return(NULL);
327	while (str != 0) { / non input consuming */
328	if (str == val) return((xmlChar ) str);
329	str++;
330	}
331	return(NULL);
332	}
333
334	/**
335	* xmlStrstr:
336	* @str: the xmlChar * array (haystack)
337	* @val: the xmlChar to search (needle)
338	*
339	* a strstr for xmlChar's
340	*
341	* Returns the xmlChar * for the first occurrence or NULL.
342	*/
343
344	const xmlChar *
345	xmlStrstr(const xmlChar str, const xmlChar val) {
346	int n;
347
348	if (str == NULL) return(NULL);
349	if (val == NULL) return(NULL);
350	n = xmlStrlen(val);
351
352	if (n == 0) return(str);
353	while (str != 0) { / non input consuming */
354	if (str == val) {
355	if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
356	}
357	str++;
358	}
359	return(NULL);
360	}
361
362	/**
363	* xmlStrcasestr:
364	* @str: the xmlChar * array (haystack)
365	* @val: the xmlChar to search (needle)
366	*
367	* a case-ignoring strstr for xmlChar's
368	*
369	* Returns the xmlChar * for the first occurrence or NULL.
370	*/
371
372	const xmlChar *
373	xmlStrcasestr(const xmlChar str, const xmlChar val) {
374	int n;
375
376	if (str == NULL) return(NULL);
377	if (val == NULL) return(NULL);
378	n = xmlStrlen(val);
379
380	if (n == 0) return(str);
381	while (str != 0) { / non input consuming */
382	if (casemap[str] == casemap[val])
383	if (!xmlStrncasecmp(str, val, n)) return(str);
384	str++;
385	}
386	return(NULL);
387	}
388
389	/**
390	* xmlStrsub:
391	* @str: the xmlChar * array (haystack)
392	* @start: the index of the first char (zero based)
393	* @len: the length of the substring
394	*
395	* Extract a substring of a given string
396	*
397	* Returns the xmlChar * for the first occurrence or NULL.
398	*/
399
400	xmlChar *
401	xmlStrsub(const xmlChar *str, int start, int len) {
402	int i;
403
404	if (str == NULL) return(NULL);
405	if (start < 0) return(NULL);
406	if (len < 0) return(NULL);
407
408	for (i = 0;i < start;i++) {
409	if (*str == 0) return(NULL);
410	str++;
411	}
412	if (*str == 0) return(NULL);
413	return(xmlStrndup(str, len));
414	}
415
416	/**
417	* xmlStrlen:
418	* @str: the xmlChar * array
419	*
420	* length of a xmlChar's string
421	*
422	* Returns the number of xmlChar contained in the ARRAY.
423	*/
424
425	int
426	xmlStrlen(const xmlChar *str) {
427	size_t len = 0;
428
429	if (str == NULL) return(0);
430	while (str != 0) { / non input consuming */
431	str++;
432	len++;
433	}
434	return(len > INT_MAX ? 0 : len);
435	}
436
437	/**
438	* xmlStrncat:
439	* @cur: the original xmlChar * array
440	* @add: the xmlChar * array added
441	* @len: the length of @add
442	*
443	* a strncat for array of xmlChar's, it will extend @cur with the len
444	* first bytes of @add. Note that if @len < 0 then this is an API error
445	* and NULL will be returned.
446	*
447	* Returns a new xmlChar *, the original @cur is reallocated and should
448	* not be freed.
449	*/
450
451	xmlChar *
452	xmlStrncat(xmlChar cur, const xmlChar add, int len) {
453	int size;
454	xmlChar *ret;
455
456	if ((add == NULL) \|\| (len == 0))
457	return(cur);
458	if (len < 0)
459	return(NULL);
460	if (cur == NULL)
461	return(xmlStrndup(add, len));
462
463	size = xmlStrlen(cur);
464	if ((size < 0) \|\| (size > INT_MAX - len))
465	return(NULL);
466	ret = (xmlChar ) xmlRealloc(cur, ((size_t) size + len + 1) sizeof(xmlChar));
467	if (ret == NULL) {
468	xmlErrMemory(NULL, NULL);
469	return(cur);
470	}
471	memcpy(&ret[size], add, len * sizeof(xmlChar));
472	ret[size + len] = 0;
473	return(ret);
474	}
475
476	/**
477	* xmlStrncatNew:
478	* @str1: first xmlChar string
479	* @str2: second xmlChar string
480	* @len: the len of @str2 or < 0
481	*
482	* same as xmlStrncat, but creates a new string. The original
483	* two strings are not freed. If @len is < 0 then the length
484	* will be calculated automatically.
485	*
486	* Returns a new xmlChar * or NULL
487	*/
488	xmlChar *
489	xmlStrncatNew(const xmlChar str1, const xmlChar str2, int len) {
490	int size;
491	xmlChar *ret;
492
493	if (len < 0) {
494	len = xmlStrlen(str2);
495	if (len < 0)
496	return(NULL);
497	}
498	if ((str2 == NULL) \|\| (len == 0))
499	return(xmlStrdup(str1));
500	if (str1 == NULL)
501	return(xmlStrndup(str2, len));
502
503	size = xmlStrlen(str1);
504	if ((size < 0) \|\| (size > INT_MAX - len))
505	return(NULL);
506	ret = (xmlChar ) xmlMalloc(((size_t) size + len + 1) sizeof(xmlChar));
507	if (ret == NULL) {
508	xmlErrMemory(NULL, NULL);
509	return(xmlStrndup(str1, size));
510	}
511	memcpy(ret, str1, size * sizeof(xmlChar));
512	memcpy(&ret[size], str2, len * sizeof(xmlChar));
513	ret[size + len] = 0;
514	return(ret);
515	}
516
517	/**
518	* xmlStrcat:
519	* @cur: the original xmlChar * array
520	* @add: the xmlChar * array added
521	*
522	* a strcat for array of xmlChar's. Since they are supposed to be
523	* encoded in UTF-8 or an encoding with 8bit based chars, we assume
524	* a termination mark of '0'.
525	*
526	* Returns a new xmlChar * containing the concatenated string. The original
527	* @cur is reallocated and should not be freed.
528	*/
529	xmlChar *
530	xmlStrcat(xmlChar cur, const xmlChar add) {
531	const xmlChar *p = add;
532
533	if (add == NULL) return(cur);
534	if (cur == NULL)
535	return(xmlStrdup(add));
536
537	while (p != 0) p++; / non input consuming */
538	return(xmlStrncat(cur, add, p - add));
539	}
540
541	/**
542	* xmlStrPrintf:
543	* @buf: the result buffer.
544	* @len: the result buffer length.
545	* @msg: the message with printf formatting.
546	* @...: extra parameters for the message.
547	*
548	* Formats @msg and places result into @buf.
549	*
550	* Returns the number of characters written to @buf or -1 if an error occurs.
551	*/
552	int XMLCDECL
553	xmlStrPrintf(xmlChar buf, int len, const char msg, ...) {
554	va_list args;
555	int ret;
556
557	if((buf == NULL) \|\| (msg == NULL)) {
558	return(-1);
559	}
560
561	va_start(args, msg);
562	ret = vsnprintf((char ) buf, len, (const char ) msg, args);
563	va_end(args);
564	buf[len - 1] = 0; /* be safe ! */
565
566	return(ret);
567	}
568
569	/**
570	* xmlStrVPrintf:
571	* @buf: the result buffer.
572	* @len: the result buffer length.
573	* @msg: the message with printf formatting.
574	* @ap: extra parameters for the message.
575	*
576	* Formats @msg and places result into @buf.
577	*
578	* Returns the number of characters written to @buf or -1 if an error occurs.
579	*/
580	int
581	xmlStrVPrintf(xmlChar buf, int len, const char msg, va_list ap) {
582	int ret;
583
584	if((buf == NULL) \|\| (msg == NULL)) {
585	return(-1);
586	}
587
588	ret = vsnprintf((char ) buf, len, (const char ) msg, ap);
589	buf[len - 1] = 0; /* be safe ! */
590
591	return(ret);
592	}
593
/************************************************************************
 *                                                                      *
 *              Generic UTF8 handling routines                          *
 *                                                                      *
 * From rfc2044: encoding of the Unicode values on UTF-8:               *
 *                                                                      *
 * UCS-4 range (hex.)           UTF-8 octet sequence (binary)           *
 * 0000 0000-0000 007F          0xxxxxxx                                *
 * 0000 0080-0000 07FF          110xxxxx 10xxxxxx                       *
 * 0000 0800-0000 FFFF          1110xxxx 10xxxxxx 10xxxxxx              *
 *                                                                      *
 * I hope we won't use values > 0xFFFF anytime soon !                   *
 *                                                                      *
 ************************************************************************/
608
609
610	/**
611	* xmlUTF8Size:
612	* @utf: pointer to the UTF8 character
613	*
614	* calculates the internal size of a UTF8 character
615	*
616	* returns the numbers of bytes in the character, -1 on format error
617	*/
618	int
619	xmlUTF8Size(const xmlChar *utf) {
620	xmlChar mask;
621	int len;
622
623	if (utf == NULL)
624	return -1;
625	if (*utf < 0x80)
626	return 1;
627	/* check valid UTF8 character */
628	if (!(*utf & 0x40))
629	return -1;
630	/* determine number of bytes in char */
631	len = 2;
632	for (mask=0x20; mask != 0; mask>>=1) {
633	if (!(*utf & mask))
634	return len;
635	len++;
636	}
637	return -1;
638	}
639
640	/**
641	* xmlUTF8Charcmp:
642	* @utf1: pointer to first UTF8 char
643	* @utf2: pointer to second UTF8 char
644	*
645	* compares the two UCS4 values
646	*
647	* returns result of the compare as with xmlStrncmp
648	*/
649	int
650	xmlUTF8Charcmp(const xmlChar utf1, const xmlChar utf2) {
651
652	if (utf1 == NULL ) {
653	if (utf2 == NULL)
654	return 0;
655	return -1;
656	}
657	return xmlStrncmp(utf1, utf2, xmlUTF8Size(utf1));
658	}
659
660	/**
661	* xmlUTF8Strlen:
662	* @utf: a sequence of UTF-8 encoded bytes
663	*
664	* compute the length of an UTF8 string, it doesn't do a full UTF8
665	* checking of the content of the string.
666	*
667	* Returns the number of characters in the string or -1 in case of error
668	*/
669	int
670	xmlUTF8Strlen(const xmlChar *utf) {
671	size_t ret = 0;
672
673	if (utf == NULL)
674	return(-1);
675
676	while (*utf != 0) {
677	if (utf[0] & 0x80) {
678	if ((utf[1] & 0xc0) != 0x80)
679	return(-1);
680	if ((utf[0] & 0xe0) == 0xe0) {
681	if ((utf[2] & 0xc0) != 0x80)
682	return(-1);
683	if ((utf[0] & 0xf0) == 0xf0) {
684	if ((utf[0] & 0xf8) != 0xf0 \|\| (utf[3] & 0xc0) != 0x80)
685	return(-1);
686	utf += 4;
687	} else {
688	utf += 3;
689	}
690	} else {
691	utf += 2;
692	}
693	} else {
694	utf++;
695	}
696	ret++;
697	}
698	return(ret > INT_MAX ? 0 : ret);
699	}
700
/**
 * xmlGetUTF8Char:
 * @utf:  a sequence of UTF-8 encoded bytes
 * @len:  a pointer to the minimum number of bytes present in
 *        the sequence.  This is used to assure the next character
 *        is completely contained within the sequence.
 *
 * Read the first UTF8 character from @utf
 *
 * Returns the char value or -1 in case of error, and sets *len to
 *        the actual number of bytes consumed (0 in case of error)
 */
int
xmlGetUTF8Char(const unsigned char *utf, int *len) {
    unsigned int c;

    if (utf == NULL)
        goto error;
    if (len == NULL)
        goto error;
    if (*len < 1)
        goto error;

    c = utf[0];
    if (c & 0x80) {
        /* multi-byte: every byte is checked against *len before being read */
        if (*len < 2)
            goto error;
        if ((utf[1] & 0xc0) != 0x80)
            goto error;
        if ((c & 0xe0) == 0xe0) {
            if (*len < 3)
                goto error;
            if ((utf[2] & 0xc0) != 0x80)
                goto error;
            if ((c & 0xf0) == 0xf0) {
                if (*len < 4)
                    goto error;
                /* lead bytes announcing more than 4 bytes are rejected */
                if ((c & 0xf8) != 0xf0 || (utf[3] & 0xc0) != 0x80)
                    goto error;
                *len = 4;
                /* 4-byte code */
                c = (utf[0] & 0x7) << 18;
                c |= (utf[1] & 0x3f) << 12;
                c |= (utf[2] & 0x3f) << 6;
                c |= utf[3] & 0x3f;
            } else {
                /* 3-byte code */
                *len = 3;
                c = (utf[0] & 0xf) << 12;
                c |= (utf[1] & 0x3f) << 6;
                c |= utf[2] & 0x3f;
            }
        } else {
            /* 2-byte code */
            *len = 2;
            c = (utf[0] & 0x1f) << 6;
            c |= utf[1] & 0x3f;
        }
    } else {
        /* 1-byte code */
        *len = 1;
    }
    return(c);

error:
    if (len != NULL)
        *len = 0;
    return(-1);
}
770
/**
 * xmlCheckUTF8:
 * @utf: Pointer to putative UTF-8 encoded string.
 *
 * Checks @utf for being valid UTF-8. @utf is assumed to be
 * null-terminated. This function is not super-strict, as it will
 * allow longer UTF-8 sequences than necessary. Note that Java is
 * capable of producing these sequences if provoked. Also note, this
 * routine checks for the 4-byte maximum size, but does not check for
 * 0x10ffff maximum value.
 *
 * Return value: true (1) if @utf is valid, 0 otherwise (including
 * when @utf is NULL).
 **/
int
xmlCheckUTF8(const unsigned char *utf)
{
    int ix;
    unsigned char c;

    if (utf == NULL)
        return(0);
    /*
     * utf is a string of 1, 2, 3 or 4 bytes.  The valid strings
     * are as follows (in "bit format"):
     *    0xxxxxxx                                      valid 1-byte
     *    110xxxxx 10xxxxxx                             valid 2-byte
     *    1110xxxx 10xxxxxx 10xxxxxx                    valid 3-byte
     *    11110xxx 10xxxxxx 10xxxxxx 10xxxxxx           valid 4-byte
     */
    while ((c = utf[0])) {      /* string is 0-terminated */
        ix = 0;
        if ((c & 0x80) == 0x00) {       /* 1-byte code, starts with 0 */
            ix = 1;
        } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
            if ((utf[1] & 0xc0 ) != 0x80)
                return 0;
            ix = 2;
        } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
            /* || short-circuits, so no byte past a 0 terminator is read */
            if (((utf[1] & 0xc0) != 0x80) ||
                ((utf[2] & 0xc0) != 0x80))
                    return 0;
            ix = 3;
        } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
            if (((utf[1] & 0xc0) != 0x80) ||
                ((utf[2] & 0xc0) != 0x80) ||
                ((utf[3] & 0xc0) != 0x80))
                    return 0;
            ix = 4;
        } else                          /* unknown encoding */
            return 0;
        utf += ix;
    }
    return(1);
}
825
826	/**
827	* xmlUTF8Strsize:
828	* @utf: a sequence of UTF-8 encoded bytes
829	* @len: the number of characters in the array
830	*
831	* storage size of an UTF8 string
832	* the behaviour is not guaranteed if the input string is not UTF-8
833	*
834	* Returns the storage size of
835	* the first 'len' characters of ARRAY
836	*/
837
838	int
839	xmlUTF8Strsize(const xmlChar *utf, int len) {
840	const xmlChar *ptr=utf;
841	int ch;
842	size_t ret;
843
844	if (utf == NULL)
845	return(0);
846
847	if (len <= 0)
848	return(0);
849
850	while ( len-- > 0) {
851	if ( !*ptr )
852	break;
853	if ( (ch = *ptr++) & 0x80)
854	while ((ch<<=1) & 0x80 ) {
855	if (*ptr == 0) break;
856	ptr++;
857	}
858	}
859	ret = ptr - utf;
860	return (ret > INT_MAX ? 0 : ret);
861	}
862
863
864	/**
865	* xmlUTF8Strndup:
866	* @utf: the input UTF8 *
867	* @len: the len of @utf (in chars)
868	*
869	* a strndup for array of UTF8's
870	*
871	* Returns a new UTF8 * or NULL
872	*/
873	xmlChar *
874	xmlUTF8Strndup(const xmlChar *utf, int len) {
875	xmlChar *ret;
876	int i;
877
878	if ((utf == NULL) \|\| (len < 0)) return(NULL);
879	i = xmlUTF8Strsize(utf, len);
880	ret = (xmlChar ) xmlMallocAtomic(((size_t) i + 1) sizeof(xmlChar));
881	if (ret == NULL) {
882	return(NULL);
883	}
884	memcpy(ret, utf, i * sizeof(xmlChar));
885	ret[i] = 0;
886	return(ret);
887	}
888
889	/**
890	* xmlUTF8Strpos:
891	* @utf: the input UTF8 *
892	* @pos: the position of the desired UTF8 char (in chars)
893	*
894	* a function to provide the equivalent of fetching a
895	* character from a string array
896	*
897	* Returns a pointer to the UTF8 character or NULL
898	*/
899	const xmlChar *
900	xmlUTF8Strpos(const xmlChar *utf, int pos) {
901	int ch;
902
903	if (utf == NULL) return(NULL);
904	if (pos < 0)
905	return(NULL);
906	while (pos--) {
907	if ((ch=*utf++) == 0) return(NULL);
908	if ( ch & 0x80 ) {
909	/* if not simple ascii, verify proper format */
910	if ( (ch & 0xc0) != 0xc0 )
911	return(NULL);
912	/* then skip over remaining bytes for this char */
913	while ( (ch <<= 1) & 0x80 )
914	if ( (*utf++ & 0xc0) != 0x80 )
915	return(NULL);
916	}
917	}
918	return((xmlChar *)utf);
919	}
920
921	/**
922	* xmlUTF8Strloc:
923	* @utf: the input UTF8 *
924	* @utfchar: the UTF8 character to be found
925	*
926	* a function to provide the relative location of a UTF8 char
927	*
928	* Returns the relative character position of the desired char
929	* or -1 if not found
930	*/
931	int
932	xmlUTF8Strloc(const xmlChar utf, const xmlChar utfchar) {
933	size_t i;
934	int size;
935	int ch;
936
937	if (utf==NULL \|\| utfchar==NULL) return -1;
938	size = xmlUTF8Strsize(utfchar, 1);
939	for(i=0; (ch=*utf) != 0; i++) {
940	if (xmlStrncmp(utf, utfchar, size)==0)
941	return(i > INT_MAX ? 0 : i);
942	utf++;
943	if ( ch & 0x80 ) {
944	/* if not simple ascii, verify proper format */
945	if ( (ch & 0xc0) != 0xc0 )
946	return(-1);
947	/* then skip over remaining bytes for this char */
948	while ( (ch <<= 1) & 0x80 )
949	if ( (*utf++ & 0xc0) != 0x80 )
950	return(-1);
951	}
952	}
953
954	return(-1);
955	}
956	/**
957	* xmlUTF8Strsub:
958	* @utf: a sequence of UTF-8 encoded bytes
959	* @start: relative pos of first char
960	* @len: total number to copy
961	*
962	* Create a substring from a given UTF-8 string
963	* Note: positions are given in units of UTF-8 chars
964	*
965	* Returns a pointer to a newly created string
966	* or NULL if any problem
967	*/
968
969	xmlChar *
970	xmlUTF8Strsub(const xmlChar *utf, int start, int len) {
971	int i;
972	int ch;
973
974	if (utf == NULL) return(NULL);
975	if (start < 0) return(NULL);
976	if (len < 0) return(NULL);
977
978	/*
979	* Skip over any leading chars
980	*/
981	for (i = 0;i < start;i++) {
982	if ((ch=*utf++) == 0) return(NULL);
983	if ( ch & 0x80 ) {
984	/* if not simple ascii, verify proper format */
985	if ( (ch & 0xc0) != 0xc0 )
986	return(NULL);
987	/* then skip over remaining bytes for this char */
988	while ( (ch <<= 1) & 0x80 )
989	if ( (*utf++ & 0xc0) != 0x80 )
990	return(NULL);
991	}
992	}
993
994	return(xmlUTF8Strndup(utf, len));
995	}
996
997	/**
998	* xmlEscapeFormatString:
999	* @msg: a pointer to the string in which to escape '%' characters.
1000	* Must be a heap-allocated buffer created by libxml2 that may be
1001	* returned, or that may be freed and replaced.
1002	*
1003	* Replaces the string pointed to by 'msg' with an escaped string.
1004	* Returns the same string with all '%' characters escaped.
1005	*/
1006	xmlChar *
1007	xmlEscapeFormatString(xmlChar **msg)
1008	{
1009	xmlChar *msgPtr = NULL;
1010	xmlChar *result = NULL;
1011	xmlChar *resultPtr = NULL;
1012	size_t count = 0;
1013	size_t msgLen = 0;
1014	size_t resultLen = 0;
1015
1016	if (!msg \|\| !*msg)
1017	return(NULL);
1018
1019	for (msgPtr = msg; msgPtr != '\0'; ++msgPtr) {
1020	++msgLen;
1021	if (*msgPtr == '%')
1022	++count;
1023	}
1024
1025	if (count == 0)
1026	return(*msg);
1027
1028	if ((count > INT_MAX) \|\| (msgLen > INT_MAX - count))
1029	return(NULL);
1030	resultLen = msgLen + count + 1;
1031	result = (xmlChar ) xmlMallocAtomic(resultLen sizeof(xmlChar));
1032	if (result == NULL) {
1033	/* Clear *msg to prevent format string vulnerabilities in
1034	out-of-memory situations. */
1035	xmlFree(*msg);
1036	*msg = NULL;
1037	xmlErrMemory(NULL, NULL);
1038	return(NULL);
1039	}
1040
1041	for (msgPtr = msg, resultPtr = result; msgPtr != '\0'; ++msgPtr, ++resultPtr) {
1042	resultPtr = msgPtr;
1043	if (*msgPtr == '%')
1044	*(++resultPtr) = '%';
1045	}
1046	result[resultLen - 1] = '\0';
1047
1048	xmlFree(*msg);
1049	*msg = result;
1050
1051	return *msg;
1052	}
1053
1054	#define bottom_xmlstring
1055	#include "elfgcchack.h"

Note: See TracBrowser for help on using the repository browser.

source: vbox/trunk/src/libs/libxml2-2.9.14/xmlstring.c@ 102654

Download in other formats: