genChRanges.py@ 104932

Last change on this file since 104932 was 104106, checked in by vboxsync, 9 months ago
libxml2-2.9.14: Applied and adjusted our libxml2 changes to 2.9.14. bugref:10640
Property svn:eol-style set to `native` Property svn:executable set to ``*
File size: 17.7 KB

Line
#!/usr/bin/env python3
#
# Portions of this script have been (shamelessly) stolen from the
# prior work of Daniel Veillard (genUnicode.py)
#
# I, however, take full credit for any bugs, errors or difficulties :-)
#
# William Brack
# October 2003
#
# 18 October 2003
# Modified to maintain binary compatibility with previous library versions
# by adding a suffix 'Q' ('quick') to the macro generated for the original
# function, and adding generation of a function (with the original name) which
# instantiates the macro.
#
17
import sys
import time
20
21	#
22	# A routine to take a list of yes/no (1, 0) values and turn it
23	# into a list of ranges. This will later be used to determine whether
24	# to generate single-byte lookup tables, or inline comparisons
25	#
def makeRange(lst):
    """Collapse a list of yes/no (1, 0) flags into inclusive index ranges.

    Every maximal run of consecutive 1 entries in *lst* becomes one
    ``(first, last)`` tuple; the tuples are returned in ascending order.
    An empty list, or a list without any 1, yields ``[]``.
    """
    ret = []
    size = len(lst)
    pos = 0
    while pos < size:
        try:
            # list.index raises ValueError once no further 1 exists
            start = lst.index(1, pos)
        except ValueError:
            break
        try:
            end = lst.index(0, start)   # first 0 after the run
        except ValueError:
            end = size                  # run extends to the end of the list
        ret.append((start, end - 1))
        pos = end + 1                   # lst[end] is a 0 (or past the end)
    return ret
43
# input filename; it is also written into the banners of the generated files
sources = "chvalid.def"

# minTableSize gives the minimum number of ranges which must be present
# before a 256-byte lookup table is produced.  If there are fewer than this
# number, a macro with inline comparisons is generated
minTableSize = 6

# dictionary of functions, key=name, element contains char-map and range-list
Functs = {}

# parser state: 0 = outside a function block, 1 = inside a 'name' block
state = 0
55
def parseValue(text):
    """Convert one token of a 'ur' line into an integer.

    Three spellings are accepted: hexadecimal with a '0x' prefix, a
    character introduced by a single quote (its ordinal is used), and
    plain decimal.  Malformed text raises the exception produced by
    int() / ord() / indexing, which the caller reports and re-raises.
    """
    if text[0:2] == '0x':
        return int(text[2:], 16)
    if text[0:1] == "'":
        return ord(text[1])
    return int(text)


try:
    defines = open(sources, "r")
except OSError:
    print("Missing %s, aborting ..." % sources)
    sys.exit(1)

#
# The lines in the .def file have three types:-
#   name:   Defines a new function block
#   ur:     Defines individual or ranges of unicode values
#   end:    Indicates the end of the function block
#
# These lines are processed below.
#
with defines:                   # make sure the input file gets closed
    for line in defines:
        # ignore blank lines, or lines beginning with '#'
        if line[0] == '#':
            continue
        line = line.strip()
        if line == '':
            continue
        try:
            # split on any run of whitespace so that doubled spaces do not
            # produce empty fields
            fields = line.split()
            #
            # name line:
            #   validate any previous function block already ended
            #   validate this function not already defined
            #   initialize an entry in the function dictionary
            #       including a mask table with no values yet defined
            #
            if fields[0] == 'name':
                name = fields[1]
                if state != 0:
                    print("'name' %s found before previous name "
                          "completed" % (fields[1]))
                    continue
                state = 1
                if name in Functs:
                    print("name '%s' already present - may give"
                          " wrong results" % (name))
                else:
                    # dict entry with two list elements (chdata, rangedata)
                    Functs[name] = [[0] * 256, []]
            #
            # end line:
            #   validate there was a preceding function name line
            #   set state to show no current function active
            #
            elif fields[0] == 'end':
                if state == 0:
                    print("'end' found outside of function block")
                    continue
                state = 0
            #
            # ur line:
            #   validate function has been defined
            #   process remaining fields on the line, which may be either
            #       individual unicode values or ranges of values
            #
            elif fields[0] == 'ur':
                if state != 1:
                    raise Exception("'ur' found outside of 'name' block")
                for el in fields[1:]:
                    pos = el.find('..')
                    # pos <= 0 means not a range, so must be individual value
                    if pos <= 0:
                        value = parseValue(el)
                        if value < 0 or value > 0x1fffff:
                            raise Exception('Illegal value (%s) in ch for'
                                            ' name %s' % (el, name))
                        # for ur we have only ranges (makes things simpler),
                        # so convert val to range
                        currange = (value, value)
                    # pos > 0 means this is a range, so isolate/validate
                    # the interval
                    else:
                        # split the range into its first-val, last-val
                        (first, last) = el.split("..")
                        start = parseValue(first)
                        end = parseValue(last)
                        if start < 0 or end > 0x1fffff or start > end:
                            raise Exception("Invalid range '%s'" % el)
                        currange = (start, end)
                    # common path - 'currange' has the range, now take care
                    # of it.  We split on single-byte values vs. multibyte
                    # NOTE(review): a range starting below 0x100 but ending at
                    # or above it is stored only as a multi-byte range, so its
                    # low part is never flagged in the char map - confirm the
                    # .def file never contains such a straddling range.
                    if currange[1] < 0x100:     # single-byte
                        for ch in range(currange[0], currange[1] + 1):
                            # validate that value not previously defined
                            if Functs[name][0][ch]:
                                msg = "Duplicate ch value '%s' for name '%s'" % (el, name)
                                raise Exception(msg)
                            Functs[name][0][ch] = 1
                    else:                       # multi-byte
                        if currange in Functs[name][1]:
                            raise Exception("range already defined in"
                                            " function")
                        Functs[name][1].append(currange)
        except Exception:
            # report which input line was at fault, then let the error abort
            print("Failed to process line: %s" % (line))
            raise
#
# At this point, the entire definition file has been processed.  Now we
# enter the output phase, where we generate the two files chvalid.c and
# chvalid.h
#
# To do this, we first output the 'static' data (heading, fixed
# definitions, etc.), then output the 'dynamic' data (the results
# of the above processing), and finally output closing 'static' data
# (e.g. the subroutine to process the ranges)
#

#
# Generate the headings:
#
try:
    header = open("include/libxml/chvalid.h", "w")
except OSError:
    print("Failed to open include/libxml/chvalid.h")
    sys.exit(1)

try:
    output = open("chvalid.c", "w")
except OSError:
    print("Failed to open chvalid.c")
    header.close()              # do not leave the first file open
    sys.exit(1)

# asctime() without an argument formats the current local time
date = time.asctime()

# Banner, include guard and the fixed type definitions of chvalid.h
header.write(
"""/*
 * Summary: Unicode character range checking
 * Description: this module exports interfaces for the character
 *               range validation APIs
 *
 * This file is automatically generated from the cvs source
 * definition files using the genChRanges.py Python script
 *
 * Generation date: %s
 * Sources: %s
 * Author: William Brack <wbrack@mmm.com.hk>
 */

#ifndef __XML_CHVALID_H__
#define __XML_CHVALID_H__

#include <libxml/xmlversion.h>
#include <libxml/xmlstring.h>

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Define our typedefs and structures
 *
 */
typedef struct _xmlChSRange xmlChSRange;
typedef xmlChSRange *xmlChSRangePtr;
struct _xmlChSRange {
    unsigned short\tlow;
    unsigned short\thigh;
};

typedef struct _xmlChLRange xmlChLRange;
typedef xmlChLRange *xmlChLRangePtr;
struct _xmlChLRange {
    unsigned int\tlow;
    unsigned int\thigh;
};

typedef struct _xmlChRangeGroup xmlChRangeGroup;
typedef xmlChRangeGroup *xmlChRangeGroupPtr;
struct _xmlChRangeGroup {
    int\t\t\tnbShortRange;
    int\t\t\tnbLongRange;
    const xmlChSRange\t*shortRange;\t/* points to an array of ranges */
    const xmlChLRange\t*longRange;
};

/**
 * Range checking routine
 */
XMLPUBFUN int
\t\txmlCharInRange(unsigned int val, const xmlChRangeGroup *group);

""" % (date, sources))

# Banner and fixed includes of chvalid.c
output.write(
"""/*
 * chvalid.c:\tthis module implements the character range
 *\t\tvalidation APIs
 *
 * This file is automatically generated from the cvs source
 * definition files using the genChRanges.py Python script
 *
 * Generation date: %s
 * Sources: %s
 * William Brack <wbrack@mmm.com.hk>
 */

#define IN_LIBXML
#include "libxml.h"
#include <libxml/chvalid.h>

#include <stddef.h>

/*
 * The initial tables ({func_name}_tab) are used to validate whether a
 * single-byte character is within the specified group.  Each table
 * contains 256 bytes, with each byte representing one of the 256
 * possible characters.  If the table byte is set, the character is
 * allowed.
 *
 */
""" % (date, sources))
293
#
# Now output the generated data.
# We try to produce the best execution times.  Tests have shown that validation
# with direct table lookup is, when there are a "small" number of valid items,
# still not as fast as a sequence of inline compares.  So, if the single-byte
# portion of a range has a "small" number of ranges, we output a macro for inline
# compares, otherwise we output a 256-byte table and a macro to use it.
#

# separator placed between two inline comparisons of one macro: logical OR,
# a C line continuation, then the indentation of the next comparison
orJoin = " || \\\n\t\t\t\t "

# documentation comment written ahead of every <name>_ch macro
chDoc = """
/**
 * %s_ch:
 * @c: char to validate
 *
 * Automatically generated by genChRanges.py
 */
"""


def padDefine(lead):
    """Return *lead* followed by the tabs that line up the macro body.

    *lead* is the '#define NAME(c)' prefix.  The number of tabs is
    4 minus the number of full 8-column stops the prefix already fills
    (positioning to col. 33), and never less than zero.
    """
    return lead + "\t" * max(4 - len(lead) // 8, 0)


def rangeTest(rg):
    """Return the C comparison on (c) for the inclusive range tuple *rg*."""
    if rg[0] == rg[1]:          # single value - check equal
        return "((c) == 0x%x)" % rg[0]
    if rg[1] != 0xff:           # value range
        return "((0x%x <= (c)) && ((c) <= 0x%x))" % (rg[0], rg[1])
    # since we are doing char, a range ending in 0xff needs no upper test
    return " (0x%x <= (c))" % rg[0]


fkeys = sorted(Functs.keys())

for f in fkeys:
    charMap, longRanges = Functs[f]
    hasBytes = max(charMap) > 0         # at least one single-byte entry

    # First we convert the specified single-byte values into a group of
    # ranges.  If the total number of such ranges is less than minTableSize,
    # we generate an inline macro for direct comparisons; otherwise we
    # generate a lookup table.
    if hasBytes:
        rangeTable = makeRange(charMap)
        if len(rangeTable) >= minTableSize:     # table is worthwhile
            header.write("XMLPUBVAR const unsigned char %s_tab[256];\n" % f)
            header.write(chDoc % f)
            header.write("#define %s_ch(c)\t(%s_tab[(c)])\n" % (f, f))

            # write the constant data to the code file
            output.write("const unsigned char %s_tab[256] = {\n" % f)
            pline = "   "
            for flag in charMap[:255]:
                pline += " 0x%02x," % flag
                if len(pline) > 72:
                    output.write(pline + "\n")
                    pline = "   "
            # the last entry closes the initializer instead of adding a comma
            output.write(pline + " 0x%02x };\n\n" % charMap[255])

        else:                                   # inline check is used
            # first another little optimisation - if space is present,
            # put it at the front of the list so it is checked first
            blank = (0x20, 0x20)
            if blank in rangeTable:
                rangeTable.remove(blank)
                rangeTable.insert(0, blank)

            header.write(chDoc % f)
            tests = orJoin.join(rangeTest(rg) for rg in rangeTable)
            header.write(padDefine("#define %s_ch(c)" % f)
                         + "(" + tests + ")\n")

    # The 'Q' macro is generated for every function: values below 0x100 go
    # to the <name>_ch macro (or are rejected when there is none), all other
    # values are checked against the multi-byte ranges.
    header.write("""
/**
 * %sQ:
 * @c: char to validate
 *
 * Automatically generated by genChRanges.py
 */
""" % f)
    header.write(padDefine("#define %sQ(c)" % f)
                 + "(((c) < 0x100) ? \\\n\t\t\t\t ")
    if hasBytes:
        header.write("%s_ch((c)) :" % f)
    else:
        header.write("0 :")

    # if no ranges defined, value invalid if >= 0x100
    numRanges = len(longRanges)
    if numRanges == 0:
        header.write(" 0)\n\n")
    elif numRanges >= minTableSize:
        header.write(" \\\n\t\t\t\t xmlCharInRange((c), &%sGroup))\n\n" % f)
    else:               # if < minTableSize, generate inline code
        tests = orJoin.join(rangeTest(rg) for rg in longRanges)
        header.write("\\\n\t\t\t\t(" + tests + "))\n\n")

    if longRanges:
        header.write("XMLPUBVAR const xmlChRangeGroup %sGroup;\n" % f)
428
429
#
# Next we do the unicode ranges
#
# For every function with multi-byte ranges, chvalid.c receives an array
# of 'short' ranges (upper bound below 0x10000) and/or an array of 'long'
# ranges, followed by the xmlChRangeGroup structure referring to them.
#

for f in fkeys:
    if len(Functs[f][1]) > 0:   # only generate if unicode ranges present
        rangeTable = Functs[f][1]
        rangeTable.sort()       # ascending tuple sequence
        numShort = 0
        numLong = 0
        for rg in rangeTable:
            if rg[1] < 0x10000:         # if short value
                if numShort == 0:       # first occurrence
                    pline = "static const xmlChSRange %s_srng[] = {" % f
                else:
                    pline += ","
                numShort += 1
                # start a continuation line once the current one is full
                if len(pline) > 60:
                    output.write(pline + "\n")
                    pline = "    "
                else:
                    pline += " "
                pline += "{0x%x, 0x%x}" % (rg[0], rg[1])
            else:                       # if long value
                if numLong == 0:        # first occurrence
                    if numShort > 0:    # if there were shorts, finish them off
                        output.write(pline + "};\n")
                    pline = "static const xmlChLRange %s_lrng[] = { " % f
                else:
                    pline += ", "
                numLong += 1
                if len(pline) > 60:
                    output.write(pline + "\n")
                    pline = "    "
                pline += "{0x%x, 0x%x}" % (rg[0], rg[1])
        output.write(pline + "};\n")    # finish off last group

        # the group structure: both counts, then each array or a null pointer
        pline = "const xmlChRangeGroup %sGroup =\n\t{%d, %d, " % (f, numShort, numLong)
        if numShort > 0:
            pline += "%s_srng" % f
        else:
            pline += "(xmlChSRangePtr)0"
        if numLong > 0:
            pline += ", %s_lrng" % f
        else:
            pline += ", (xmlChLRangePtr)0"

        output.write(pline + "};\n\n")
478
# Fixed C implementation of xmlCharInRange, the routine the generated 'Q'
# macros call when a function has minTableSize or more multi-byte ranges.
output.write(
"""
/**
 * xmlCharInRange:
 * @val: character to be validated
 * @rptr: pointer to range to be used to validate
 *
 * Does a binary search of the range table to determine if char
 * is valid
 *
 * Returns: true if character valid, false otherwise
 */
int
xmlCharInRange (unsigned int val, const xmlChRangeGroup *rptr) {
    int low, high, mid;
    const xmlChSRange *sptr;
    const xmlChLRange *lptr;

    if (rptr == NULL) return(0);
    if (val < 0x10000) {\t/* is val in 'short' or 'long' array? */
\tif (rptr->nbShortRange == 0)
\t    return 0;
\tlow = 0;
\thigh = rptr->nbShortRange - 1;
\tsptr = rptr->shortRange;
\twhile (low <= high) {
\t    mid = (low + high) / 2;
\t    if ((unsigned short) val < sptr[mid].low) {
\t\thigh = mid - 1;
\t    } else {
\t\tif ((unsigned short) val > sptr[mid].high) {
\t\t    low = mid + 1;
\t\t} else {
\t\t    return 1;
\t\t}
\t    }
\t}
    } else {
\tif (rptr->nbLongRange == 0) {
\t    return 0;
\t}
\tlow = 0;
\thigh = rptr->nbLongRange - 1;
\tlptr = rptr->longRange;
\twhile (low <= high) {
\t    mid = (low + high) / 2;
\t    if (val < lptr[mid].low) {
\t\thigh = mid - 1;
\t    } else {
\t\tif (val > lptr[mid].high) {
\t\t    low = mid + 1;
\t\t} else {
\t\t    return 1;
\t\t}
\t    }
\t}
    }
    return 0;
}

""")
540
#
# finally, generate the ABI compatibility functions
#
# Each function keeps its original name and simply instantiates the
# corresponding 'Q' macro; its prototype is added to the header file.
#
for f in fkeys:
    output.write("""
/**
 * %s:
 * @ch: character to validate
 *
 * This function is DEPRECATED.
""" % f)
    # the <name>_ch macro only exists when single-byte values were defined
    if max(Functs[f][0]) > 0:
        output.write(" * Use %s_ch or %sQ instead" % (f, f))
    else:
        output.write(" * Use %sQ instead" % f)
    output.write("""
 *
 * Returns true if argument valid, false otherwise
 */
""")
    output.write("int\n%s(unsigned int ch) {\n    return(%sQ(ch));\n}\n\n" % (f, f))
    header.write("XMLPUBFUN int\n\t\t%s(unsigned int ch);\n" % f)
#
# Run complete - write trailers and close the output files
#

# close the extern "C" block and the include guard opened by the heading
header.write("""
#ifdef __cplusplus
}
#endif
#endif /* __XML_CHVALID_H__ */
""")

header.close()

output.close()
577

Note: See TracBrowser for help on using the repository browser.

source: vbox/trunk/src/libs/libxml2-2.12.6/genChRanges.py@ 104932

Download in other formats: