1 | #!/usr/bin/python -u
|
---|
2 | # -*- coding: utf-8 -*-
|
---|
3 | #
|
---|
4 | # this tests the DTD validation with the XmlTextReader interface
|
---|
5 | #
|
---|
6 | import sys
|
---|
7 | import glob
|
---|
8 | import string
|
---|
9 | import libxml2
|
---|
# Pick a text-buffer class that exists on the running interpreter: the
# standalone StringIO module when it can be imported, io.StringIO otherwise.
# Only ImportError is caught so that unrelated failures are not hidden.
try:
    import StringIO
    str_io = StringIO.StringIO
except ImportError:
    import io
    str_io = io.StringIO

# Memory debug specific
libxml2.debugMemory(1)

# Error text reported through the registered libxml2 error handler; it is
# appended to by callback() and reset before each document is read.
err = ""
# Directory that holds the validation test documents.
dir_prefix = "../../test/valid/"
# Expected libxml2 error output, keyed by test base name (no directory, no
# ".xml" suffix); the keys are expanded to full paths right after the literal.
# NOTE(review): whitespace inside these messages (the indentation in front of
# the "^" markers, trailing blanks) may not have survived in this copy --
# confirm against the ../../test/valid/*.xml.err files before relying on it.
22 | # This dictionary reflects the contents of the files
|
---|
23 | # ../../test/valid/*.xml.err that are not empty, except that
|
---|
24 | # the file paths in the messages start with ../../test/
|
---|
25 |
|
---|
26 | expect = {
|
---|
27 | '766956':
|
---|
28 | """../../test/valid/dtds/766956.dtd:2: parser error : PEReference: expecting ';'
|
---|
29 | %ä%ent;
|
---|
30 | ^
|
---|
31 | ../../test/valid/dtds/766956.dtd:2: parser error : Content error in the external subset
|
---|
32 | %ä%ent;
|
---|
33 | ^
|
---|
34 | Entity: line 1:
|
---|
35 | value
|
---|
36 | ^
|
---|
37 | """,
|
---|
38 | '781333':
|
---|
39 | """../../test/valid/781333.xml:4: element a: validity error : Element a content does not follow the DTD, expecting ( ..., got
|
---|
40 | <a/>
|
---|
41 | ^
|
---|
42 | ../../test/valid/781333.xml:5: element a: validity error : Element a content does not follow the DTD, Expecting more child
|
---|
43 |
|
---|
44 | ^
|
---|
45 | """,
|
---|
46 | 'cond_sect2':
|
---|
47 | """../../test/valid/dtds/cond_sect2.dtd:15: parser error : All markup of the conditional section is not in the same entity
|
---|
48 | %ent;
|
---|
49 | ^
|
---|
50 | Entity: line 1:
|
---|
51 | ]]>
|
---|
52 | ^
|
---|
53 | ../../test/valid/dtds/cond_sect2.dtd:17: parser error : Content error in the external subset
|
---|
54 |
|
---|
55 | ^
|
---|
56 | """,
|
---|
57 | 'rss':
|
---|
58 | """../../test/valid/rss.xml:177: element rss: validity error : Element rss does not carry attribute version
|
---|
59 | </rss>
|
---|
60 | ^
|
---|
61 | """,
|
---|
62 | 't8':
|
---|
63 | """../../test/valid/t8.xml:6: parser error : internal error: xmlParseInternalSubset: error detected in Markup declaration
|
---|
64 |
|
---|
65 | %defroot; %defmiddle; %deftest;
|
---|
66 | ^
|
---|
67 | Entity: line 1:
|
---|
68 | <!ELEMENT root (middle) >
|
---|
69 | ^
|
---|
70 | ../../test/valid/t8.xml:6: parser error : internal error: xmlParseInternalSubset: error detected in Markup declaration
|
---|
71 |
|
---|
72 | %defroot; %defmiddle; %deftest;
|
---|
73 | ^
|
---|
74 | Entity: line 1:
|
---|
75 | <!ELEMENT middle (test) >
|
---|
76 | ^
|
---|
77 | ../../test/valid/t8.xml:6: parser error : internal error: xmlParseInternalSubset: error detected in Markup declaration
|
---|
78 |
|
---|
79 | %defroot; %defmiddle; %deftest;
|
---|
80 | ^
|
---|
81 | Entity: line 1:
|
---|
82 | <!ELEMENT test (#PCDATA) >
|
---|
83 | ^
|
---|
84 | """,
|
---|
85 | 't8a':
|
---|
86 | """../../test/valid/t8a.xml:6: parser error : internal error: xmlParseInternalSubset: error detected in Markup declaration
|
---|
87 |
|
---|
88 | %defroot;%defmiddle;%deftest;
|
---|
89 | ^
|
---|
90 | Entity: line 1:
|
---|
91 | <!ELEMENT root (middle) >
|
---|
92 | ^
|
---|
93 | ../../test/valid/t8a.xml:6: parser error : internal error: xmlParseInternalSubset: error detected in Markup declaration
|
---|
94 |
|
---|
95 | %defroot;%defmiddle;%deftest;
|
---|
96 | ^
|
---|
97 | Entity: line 1:
|
---|
98 | <!ELEMENT middle (test) >
|
---|
99 | ^
|
---|
100 | ../../test/valid/t8a.xml:6: parser error : internal error: xmlParseInternalSubset: error detected in Markup declaration
|
---|
101 |
|
---|
102 | %defroot;%defmiddle;%deftest;
|
---|
103 | ^
|
---|
104 | Entity: line 1:
|
---|
105 | <!ELEMENT test (#PCDATA) >
|
---|
106 | ^
|
---|
107 | """,
|
---|
108 | 'xlink':
|
---|
109 | """../../test/valid/xlink.xml:450: element termdef: validity error : ID dt-arc already defined
|
---|
110 | <p><termdef id="dt-arc" term="Arc">An <ter
|
---|
111 | ^
|
---|
112 | validity error : attribute def line 199 references an unknown ID "dt-xlg"
|
---|
113 | """,
|
---|
114 | }
|
---|
115 |
|
---|
# Turn the short keys into the paths glob() returns for the test files:
# prepend dir_prefix and append the ".xml" extension.
expect = {"{}{}.xml".format(dir_prefix, key): val for key, val in expect.items()}

def callback(ctx, str):
    """Append one reported error message to the module-level ``err`` buffer.

    ctx is the context value supplied when the handler was registered; it is
    not used. str is the message fragment to record.
    """
    global err
    # A one-element tuple keeps the formatting safe whatever object is passed.
    err = err + "%s" % (str,)
# Route every libxml2 error message into callback(), i.e. into ``err``.
libxml2.registerErrorHandler(callback, "")

# Documents for which a non-zero final Read() result is expected.
parsing_error_files = ["766956", "cond_sect2", "t8", "t8a"]
expect_parsing_error = ["{}{}.xml".format(dir_prefix, f) for f in parsing_error_files]

# Read every test document with validation enabled and compare the collected
# error text with the expected one.
valid_files = glob.glob(dir_prefix + "*.x*")
valid_files.sort()
for file in valid_files:
    err = ""
    reader = libxml2.newTextReaderFilename(file)
    reader.SetParserProp(libxml2.PARSER_VALIDATE, 1)
    ret = reader.Read()
    while ret == 1:
        ret = reader.Read()
    if ret != 0 and file not in expect_parsing_error:
        print("Error parsing and validating %s" % (file))
        # NOTE(review): the original keeps sys.exit(1) disabled here, so the
        # failure is only reported and the remaining files are still read.
        #sys.exit(1)
    if err:
        # Errors are fine only when they match the recorded text exactly.
        if not (file in expect and err == expect[file]):
            print("Error: ", err)
            if file in expect:
                print("Expected: ", expect[file])
145 | #
|
---|
146 | # another separate test based on Stephane Bidoul one
|
---|
147 | #
|
---|
148 | s = """
|
---|
149 | <!DOCTYPE test [
|
---|
150 | <!ELEMENT test (x,b)>
|
---|
151 | <!ELEMENT x (c)>
|
---|
152 | <!ELEMENT b (#PCDATA)>
|
---|
153 | <!ELEMENT c (#PCDATA)>
|
---|
154 | <!ENTITY x "<x><c>xxx</c></x>">
|
---|
155 | ]>
|
---|
156 | <test>
|
---|
157 | &x;
|
---|
158 | <b>bbb</b>
|
---|
159 | </test>
|
---|
160 | """
|
---|
161 | expect="""10,test
|
---|
162 | 1,test
|
---|
163 | 14,#text
|
---|
164 | 1,x
|
---|
165 | 1,c
|
---|
166 | 3,#text
|
---|
167 | 15,c
|
---|
168 | 15,x
|
---|
169 | 14,#text
|
---|
170 | 1,b
|
---|
171 | 3,#text
|
---|
172 | 15,b
|
---|
173 | 14,#text
|
---|
174 | 15,test
|
---|
175 | """
|
---|
176 | res=""
|
---|
177 | err=""
|
---|
178 |
|
---|
179 | input = libxml2.inputBuffer(str_io(s))
|
---|
180 | reader = input.newTextReader("test2")
|
---|
181 | reader.SetParserProp(libxml2.PARSER_LOADDTD,1)
|
---|
182 | reader.SetParserProp(libxml2.PARSER_DEFAULTATTRS,1)
|
---|
183 | reader.SetParserProp(libxml2.PARSER_SUBST_ENTITIES,1)
|
---|
184 | reader.SetParserProp(libxml2.PARSER_VALIDATE,1)
|
---|
185 | while reader.Read() == 1:
|
---|
186 | res = res + "%s,%s\n" % (reader.NodeType(),reader.Name())
|
---|
187 |
|
---|
188 | if res != expect:
|
---|
189 | print("test2 failed: unexpected output")
|
---|
190 | print(res)
|
---|
191 | sys.exit(1)
|
---|
192 | if err != "":
|
---|
193 | print("test2 failed: validation error found")
|
---|
194 | print(err)
|
---|
195 | sys.exit(1)
|
---|
196 |
|
---|
197 | #
|
---|
198 | # Another test for external entity parsing and validation
|
---|
199 | #
|
---|
200 |
|
---|
201 | s = """<!DOCTYPE test [
|
---|
202 | <!ELEMENT test (x)>
|
---|
203 | <!ELEMENT x (#PCDATA)>
|
---|
204 | <!ENTITY e SYSTEM "tst.ent">
|
---|
205 | ]>
|
---|
206 | <test>
|
---|
207 | &e;
|
---|
208 | </test>
|
---|
209 | """
|
---|
210 | tst_ent = """<x>hello</x>"""
|
---|
211 | expect="""10 test
|
---|
212 | 1 test
|
---|
213 | 14 #text
|
---|
214 | 1 x
|
---|
215 | 3 #text
|
---|
216 | 15 x
|
---|
217 | 14 #text
|
---|
218 | 15 test
|
---|
219 | """
|
---|
220 | res=""
|
---|
221 |
|
---|
def myResolver(URL, ID, ctxt):
    """Entity loader that serves ``tst.ent`` from memory.

    Returns a fresh text buffer over the module-level ``tst_ent`` string when
    URL is exactly "tst.ent", and None for every other URL. ID and ctxt are
    accepted but not used.
    """
    if URL == "tst.ent":
        return str_io(tst_ent)
    return None

# Have libxml2 ask myResolver() for external entities from here on.
libxml2.setEntityLoader(myResolver)

input = libxml2.inputBuffer(str_io(s))
reader = input.newTextReader("test3")
reader.SetParserProp(libxml2.PARSER_LOADDTD, 1)
reader.SetParserProp(libxml2.PARSER_DEFAULTATTRS, 1)
reader.SetParserProp(libxml2.PARSER_SUBST_ENTITIES, 1)
reader.SetParserProp(libxml2.PARSER_VALIDATE, 1)
while reader.Read() == 1:
    res = res + "%s %s\n" % (reader.NodeType(), reader.Name())

if res != expect:
    print("test3 failed: unexpected output")
    print(res)
    sys.exit(1)
if err != "":
    print("test3 failed: validation error found")
    print(err)
    sys.exit(1)

247 | #
|
---|
248 | # Another test for recursive entity parsing, validation, and replacement of
|
---|
249 | # entities, making sure the entity ref node doesn't show up in that case
|
---|
250 | #
|
---|
251 |
|
---|
252 | s = """<!DOCTYPE test [
|
---|
253 | <!ELEMENT test (x, x)>
|
---|
254 | <!ELEMENT x (y)>
|
---|
255 | <!ELEMENT y (#PCDATA)>
|
---|
256 | <!ENTITY x "<x>&y;</x>">
|
---|
257 | <!ENTITY y "<y>yyy</y>">
|
---|
258 | ]>
|
---|
259 | <test>
|
---|
260 | &x;
|
---|
261 | &x;
|
---|
262 | </test>"""
|
---|
263 | expect="""10 test 0
|
---|
264 | 1 test 0
|
---|
265 | 14 #text 1
|
---|
266 | 1 x 1
|
---|
267 | 1 y 2
|
---|
268 | 3 #text 3
|
---|
269 | 15 y 2
|
---|
270 | 15 x 1
|
---|
271 | 14 #text 1
|
---|
272 | 1 x 1
|
---|
273 | 1 y 2
|
---|
274 | 3 #text 3
|
---|
275 | 15 y 2
|
---|
276 | 15 x 1
|
---|
277 | 14 #text 1
|
---|
278 | 15 test 0
|
---|
279 | """
|
---|
280 | res=""
|
---|
281 | err=""
|
---|
282 |
|
---|
283 | input = libxml2.inputBuffer(str_io(s))
|
---|
284 | reader = input.newTextReader("test4")
|
---|
285 | reader.SetParserProp(libxml2.PARSER_LOADDTD,1)
|
---|
286 | reader.SetParserProp(libxml2.PARSER_DEFAULTATTRS,1)
|
---|
287 | reader.SetParserProp(libxml2.PARSER_SUBST_ENTITIES,1)
|
---|
288 | reader.SetParserProp(libxml2.PARSER_VALIDATE,1)
|
---|
289 | while reader.Read() == 1:
|
---|
290 | res = res + "%s %s %d\n" % (reader.NodeType(),reader.Name(),reader.Depth())
|
---|
291 |
|
---|
292 | if res != expect:
|
---|
293 | print("test4 failed: unexpected output")
|
---|
294 | print(res)
|
---|
295 | sys.exit(1)
|
---|
296 | if err != "":
|
---|
297 | print("test4 failed: validation error found")
|
---|
298 | print(err)
|
---|
299 | sys.exit(1)
|
---|
300 |
|
---|
301 | #
|
---|
302 | # The same test but without entity substitution this time
|
---|
303 | #
|
---|
304 |
|
---|
305 | s = """<!DOCTYPE test [
|
---|
306 | <!ELEMENT test (x, x)>
|
---|
307 | <!ELEMENT x (y)>
|
---|
308 | <!ELEMENT y (#PCDATA)>
|
---|
309 | <!ENTITY x "<x>&y;</x>">
|
---|
310 | <!ENTITY y "<y>yyy</y>">
|
---|
311 | ]>
|
---|
312 | <test>
|
---|
313 | &x;
|
---|
314 | &x;
|
---|
315 | </test>"""
|
---|
316 | expect="""10 test 0
|
---|
317 | 1 test 0
|
---|
318 | 14 #text 1
|
---|
319 | 5 x 1
|
---|
320 | 14 #text 1
|
---|
321 | 5 x 1
|
---|
322 | 14 #text 1
|
---|
323 | 15 test 0
|
---|
324 | """
|
---|
325 | res=""
|
---|
326 | err=""
|
---|
327 |
|
---|
328 | input = libxml2.inputBuffer(str_io(s))
|
---|
329 | reader = input.newTextReader("test5")
|
---|
330 | reader.SetParserProp(libxml2.PARSER_VALIDATE,1)
|
---|
331 | while reader.Read() == 1:
|
---|
332 | res = res + "%s %s %d\n" % (reader.NodeType(),reader.Name(),reader.Depth())
|
---|
333 |
|
---|
334 | if res != expect:
|
---|
335 | print("test5 failed: unexpected output")
|
---|
336 | print(res)
|
---|
337 | if err != "":
|
---|
338 | print("test5 failed: validation error found")
|
---|
339 | print(err)
|
---|
340 |
|
---|
#
# cleanup
#
# Drop the last input buffer and reader before the memory accounting below.
del input
del reader

# Memory debug specific
libxml2.cleanupParser()
# Read the counter once so the test and the report use the same value.
leaked = libxml2.debugMemory(1)
if leaked == 0:
    print("OK")
else:
    print("Memory leak %d bytes" % (leaked))
    libxml2.dumpMemory()