1 | #!/usr/bin/python -u
2 | # -*- coding: utf-8 -*-
3 | #
4 | # this tests the DTD validation with the XmlTextReader interface
5 | #
6 | import sys
7 | import glob
8 | import string
9 | import libxml2
# Pick an in-memory text stream class that exists on the running
# interpreter: the StringIO module when it can be imported, io.StringIO
# otherwise.
try:
    import StringIO
    str_io = StringIO.StringIO
except ImportError:
    # Only a missing module triggers the fallback; any other failure is a
    # real problem and must not be hidden.
    import io
    str_io = io.StringIO
16 |
# Memory debug specific: the end of this script calls
# libxml2.debugMemory(1) again and reports any non-zero result as a leak.
libxml2.debugMemory(1)
19 |
# Error text collected by the error handler for the document being read.
err = ""
dir_prefix = "../../test/valid/"

# Expected error output per test name.  This table reflects the contents
# of the files ../../test/valid/*.xml.err that are not empty, except that
# the file paths in the messages start with ../../test/
expect = {
    "766956":
"""../../test/valid/dtds/766956.dtd:2: parser error : PEReference: expecting ';'
%ä%ent;
^
../../test/valid/dtds/766956.dtd:2: parser error : Content error in the external subset
%ä%ent;
^
Entity: line 1:
value
^
""",
    "781333":
"""../../test/valid/781333.xml:4: element a: validity error : Element a content does not follow the DTD, expecting ( ..., got
<a/>
^
../../test/valid/781333.xml:5: element a: validity error : Element a content does not follow the DTD, Expecting more child

^
""",
    "cond_sect2":
"""../../test/valid/dtds/cond_sect2.dtd:15: parser error : All markup of the conditional section is not in the same entity
%ent;
^
Entity: line 1:
]]>
^
../../test/valid/dtds/cond_sect2.dtd:17: parser error : Content error in the external subset

^
""",
    "rss":
"""../../test/valid/rss.xml:177: element rss: validity error : Element rss does not carry attribute version
</rss>
^
""",
    "t8":
"""../../test/valid/t8.xml:6: parser error : internal error: xmlParseInternalSubset: error detected in Markup declaration

%defroot; %defmiddle; %deftest;
^
Entity: line 1:
<!ELEMENT root (middle) >
^
../../test/valid/t8.xml:6: parser error : internal error: xmlParseInternalSubset: error detected in Markup declaration

%defroot; %defmiddle; %deftest;
^
Entity: line 1:
<!ELEMENT middle (test) >
^
../../test/valid/t8.xml:6: parser error : internal error: xmlParseInternalSubset: error detected in Markup declaration

%defroot; %defmiddle; %deftest;
^
Entity: line 1:
<!ELEMENT test (#PCDATA) >
^
""",
    "t8a":
"""../../test/valid/t8a.xml:6: parser error : internal error: xmlParseInternalSubset: error detected in Markup declaration

%defroot;%defmiddle;%deftest;
^
Entity: line 1:
<!ELEMENT root (middle) >
^
../../test/valid/t8a.xml:6: parser error : internal error: xmlParseInternalSubset: error detected in Markup declaration

%defroot;%defmiddle;%deftest;
^
Entity: line 1:
<!ELEMENT middle (test) >
^
../../test/valid/t8a.xml:6: parser error : internal error: xmlParseInternalSubset: error detected in Markup declaration

%defroot;%defmiddle;%deftest;
^
Entity: line 1:
<!ELEMENT test (#PCDATA) >
^
""",
    "xlink":
"""../../test/valid/xlink.xml:450: element termdef: validity error : ID dt-arc already defined
<p><termdef id="dt-arc" term="Arc">An <ter
^
validity error : attribute def line 199 references an unknown ID "dt-xlg"
""",
}

# Turn every bare test name into the path of its document:
# dir_prefix + name + ".xml"
expect = dict(
    (dir_prefix + name + ".xml", message)
    for name, message in expect.items()
)
118 |
def callback(ctx, str):
    """Append an error message to the module-level ``err`` string.

    ctx is ignored; str is the message text, which is formatted with
    "%s" and added to the end of ``err``.
    """
    global err
    err += "%s" % (str)
# Have libxml2 hand its error messages to callback().
libxml2.registerErrorHandler(callback, "")

# Tests for which the reader is allowed to stop with a parsing error,
# expanded to the paths of their documents.
parsing_error_files = ["766956", "cond_sect2", "t8", "t8a"]
expect_parsing_error = [
    "%s%s.xml" % (dir_prefix, f) for f in parsing_error_files
]
126 |
# Read every sample document with DTD validation switched on and compare
# the error text collected in `err` with the text recorded in `expect`.
# A mismatch is counted, and the script exits with status 1 once all
# documents have been processed, so a regression cannot end in "OK".
valid_files = sorted(glob.glob(dir_prefix + "*.x*"))
failures = 0
for path in valid_files:
    err = ""
    reader = libxml2.newTextReaderFilename(path)
    reader.SetParserProp(libxml2.PARSER_VALIDATE, 1)
    ret = reader.Read()
    while ret == 1:
        ret = reader.Read()
    # The loop stops as soon as Read() returns something other than 1;
    # any final value other than 0 is treated as a failed read.
    if ret != 0 and path not in expect_parsing_error:
        # Report only: this condition is printed but not counted.
        print("Error parsing and validating %s" % (path))
    # Documents without an entry in `expect` must not produce any error
    # text; the others must produce exactly the recorded text.
    if err and err != expect.get(path):
        failures += 1
        print("Error: ", err)
        if path in expect:
            print("Expected: ", expect[path])

if failures:
    print("Failed %d tests" % failures)
    sys.exit(1)
145 | #
146 | # another separate test based on Stephane Bidoul one
147 | #
148 | s = """
149 | <!DOCTYPE test [
150 | <!ELEMENT test (x,b)>
151 | <!ELEMENT x (c)>
152 | <!ELEMENT b (#PCDATA)>
153 | <!ELEMENT c (#PCDATA)>
154 | <!ENTITY x "<x><c>xxx</c></x>">
155 | ]>
156 | <test>
157 | &x;
158 | <b>bbb</b>
159 | </test>
160 | """
161 | expect="""10,test
162 | 1,test
163 | 14,#text
164 | 1,x
165 | 1,c
166 | 3,#text
167 | 15,c
168 | 15,x
169 | 14,#text
170 | 1,b
171 | 3,#text
172 | 15,b
173 | 14,#text
174 | 15,test
175 | """
176 | res=""
177 | err=""
178 |
179 | input = libxml2.inputBuffer(str_io(s))
180 | reader = input.newTextReader("test2")
181 | reader.SetParserProp(libxml2.PARSER_LOADDTD,1)
182 | reader.SetParserProp(libxml2.PARSER_DEFAULTATTRS,1)
183 | reader.SetParserProp(libxml2.PARSER_SUBST_ENTITIES,1)
184 | reader.SetParserProp(libxml2.PARSER_VALIDATE,1)
185 | while reader.Read() == 1:
186 | res = res + "%s,%s\n" % (reader.NodeType(),reader.Name())
187 |
188 | if res != expect:
189 | print("test2 failed: unexpected output")
190 | print(res)
191 | sys.exit(1)
192 | if err != "":
193 | print("test2 failed: validation error found")
194 | print(err)
195 | sys.exit(1)
196 |
197 | #
198 | # Another test for external entity parsing and validation
199 | #
200 |
201 | s = """<!DOCTYPE test [
202 | <!ELEMENT test (x)>
203 | <!ELEMENT x (#PCDATA)>
204 | <!ENTITY e SYSTEM "tst.ent">
205 | ]>
206 | <test>
207 | &e;
208 | </test>
209 | """
210 | tst_ent = """<x>hello</x>"""
211 | expect="""10 test
212 | 1 test
213 | 14 #text
214 | 1 x
215 | 3 #text
216 | 15 x
217 | 14 #text
218 | 15 test
219 | """
220 | res=""
221 |
def myResolver(URL, ID, ctxt):
    """Entity loader that serves "tst.ent" from memory.

    Returns a new str_io stream over tst_ent when URL is "tst.ent" and
    None for any other URL.  ID and ctxt are not used.
    """
    return str_io(tst_ent) if URL == "tst.ent" else None
226 |
# From here on libxml2 asks myResolver for entities.
libxml2.setEntityLoader(myResolver)

input = libxml2.inputBuffer(str_io(s))
reader = input.newTextReader("test3")
# Load the DTD, apply default attributes, substitute entities, validate.
for parser_prop in (
    libxml2.PARSER_LOADDTD,
    libxml2.PARSER_DEFAULTATTRS,
    libxml2.PARSER_SUBST_ENTITIES,
    libxml2.PARSER_VALIDATE,
):
    reader.SetParserProp(parser_prop, 1)
while reader.Read() == 1:
    res += "{} {}\n".format(reader.NodeType(), reader.Name())

if res != expect:
    print("test3 failed: unexpected output")
    print(res)
    sys.exit(1)
if err:
    print("test3 failed: validation error found")
    print(err)
    sys.exit(1)
246 |
247 | #
248 | # Another test for recursive entity parsing, validation, and replacement of
249 | # entities, making sure the entity ref node doesn't show up in that case
250 | #
251 |
252 | s = """<!DOCTYPE test [
253 | <!ELEMENT test (x, x)>
254 | <!ELEMENT x (y)>
255 | <!ELEMENT y (#PCDATA)>
256 | <!ENTITY x "<x>&y;</x>">
257 | <!ENTITY y "<y>yyy</y>">
258 | ]>
259 | <test>
260 | &x;
261 | &x;
262 | </test>"""
263 | expect="""10 test 0
264 | 1 test 0
265 | 14 #text 1
266 | 1 x 1
267 | 1 y 2
268 | 3 #text 3
269 | 15 y 2
270 | 15 x 1
271 | 14 #text 1
272 | 1 x 1
273 | 1 y 2
274 | 3 #text 3
275 | 15 y 2
276 | 15 x 1
277 | 14 #text 1
278 | 15 test 0
279 | """
280 | res=""
281 | err=""
282 |
283 | input = libxml2.inputBuffer(str_io(s))
284 | reader = input.newTextReader("test4")
285 | reader.SetParserProp(libxml2.PARSER_LOADDTD,1)
286 | reader.SetParserProp(libxml2.PARSER_DEFAULTATTRS,1)
287 | reader.SetParserProp(libxml2.PARSER_SUBST_ENTITIES,1)
288 | reader.SetParserProp(libxml2.PARSER_VALIDATE,1)
289 | while reader.Read() == 1:
290 | res = res + "%s %s %d\n" % (reader.NodeType(),reader.Name(),reader.Depth())
291 |
292 | if res != expect:
293 | print("test4 failed: unexpected output")
294 | print(res)
295 | sys.exit(1)
296 | if err != "":
297 | print("test4 failed: validation error found")
298 | print(err)
299 | sys.exit(1)
300 |
301 | #
302 | # The same test but without entity substitution this time
303 | #
304 |
305 | s = """<!DOCTYPE test [
306 | <!ELEMENT test (x, x)>
307 | <!ELEMENT x (y)>
308 | <!ELEMENT y (#PCDATA)>
309 | <!ENTITY x "<x>&y;</x>">
310 | <!ENTITY y "<y>yyy</y>">
311 | ]>
312 | <test>
313 | &x;
314 | &x;
315 | </test>"""
316 | expect="""10 test 0
317 | 1 test 0
318 | 14 #text 1
319 | 5 x 1
320 | 14 #text 1
321 | 5 x 1
322 | 14 #text 1
323 | 15 test 0
324 | """
325 | res=""
326 | err=""
327 |
328 | input = libxml2.inputBuffer(str_io(s))
329 | reader = input.newTextReader("test5")
330 | reader.SetParserProp(libxml2.PARSER_VALIDATE,1)
331 | while reader.Read() == 1:
332 | res = res + "%s %s %d\n" % (reader.NodeType(),reader.Name(),reader.Depth())
333 |
334 | if res != expect:
335 | print("test5 failed: unexpected output")
336 | print(res)
337 | if err != "":
338 | print("test5 failed: validation error found")
339 | print(err)
340 |
#
# cleanup: drop the last input buffer and reader before the leak check
#
del input, reader

# Memory debug specific: any non-zero value from debugMemory(1) after the
# parser cleanup is reported as a leak and followed by a memory dump.
libxml2.cleanupParser()
if libxml2.debugMemory(1) != 0:
    print("Memory leak %d bytes" % (libxml2.debugMemory(1)))
    libxml2.dumpMemory()
else:
    print("OK")