VirtualBox

source: vbox/trunk/src/libs/libxml2-2.12.6/python/drv_libxml2.py@ 104932

Last change on this file since 104932 was 104106, checked in by vboxsync, 9 months ago

libxml2-2.9.14: Applied and adjusted our libxml2 changes to 2.9.14. bugref:10640

  • Property svn:eol-style set to native
File size: 14.9 KB
Line 
1# -*- coding: iso-8859-1 -*-
2""" A SAX2 driver for libxml2, on top of its XmlReader API
3
4USAGE
5 # put this file (drv_libxml2.py) in PYTHONPATH
6 import xml.sax
7 reader = xml.sax.make_parser(["drv_libxml2"])
8 # ...and the rest is standard python sax.
9
10CAVEATS
11 - Lexical handlers are supported, except for start/endEntity
12 (waiting for XmlReader.ResolveEntity) and start/endDTD
13 - Error callbacks are not exactly synchronous, they tend
14 to be invoked before the corresponding content callback,
15 because the underlying reader interface parses
16 data by chunks of 512 bytes
17
18TODO
19 - search for TODO
20 - some ErrorHandler events (warning)
21 - some ContentHandler events (setDocumentLocator, skippedEntity)
22 - EntityResolver (using libxml2.?)
23 - DTDHandler (if/when libxml2 exposes such node types)
24 - DeclHandler (if/when libxml2 exposes such node types)
25 - property_xml_string?
26 - feature_string_interning?
27 - Incremental parser
28 - additional performance tuning:
29 - one might cache callbacks to avoid some name lookups
30 - one might implement a smarter way to pass attributes to startElement
31 (some kind of lazy evaluation?)
32 - there might be room for improvement in start/endPrefixMapping
33 - other?
34
35"""
36
37__author__ = "Stéphane Bidoul <sbi@skynet.be>"
38__version__ = "0.3"
39
40import sys
41import codecs
42
if sys.version_info[0] >= 3:
    # Python 3: the names and values handled here are already ``str``.
    StringTypes = str

    def _d(s):
        """Return *s* unchanged; no decoding is needed on Python 3."""
        return s
else:
    # Python 2: turn the byte-string author name into a unicode object.
    __author__ = codecs.unicode_escape_decode(__author__)[0]

    StringTypes = (str, unicode)
    # libxml2 returns strings as UTF8
    _decoder = codecs.lookup("utf8")[1]

    def _d(s):
        """Decode a UTF-8 byte string to unicode, passing ``None`` through."""
        if s is not None:
            return _decoder(s)[0]
        return s
59
60from xml.sax._exceptions import *
61from xml.sax import xmlreader, saxutils
62from xml.sax.handler import \
63 feature_namespaces, \
64 feature_namespace_prefixes, \
65 feature_string_interning, \
66 feature_validation, \
67 feature_external_ges, \
68 feature_external_pes, \
69 property_lexical_handler, \
70 property_declaration_handler, \
71 property_dom_node, \
72 property_xml_string
73
# libxml2 is the one hard dependency of this driver; a failed import is
# re-raised as the SAX-level SAXReaderNotAvailable, carrying the original
# import error text.
try:
    import libxml2
except ImportError:
    raise SAXReaderNotAvailable(
        "libxml2 not available: import error was: %s" % (sys.exc_info()[1],))
79
class Locator(xmlreader.Locator):
    """Present a libxml2 xmlTextReaderLocator through the SAX Locator API.

    Line number and system id are read from the wrapped locator on every
    call; column number and public id are not taken from it and are
    reported as fixed values.
    """

    def __init__(self, locator):
        # The wrapped libxml2 locator; only LineNumber() and BaseURI()
        # are called on it.
        self.__locator = locator

    def getLineNumber(self):
        """Return the line number where the current event ends."""
        current_line = self.__locator.LineNumber()
        return current_line

    def getColumnNumber(self):
        """Return the column number where the current event ends.

        This adapter always answers -1.
        """
        return -1

    def getSystemId(self):
        """Return the system identifier for the current event.

        The wrapped locator's base URI is used as the system identifier.
        """
        base_uri = self.__locator.BaseURI()
        return base_uri

    def getPublicId(self):
        """Return the public identifier for the current event (always None)."""
        return None
101
class LibXml2Reader(xmlreader.XMLReader):
    """SAX2 XMLReader that drives the handlers from a libxml2 text reader.

    ``parse()`` pulls nodes one at a time from a libxml2 text reader and
    translates each node type into the matching ContentHandler /
    LexicalHandler / ErrorHandler callback.

    Supported features: namespaces, namespace prefixes, validation and
    external parameter entities.  External general entities are always
    reported as enabled and cannot be switched off.  A lexical handler can
    be installed through ``setProperty``; declaration handlers, DTD
    handlers and entity resolvers are rejected as not supported.
    """

    def __init__(self):
        xmlreader.XMLReader.__init__(self)
        # features
        self.__ns = 0
        self.__nspfx = 0
        self.__validate = 0
        self.__extparams = 1
        # parsing flag (non-zero while parse() is running)
        self.__parsing = 0
        # additional handlers
        self.__lex_handler = None
        self.__decl_handler = None
        # error messages accumulator: None, or a list of
        # (severity, SAXParseException) pairs awaiting dispatch
        self.__errors = None

    @staticmethod
    def _namespaceDeclPrefix(qname):
        """Classify an attribute qname as a namespace declaration or not.

        Returns ``(True, prefix)`` for ``xmlns:prefix``, ``(True, None)``
        for the default-namespace declaration ``xmlns`` and
        ``(False, None)`` for everything else.  An attribute whose name
        merely begins with the letters ``xmlns`` (for instance
        ``xmlnsfoo``) is an ordinary attribute.
        """
        if qname == "xmlns":
            return True, None
        if qname.startswith("xmlns:"):
            return True, qname[6:]
        return False, None

    def _errorHandler(self, arg, msg, severity, locator):
        """Callback registered with the reader: queue one diagnostic.

        Nothing is reported here; the message is wrapped in a
        SAXParseException and stored until ``_reportErrors`` runs.
        """
        if self.__errors is None:
            self.__errors = []
        self.__errors.append((severity,
                              SAXParseException(msg, None,
                                                Locator(locator))))

    def _reportErrors(self, fatal):
        """Dispatch every queued diagnostic to the error handler.

        Warnings go to ``warning()``.  Other diagnostics go to ``error()``,
        except that when *fatal* is true the last queued diagnostic is sent
        to ``fatalError()``.  The queue is emptied before dispatching, so a
        handler that raises cannot leave stale entries behind for a later
        parse.
        """
        errors, self.__errors = self.__errors, None
        if not errors:
            return
        warningSeverities = (libxml2.PARSER_SEVERITY_VALIDITY_WARNING,
                             libxml2.PARSER_SEVERITY_WARNING)
        for severity, exception in errors:
            if severity in warningSeverities:
                self._err_handler.warning(exception)
            elif fatal and exception is errors[-1][1]:
                # when fatal is set, the parse will stop;
                # we consider that the last error reported
                # is the fatal one.
                self._err_handler.fatalError(exception)
            else:
                self._err_handler.error(exception)

    def parse(self, source):
        """Parse *source* and fire the SAX callbacks.

        *source* is either a string (handed to libxml2 as a file name) or
        anything ``saxutils.prepare_input_source`` accepts.

        Raises SAXException for a node type this driver does not know, and
        lets through whatever the installed handlers raise.  The reader is
        closed and the parsing flag cleared on every exit path.
        """
        self.__parsing = 1
        # Diagnostics left over from an earlier, aborted parse must not be
        # reported against this document.
        self.__errors = None
        reader = None
        try:
            # prepare source and create reader
            if isinstance(source, StringTypes):
                reader = libxml2.newTextReaderFilename(source)
            else:
                source = saxutils.prepare_input_source(source)
                inputBuffer = libxml2.inputBuffer(source.getByteStream())
                reader = inputBuffer.newTextReader(source.getSystemId())
            reader.SetErrorHandler(self._errorHandler, None)
            # configure reader
            if self.__extparams:
                reader.SetParserProp(libxml2.PARSER_LOADDTD, 1)
                reader.SetParserProp(libxml2.PARSER_DEFAULTATTRS, 1)
                reader.SetParserProp(libxml2.PARSER_SUBST_ENTITIES, 1)
                reader.SetParserProp(libxml2.PARSER_VALIDATE, self.__validate)
            else:
                reader.SetParserProp(libxml2.PARSER_LOADDTD, 0)
            # we reuse attribute maps (for a slight performance gain)
            if self.__ns:
                attributesNSImpl = xmlreader.AttributesNSImpl({}, {})
            else:
                attributesImpl = xmlreader.AttributesImpl({})
            # prefixes to pop (for endPrefixMapping), one list per open element
            prefixes = []
            # start loop
            self._cont_handler.startDocument()
            while True:
                r = reader.Read()
                # check for errors
                if r == 1:
                    if self.__errors is not None:
                        self._reportErrors(0)
                elif r == 0:
                    if self.__errors is not None:
                        self._reportErrors(0)
                    break  # end of parse
                else:
                    if self.__errors is not None:
                        self._reportErrors(1)
                    else:
                        self._err_handler.fatalError(
                            SAXException("Read failed (no details available)"))
                    break  # fatal parse error
                # get node type
                nodeType = reader.NodeType()
                # Element
                if nodeType == 1:
                    if self.__ns:
                        eltName = (_d(reader.NamespaceUri()),
                                   _d(reader.LocalName()))
                        eltQName = _d(reader.Name())
                        attributesNSImpl._attrs = attrs = {}
                        attributesNSImpl._qnames = qnames = {}
                        newPrefixes = []
                        while reader.MoveToNextAttribute():
                            qname = _d(reader.Name())
                            value = _d(reader.Value())
                            isDecl, newPrefix = \
                                self._namespaceDeclPrefix(qname)
                            if isDecl:
                                newPrefixes.append(newPrefix)
                                self._cont_handler.startPrefixMapping(
                                    newPrefix, value)
                                if not self.__nspfx:
                                    continue  # don't report xmlns attribute
                            attName = (_d(reader.NamespaceUri()),
                                       _d(reader.LocalName()))
                            qnames[attName] = qname
                            attrs[attName] = value
                        reader.MoveToElement()
                        self._cont_handler.startElementNS(
                            eltName, eltQName, attributesNSImpl)
                        if reader.IsEmptyElement():
                            # no EndElement node follows an empty element,
                            # so close it (and its mappings) right away
                            self._cont_handler.endElementNS(eltName, eltQName)
                            for newPrefix in newPrefixes:
                                self._cont_handler.endPrefixMapping(newPrefix)
                        else:
                            prefixes.append(newPrefixes)
                    else:
                        eltName = _d(reader.Name())
                        attributesImpl._attrs = attrs = {}
                        while reader.MoveToNextAttribute():
                            attName = _d(reader.Name())
                            attrs[attName] = _d(reader.Value())
                        reader.MoveToElement()
                        self._cont_handler.startElement(
                            eltName, attributesImpl)
                        if reader.IsEmptyElement():
                            self._cont_handler.endElement(eltName)
                # EndElement
                elif nodeType == 15:
                    if self.__ns:
                        self._cont_handler.endElementNS(
                            (_d(reader.NamespaceUri()),
                             _d(reader.LocalName())),
                            _d(reader.Name()))
                        for prefix in prefixes.pop():
                            self._cont_handler.endPrefixMapping(prefix)
                    else:
                        self._cont_handler.endElement(_d(reader.Name()))
                # Text
                elif nodeType == 3:
                    self._cont_handler.characters(_d(reader.Value()))
                # Whitespace
                elif nodeType == 13:
                    self._cont_handler.ignorableWhitespace(_d(reader.Value()))
                # SignificantWhitespace
                elif nodeType == 14:
                    self._cont_handler.characters(_d(reader.Value()))
                # CDATA
                elif nodeType == 4:
                    if self.__lex_handler is not None:
                        self.__lex_handler.startCDATA()
                    self._cont_handler.characters(_d(reader.Value()))
                    if self.__lex_handler is not None:
                        self.__lex_handler.endCDATA()
                # EntityReference
                elif nodeType == 5:
                    if self.__lex_handler is not None:
                        # entity boundaries are lexical events; this class
                        # has no startEntity of its own
                        self.__lex_handler.startEntity(_d(reader.Name()))
                    # NOTE(review): the module docstring says start/endEntity
                    # support is still waiting for XmlReader.ResolveEntity --
                    # confirm the reader object actually provides this method.
                    reader.ResolveEntity()
                # EndEntity
                elif nodeType == 16:
                    if self.__lex_handler is not None:
                        self.__lex_handler.endEntity(_d(reader.Name()))
                # ProcessingInstruction
                elif nodeType == 7:
                    self._cont_handler.processingInstruction(
                        _d(reader.Name()), _d(reader.Value()))
                # Comment
                elif nodeType == 8:
                    if self.__lex_handler is not None:
                        self.__lex_handler.comment(_d(reader.Value()))
                # DocumentType
                elif nodeType == 10:
                    #if not self.__lex_handler is None:
                    #    self.__lex_handler.startDTD()
                    pass  # TODO (how to detect endDTD? on first non-dtd event?)
                # XmlDeclaration
                elif nodeType == 17:
                    pass  # TODO
                # Entity
                elif nodeType == 6:
                    pass  # TODO (entity decl)
                # Notation (decl)
                elif nodeType == 12:
                    pass  # TODO
                # Attribute (never in this loop)
                #elif nodeType == 2:
                #    pass
                # Document (not exposed)
                #elif nodeType == 9:
                #    pass
                # DocumentFragment (never returned by XmlReader)
                #elif nodeType == 11:
                #    pass
                # None
                #elif nodeType == 0:
                #    pass
                # -
                else:
                    raise SAXException("Unexpected node type %d" % nodeType)
            # endDocument is only sent when the input was read to its end
            if r == 0:
                self._cont_handler.endDocument()
        finally:
            self.__parsing = 0
            # release the reader even when a handler raised mid-document
            if reader is not None:
                reader.Close()

    def setDTDHandler(self, handler):
        """Always raises SAXNotSupportedException."""
        # TODO (when supported, the inherited method works just fine)
        raise SAXNotSupportedException("DTDHandler not supported")

    def setEntityResolver(self, resolver):
        """Always raises SAXNotSupportedException."""
        # TODO (when supported, the inherited method works just fine)
        raise SAXNotSupportedException("EntityResolver not supported")

    def getFeature(self, name):
        """Return the current state of feature *name*.

        Raises SAXNotRecognizedException for an unknown feature name.
        """
        if name == feature_namespaces:
            return self.__ns
        elif name == feature_namespace_prefixes:
            return self.__nspfx
        elif name == feature_validation:
            return self.__validate
        elif name == feature_external_ges:
            return 1  # TODO (does that relate to PARSER_LOADDTD)?
        elif name == feature_external_pes:
            return self.__extparams
        else:
            raise SAXNotRecognizedException("Feature '%s' not recognized" %
                                            name)

    def setFeature(self, name, state):
        """Set feature *name* to *state*.

        Raises SAXNotSupportedException while a parse is running, or when
        asked to turn external general entities off, and
        SAXNotRecognizedException for an unknown feature name.
        """
        if self.__parsing:
            raise SAXNotSupportedException("Cannot set feature %s "
                                           "while parsing" % name)
        if name == feature_namespaces:
            self.__ns = state
        elif name == feature_namespace_prefixes:
            self.__nspfx = state
        elif name == feature_validation:
            self.__validate = state
        elif name == feature_external_ges:
            if state == 0:
                # TODO (does that relate to PARSER_LOADDTD)?
                raise SAXNotSupportedException("Feature '%s' not supported" %
                                               name)
        elif name == feature_external_pes:
            self.__extparams = state
        else:
            raise SAXNotRecognizedException("Feature '%s' not recognized" %
                                            name)

    def getProperty(self, name):
        """Return the lexical or declaration handler stored for *name*.

        Raises SAXNotRecognizedException for any other property name.
        """
        if name == property_lexical_handler:
            return self.__lex_handler
        elif name == property_declaration_handler:
            return self.__decl_handler
        else:
            raise SAXNotRecognizedException("Property '%s' not recognized" %
                                            name)

    def setProperty(self, name, value):
        """Install *value* as the lexical handler.

        Setting the declaration handler raises SAXNotSupportedException;
        any other property name raises SAXNotRecognizedException.
        """
        if name == property_lexical_handler:
            self.__lex_handler = value
        elif name == property_declaration_handler:
            # TODO: store the handler in self.__decl_handler instead
            # if/when libxml2 supports dtd events
            raise SAXNotSupportedException("Property '%s' not supported" %
                                           name)
        else:
            raise SAXNotRecognizedException("Property '%s' not recognized" %
                                            name)
376
def create_parser():
    """Build and return a fresh LibXml2Reader.

    This is the module-level factory through which the driver is obtained
    when it is named in ``xml.sax.make_parser(["drv_libxml2"])``.
    """
    parser = LibXml2Reader()
    return parser
379
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette