VirtualBox

source: vbox/trunk/src/libs/libxml2-2.9.2/python/drv_libxml2.py@ 61577

Last change on this file since 61577 was 58072, checked in by vboxsync, 9 years ago

libxml 2.9.2 unmodified

  • Property svn:eol-style set to native
File size: 14.8 KB
Line 
1# -*- coding: iso-8859-1 -*-
2""" A SAX2 driver for libxml2, on top of its XmlReader API
3
4USAGE
5 # put this file (drv_libxml2.py) in PYTHONPATH
6 import xml.sax
7 reader = xml.sax.make_parser(["drv_libxml2"])
8 # ...and the rest is standard python sax.
9
10CAVEATS
11 - Lexical handlers are supported, except for start/endEntity
12 (waiting for XmlReader.ResolveEntity) and start/endDTD
13 - Error callbacks are not exactly synchronous, they tend
14 to be invoked before the corresponding content callback,
15 because the underlying reader interface parses
16 data by chunks of 512 bytes
17
18TODO
19 - search for TODO
20 - some ErrorHandler events (warning)
21 - some ContentHandler events (setDocumentLocator, skippedEntity)
22 - EntityResolver (using libxml2.?)
23 - DTDHandler (if/when libxml2 exposes such node types)
24 - DeclHandler (if/when libxml2 exposes such node types)
25 - property_xml_string?
26 - feature_string_interning?
27 - Incremental parser
28 - additional performance tuning:
29 - one might cache callbacks to avoid some name lookups
30 - one might implement a smarter way to pass attributes to startElement
31 (some kind of lazy evaluation?)
32 - there might be room for improvement in start/endPrefixMapping
33 - other?
34
35"""
36
37__author__ = "Stéphane Bidoul <sbi@skynet.be>"
38__version__ = "0.3"
39
40import sys
41import codecs
42
# StringTypes is what parse() uses to recognise a source given as a plain
# string rather than as an input-source object.
if sys.version_info[0] >= 3:
    StringTypes = str
else:
    # Python 2 only: turn the byte-string author name into unicode and
    # accept both of the interpreter's string types.
    __author__ = codecs.unicode_escape_decode(__author__)[0]

    StringTypes = (str, unicode)
49
50from xml.sax._exceptions import *
51from xml.sax import xmlreader, saxutils
52from xml.sax.handler import \
53 feature_namespaces, \
54 feature_namespace_prefixes, \
55 feature_string_interning, \
56 feature_validation, \
57 feature_external_ges, \
58 feature_external_pes, \
59 property_lexical_handler, \
60 property_declaration_handler, \
61 property_dom_node, \
62 property_xml_string
63
# libxml2 returns strings as UTF8
_decoder = codecs.lookup("utf8")[1]
def _d(s):
    """Return *s* as text, decoding UTF-8 encoded byte strings.

    ``None`` is passed through unchanged so that absent values (for
    example a missing namespace URI) stay ``None``.  Values that are
    already text are returned as-is: the UTF-8 decoder only accepts
    bytes-like input on Python 3 and would raise ``TypeError`` otherwise.
    NOTE(review): presumably the Python 3 libxml2 bindings hand back
    ``str`` rather than bytes -- confirm against the installed bindings.
    """
    # type(u"") is ``unicode`` on Python 2 and ``str`` on Python 3.
    if s is None or isinstance(s, type(u"")):
        return s
    return _decoder(s)[0]
71
72try:
73 import libxml2
74except ImportError:
75 raise SAXReaderNotAvailable("libxml2 not available: " \
76 "import error was: %s" % sys.exc_info()[1])
77
class Locator(xmlreader.Locator):
    """Present a libxml2.xmlTextReaderLocator through the SAX Locator API."""

    def __init__(self, locator):
        # Wrapped locator object; it is only queried when a getter is called.
        self.__locator = locator

    def getColumnNumber(self):
        """Return the column number where the current event ends.

        This adapter always answers -1.
        """
        return -1

    def getLineNumber(self):
        """Return the line number where the current event ends."""
        wrapped = self.__locator
        return wrapped.LineNumber()

    def getPublicId(self):
        """Return the public identifier for the current event (always None)."""
        return None

    def getSystemId(self):
        """Return the system identifier for the current event.

        The value is the wrapped locator's base URI.
        """
        wrapped = self.__locator
        return wrapped.BaseURI()
99
class LibXml2Reader(xmlreader.XMLReader):
    """SAX2 XMLReader that drives a libxml2 text reader.

    parse() pulls nodes one at a time from the libxml2 reader and turns
    them into ContentHandler / ErrorHandler / lexical-handler callbacks.
    DTDHandler, EntityResolver and the declaration-handler property are
    not supported and raise SAXNotSupportedException when set.
    """

    def __init__(self):
        xmlreader.XMLReader.__init__(self)
        # features
        self.__ns = 0
        self.__nspfx = 0
        self.__validate = 0
        self.__extparams = 1
        # parsing flag
        self.__parsing = 0
        # additional handlers
        self.__lex_handler = None
        self.__decl_handler = None
        # error messages accumulator: None, or a list of
        # (severity, SAXParseException) tuples not yet reported
        self.__errors = None

    def _errorHandler(self, arg, msg, severity, locator):
        """Callback registered on the libxml2 reader; queue one error.

        Errors are only accumulated here; parse() forwards them to the
        SAX error handler after the current Read() call returns.
        """
        if self.__errors is None:
            self.__errors = []
        self.__errors.append((severity,
                              SAXParseException(msg, None,
                                                Locator(locator))))

    def _reportErrors(self, fatal):
        """Forward every queued error to the SAX error handler.

        fatal -- true when the read that produced the errors failed; the
                 last queued non-warning is then reported via fatalError().
        """
        # Detach the queue before dispatching: an error handler is free to
        # raise, and stale entries must not leak into a later parse.
        errors, self.__errors = self.__errors, None
        for severity, exception in errors:
            if severity in (libxml2.PARSER_SEVERITY_VALIDITY_WARNING,
                            libxml2.PARSER_SEVERITY_WARNING):
                self._err_handler.warning(exception)
            else:
                # when fatal is set, the parse will stop;
                # we consider that the last error reported
                # is the fatal one.
                if fatal and exception is errors[-1][1]:
                    self._err_handler.fatalError(exception)
                else:
                    self._err_handler.error(exception)

    def parse(self, source):
        """Parse *source* and emit SAX events.

        source -- a file name / URL string, or anything accepted by
                  saxutils.prepare_input_source().

        Raises SAXException on an unexpected node type; whatever the
        registered handlers raise propagates to the caller.  The libxml2
        reader is closed on every exit path.
        """
        self.__parsing = 1
        # drop errors left over from a previous, aborted parse
        self.__errors = None
        reader = None
        try:
            # prepare source and create reader
            if isinstance(source, StringTypes):
                reader = libxml2.newTextReaderFilename(source)
            else:
                source = saxutils.prepare_input_source(source)
                inputBuffer = libxml2.inputBuffer(source.getByteStream())
                reader = inputBuffer.newTextReader(source.getSystemId())
            reader.SetErrorHandler(self._errorHandler, None)
            # configure reader
            if self.__extparams:
                reader.SetParserProp(libxml2.PARSER_LOADDTD, 1)
                reader.SetParserProp(libxml2.PARSER_DEFAULTATTRS, 1)
                reader.SetParserProp(libxml2.PARSER_SUBST_ENTITIES, 1)
                reader.SetParserProp(libxml2.PARSER_VALIDATE, self.__validate)
            else:
                reader.SetParserProp(libxml2.PARSER_LOADDTD, 0)
            # we reuse attribute maps (for a slight performance gain)
            if self.__ns:
                attributesNSImpl = xmlreader.AttributesNSImpl({}, {})
            else:
                attributesImpl = xmlreader.AttributesImpl({})
            # prefixes to pop (for endPrefixMapping): one list per open
            # element, innermost last
            prefixes = []
            # start loop
            self._cont_handler.startDocument()
            while True:
                r = reader.Read()
                # check for errors
                if r == 1:
                    if self.__errors is not None:
                        self._reportErrors(0)
                elif r == 0:
                    if self.__errors is not None:
                        self._reportErrors(0)
                    break # end of parse
                else:
                    if self.__errors is not None:
                        self._reportErrors(1)
                    else:
                        self._err_handler.fatalError(
                            SAXException("Read failed (no details available)"))
                    break # fatal parse error
                # get node type
                nodeType = reader.NodeType()
                # Element
                if nodeType == 1:
                    if self.__ns:
                        eltName = (_d(reader.NamespaceUri()),
                                   _d(reader.LocalName()))
                        eltQName = _d(reader.Name())
                        attributesNSImpl._attrs = attrs = {}
                        attributesNSImpl._qnames = qnames = {}
                        newPrefixes = []
                        while reader.MoveToNextAttribute():
                            qname = _d(reader.Name())
                            value = _d(reader.Value())
                            # Only "xmlns" and "xmlns:<prefix>" declare a
                            # namespace; a name such as "xmlnsfoo" is an
                            # ordinary attribute.
                            if qname == "xmlns" or qname.startswith("xmlns:"):
                                if qname == "xmlns":
                                    newPrefix = None
                                else:
                                    newPrefix = qname[6:]
                                newPrefixes.append(newPrefix)
                                self._cont_handler.startPrefixMapping(
                                    newPrefix, value)
                                if not self.__nspfx:
                                    continue # don't report xmlns attribute
                            attName = (_d(reader.NamespaceUri()),
                                       _d(reader.LocalName()))
                            qnames[attName] = qname
                            attrs[attName] = value
                        reader.MoveToElement()
                        self._cont_handler.startElementNS(
                            eltName, eltQName, attributesNSImpl)
                        if reader.IsEmptyElement():
                            # no EndElement node follows an empty element,
                            # so close it (and its mappings) right away
                            self._cont_handler.endElementNS(eltName, eltQName)
                            for newPrefix in newPrefixes:
                                self._cont_handler.endPrefixMapping(newPrefix)
                        else:
                            prefixes.append(newPrefixes)
                    else:
                        eltName = _d(reader.Name())
                        attributesImpl._attrs = attrs = {}
                        while reader.MoveToNextAttribute():
                            attName = _d(reader.Name())
                            attrs[attName] = _d(reader.Value())
                        reader.MoveToElement()
                        self._cont_handler.startElement(
                            eltName, attributesImpl)
                        if reader.IsEmptyElement():
                            self._cont_handler.endElement(eltName)
                # EndElement
                elif nodeType == 15:
                    if self.__ns:
                        self._cont_handler.endElementNS(
                            (_d(reader.NamespaceUri()), _d(reader.LocalName())),
                            _d(reader.Name()))
                        for prefix in prefixes.pop():
                            self._cont_handler.endPrefixMapping(prefix)
                    else:
                        self._cont_handler.endElement(_d(reader.Name()))
                # Text
                elif nodeType == 3:
                    self._cont_handler.characters(_d(reader.Value()))
                # Whitespace
                elif nodeType == 13:
                    self._cont_handler.ignorableWhitespace(_d(reader.Value()))
                # SignificantWhitespace
                elif nodeType == 14:
                    self._cont_handler.characters(_d(reader.Value()))
                # CDATA
                elif nodeType == 4:
                    if self.__lex_handler is not None:
                        self.__lex_handler.startCDATA()
                    self._cont_handler.characters(_d(reader.Value()))
                    if self.__lex_handler is not None:
                        self.__lex_handler.endCDATA()
                # EntityReference
                elif nodeType == 5:
                    # Entity events belong to the lexical handler; the
                    # reader itself has no startEntity method.  The hook is
                    # looked up defensively because a lexical handler is
                    # not required to implement it.
                    startEntity = getattr(self.__lex_handler,
                                          "startEntity", None)
                    if startEntity is not None:
                        startEntity(_d(reader.Name()))
                    # NOTE(review): the module docstring says it is still
                    # "waiting for XmlReader.ResolveEntity" -- confirm the
                    # bindings in use actually provide this method.
                    reader.ResolveEntity()
                # EndEntity
                elif nodeType == 16:
                    endEntity = getattr(self.__lex_handler, "endEntity", None)
                    if endEntity is not None:
                        endEntity(_d(reader.Name()))
                # ProcessingInstruction
                elif nodeType == 7:
                    self._cont_handler.processingInstruction(
                        _d(reader.Name()), _d(reader.Value()))
                # Comment
                elif nodeType == 8:
                    if self.__lex_handler is not None:
                        self.__lex_handler.comment(_d(reader.Value()))
                # DocumentType
                elif nodeType == 10:
                    #if not self.__lex_handler is None:
                    #    self.__lex_handler.startDTD()
                    pass # TODO (how to detect endDTD? on first non-dtd event?)
                # XmlDeclaration
                elif nodeType == 17:
                    pass # TODO
                # Entity
                elif nodeType == 6:
                    pass # TODO (entity decl)
                # Notation (decl)
                elif nodeType == 12:
                    pass # TODO
                # Attribute (never in this loop)
                #elif nodeType == 2:
                #    pass
                # Document (not exposed)
                #elif nodeType == 9:
                #    pass
                # DocumentFragment (never returned by XmlReader)
                #elif nodeType == 11:
                #    pass
                # None
                #elif nodeType == 0:
                #    pass
                # -
                else:
                    raise SAXException("Unexpected node type %d" % nodeType)
            # endDocument is only sent when the input was read to its end
            if r == 0:
                self._cont_handler.endDocument()
        finally:
            try:
                # release the reader even when a handler raised; it is
                # still None if creating it failed
                if reader is not None:
                    reader.Close()
            finally:
                self.__parsing = 0

    def setDTDHandler(self, handler):
        """Always raises SAXNotSupportedException."""
        # TODO (when supported, the inherited method works just fine)
        raise SAXNotSupportedException("DTDHandler not supported")

    def setEntityResolver(self, resolver):
        """Always raises SAXNotSupportedException."""
        # TODO (when supported, the inherited method works just fine)
        raise SAXNotSupportedException("EntityResolver not supported")

    def getFeature(self, name):
        """Return the current state of feature *name*.

        Raises SAXNotRecognizedException for an unknown feature.
        """
        if name == feature_namespaces:
            return self.__ns
        elif name == feature_namespace_prefixes:
            return self.__nspfx
        elif name == feature_validation:
            return self.__validate
        elif name == feature_external_ges:
            return 1 # TODO (does that relate to PARSER_LOADDTD)?
        elif name == feature_external_pes:
            return self.__extparams
        else:
            raise SAXNotRecognizedException("Feature '%s' not recognized" %
                                            name)

    def setFeature(self, name, state):
        """Set feature *name* to *state*.

        Raises SAXNotSupportedException while a parse is in progress or
        when external general entities are switched off, and
        SAXNotRecognizedException for an unknown feature.
        """
        if self.__parsing:
            raise SAXNotSupportedException("Cannot set feature %s "
                                           "while parsing" % name)
        if name == feature_namespaces:
            self.__ns = state
        elif name == feature_namespace_prefixes:
            self.__nspfx = state
        elif name == feature_validation:
            self.__validate = state
        elif name == feature_external_ges:
            if state == 0:
                # TODO (does that relate to PARSER_LOADDTD)?
                raise SAXNotSupportedException("Feature '%s' not supported" %
                                               name)
        elif name == feature_external_pes:
            self.__extparams = state
        else:
            raise SAXNotRecognizedException("Feature '%s' not recognized" %
                                            name)

    def getProperty(self, name):
        """Return the value of property *name*.

        Raises SAXNotRecognizedException for an unknown property.
        """
        if name == property_lexical_handler:
            return self.__lex_handler
        elif name == property_declaration_handler:
            return self.__decl_handler
        else:
            raise SAXNotRecognizedException("Property '%s' not recognized" %
                                            name)

    def setProperty(self, name, value):
        """Set property *name* to *value*.

        Only the lexical handler can be set.  The declaration handler
        raises SAXNotSupportedException and any other name raises
        SAXNotRecognizedException.
        """
        if name == property_lexical_handler:
            self.__lex_handler = value
        elif name == property_declaration_handler:
            # TODO: store the handler in self.__decl_handler instead
            # if/when libxml2 supports dtd events
            raise SAXNotSupportedException("Property '%s' not supported" %
                                           name)
        else:
            raise SAXNotRecognizedException("Property '%s' not recognized" %
                                            name)
374
def create_parser():
    """Return a new LibXml2Reader instance."""
    parser = LibXml2Reader()
    return parser
377
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette