def fromXml (self, s, *args):
    """Return a lazily evaluated stream of XML node descriptions read from *s*.

    *s* is wrapped in ``Stream`` and fed to a libxml2 text reader.  Every
    cell of the result is a two-item list: ``[name, type_label]`` for the
    node that was just read, followed by ``Lazy(step)`` where ``step`` reads
    the next node when called.  ``self.eos`` is returned instead of a cell
    as soon as ``reader.Read()`` yields a falsy value.

    Extra positional arguments are accepted and ignored.
    """
    import libxml2

    # Labels indexed by the integer that reader.NodeType() returns.
    type_labels = (
        "none", "element", "attribute", "text", "cdata", "entref",
        "entity", "procinst", "comment", "document", "doctype",
        "docfragment", "notation", "whitespace", "preservewhitespace",
        "endelement", "endentity", "xmldecl",
    )

    xml_input = libxml2.inputBuffer(Stream(s))
    reader = xml_input.newTextReader("")

    def _next_cell():
        # NOTE(review): only a falsy Read() result ends the stream; a
        # negative (error) status would be treated as "node available" --
        # confirm whether that can happen here.
        if reader.Read():
            description = [reader.Name(), type_labels[reader.NodeType()]]
            return [description, Lazy(_next_cell)]
        return self.eos

    return _next_cell()
def tst_reader(s):
    """Render every node the text reader yields for the XML text *s*.

    One line is produced per node in the form
    ``"<type> (<name>) [<value>] <is-empty>"``.  For element nodes
    (type 1) each attribute is additionally listed on its own line
    prefixed with ``"-- "``.  The concatenated text is returned.
    """
    source = str_io(s)
    buf = libxml2.inputBuffer(source)
    reader = buf.newTextReader("tst")

    pieces = []
    while reader.Read():
        pieces.append("%s (%s) [%s] %d\n" % (reader.NodeType(),
                                             reader.Name(),
                                             reader.Value(),
                                             reader.IsEmptyElement()))
        if reader.NodeType() != 1:
            continue
        # Element: walk its attributes before moving on to the next node.
        while reader.MoveToNextAttribute():
            pieces.append("-- %s (%s) [%s]\n" % (reader.NodeType(),
                                                 reader.Name(),
                                                 reader.Value()))
    return "".join(pieces)
def process_introspection_data(data):
    """Extract method input signatures from introspection XML.

    The document is scanned with a libxml2 text reader.  For every
    ``<method>`` found inside an ``<interface>``, the ``type`` attributes of
    its ``<arg>`` children whose ``direction`` attribute is missing or equal
    to ``'in'`` are concatenated in document order.

    :param data: the introspection XML as text; it is encoded to UTF-8
        before parsing.
    :returns: a dict mapping ``"<interface name>.<method name>"`` to the
        concatenated input signature string (``''`` for a method without
        input arguments).
    :raises exceptions.IntrospectionParserException: if the reader stops
        with a status other than 0 (clean end of document).
    """
    method_map = {}

    XMLREADER_START_ELEMENT_NODE_TYPE = 1
    XMLREADER_END_ELEMENT_NODE_TYPE = 15

    stream = cStringIO.StringIO(data.encode('utf-8'))
    input_source = libxml2.inputBuffer(stream)
    reader = input_source.newTextReader("urn:introspect")

    current_iface = None
    current_method = None
    current_sigstr = ''

    ret = reader.Read()
    while ret == 1:
        name = reader.LocalName()
        node_type = reader.NodeType()

        if node_type == XMLREADER_START_ELEMENT_NODE_TYPE:
            if (not current_iface and not current_method
                    and name == 'interface'):
                # A self-closing <interface/> never produces an end-element
                # event, so it must not be left "open"; otherwise the next
                # interface would be ignored and its methods would be
                # recorded under this one's name.
                if not reader.IsEmptyElement():
                    current_iface = reader.GetAttribute('name')
            elif current_iface and not current_method and name == 'method':
                current_method = reader.GetAttribute('name')
                if reader.IsEmptyElement():
                    # Self-closing method: no args and no end-element event.
                    method_map[current_iface + '.' + current_method] = ''
                    current_method = None
                    current_sigstr = ''
            elif current_iface and current_method and name == 'arg':
                direction = reader.GetAttribute('direction')
                # Args without a direction are counted as input args.
                if not direction or direction == 'in':
                    current_sigstr = (current_sigstr
                                      + reader.GetAttribute('type'))

        elif node_type == XMLREADER_END_ELEMENT_NODE_TYPE:
            if current_iface and not current_method and name == 'interface':
                current_iface = None
            elif current_iface and current_method and name == 'method':
                method_map[current_iface + '.' + current_method] = \
                    current_sigstr
                current_method = None
                current_sigstr = ''

        ret = reader.Read()

    # The loop ends on 0 (end of document) or on an error status.
    if ret != 0:
        raise exceptions.IntrospectionParserException(data)

    return method_map
def xmlvalidate(log):
    """Run a validating libxml2 parse and hand selected messages to *log*.

    The document text is taken from the name ``aString``, which is not
    defined in this function (presumably supplied by an enclosing scope --
    verify against the caller).  Messages delivered to the libxml2 error
    handler are collected; every collected line that starts with a random
    marker (also used as the reader's document URI) is reduced to the text
    after its last of at most four ``':'`` splits, stripped, and passed to
    ``log``.

    Note that the error handler is registered through
    ``libxml2.registerErrorHandler`` and is not unregistered here.
    """
    import libxml2
    from StringIO import StringIO
    from random import random

    # Random marker used to recognise the relevant lines afterwards.
    prefix = "...%s..." % str(random()).replace('0.', '')

    collected = []
    libxml2.registerErrorHandler(
        lambda sink, text: sink.append(text), collected)

    source = libxml2.inputBuffer(StringIO(xmlEncoding.asUTF8(aString)))
    reader = source.newTextReader(prefix)
    reader.SetParserProp(libxml2.PARSER_VALIDATE, 1)

    # Drain the document; only the handler's side effects matter.
    while reader.Read() == 1:
        pass

    for line in ''.join(collected).splitlines():
        if not line.startswith(prefix):
            continue
        log(line.split(':', 4)[-1].strip())
str_io = io.StringIO docstr="""<?xml version='1.0'?> <!DOCTYPE doc [ <!ENTITY tst "<p>test</p>"> ]> <doc>&tst;</doc>""" # Memory debug specific libxml2.debugMemory(1) # # First test, normal don't substitute entities. # f = str_io(docstr) input = libxml2.inputBuffer(f) reader = input.newTextReader("test_noent") ret = reader.Read() if ret != 1: print("Error reading to root") sys.exit(1) if reader.Name() == "doc" or reader.NodeType() == 10: ret = reader.Read() if ret != 1: print("Error reading to root") sys.exit(1) if reader.Name() != "doc" or reader.NodeType() != 1: print("test_normal: Error reading the root element") sys.exit(1) ret = reader.Read() if ret != 1:
# Parse the Relax NG Schemas # rngp = libxml2.relaxNGNewMemParserCtxt(schema, len(schema)) rngs = rngp.relaxNGParse() del rngp # # Parse and validate the correct document # docstr="""<foo> <label>some text</label> <item>100</item> </foo>""" f = StringIO.StringIO(docstr) input = libxml2.inputBuffer(f) reader = input.newTextReader("correct") reader.RelaxNGSetSchema(rngs) ret = reader.Read() while ret == 1: ret = reader.Read() if ret != 0: print "Error parsing the document" sys.exit(1) if reader.IsValid() != 1: print "Document failed to validate" sys.exit(1) #
def parse(self, source):
    """Parse *source* with a libxml2 text reader and emit handler events.

    The source is normalised with ``saxutils.prepare_input_source`` and read
    with DTD loading and default attributes enabled, entity substitution
    and validation disabled.  Elements, end-elements, text, significant
    whitespace, entity references and the doctype node are forwarded to
    ``self._cont_handler``; every other node type is ignored.

    ``endDocument`` is only sent when the reader finished with status 0.
    ``self.__parsing`` is set to 1 for the duration of the call and reset
    to 0 afterwards, even when an exception escapes.
    """
    self.__parsing = 1
    try:
        # Build the reader on top of the source's byte stream.
        source = saxutils.prepare_input_source(source)
        byte_input = libxml2.inputBuffer(source.getByteStream())
        reader = byte_input.newTextReader(source.getSystemId())
        reader.SetErrorHandler(self._errorHandler, None)

        # Reader configuration.
        for prop, setting in ((libxml2.PARSER_LOADDTD, 1),
                              (libxml2.PARSER_DEFAULTATTRS, 1),
                              (libxml2.PARSER_SUBST_ENTITIES, 0),
                              (libxml2.PARSER_VALIDATE, 0)):
            reader.SetParserProp(prop, setting)

        # One attributes object is reused for every element; only its
        # underlying dict is replaced.
        attributesImpl = xmlreader.AttributesImpl({})

        self._cont_handler.startDocument()
        while True:
            r = reader.Read()

            if r != 1 and r != 0:
                # Read failure: report what was recorded, or raise a
                # generic fatal error when nothing was.
                if self.__errors is not None:
                    self._reportErrors(1)
                else:
                    self._err_handler.fatalError(
                        SAXException("Read failed (no details available)"))
                break

            # Statuses 1 and 0 both flush recorded errors as non-fatal.
            if self.__errors is not None:
                self._reportErrors(0)
            if r == 0:
                break  # end of document

            kind = reader.NodeType()

            if kind == 1:
                # Element: collect attributes, then return to the element.
                element_name = reader.Name()
                attributesImpl._attrs = collected = {}
                while reader.MoveToNextAttribute():
                    attribute_name = reader.Name()
                    collected[attribute_name] = reader.Value()
                reader.MoveToElement()
                self._cont_handler.startElement(element_name, attributesImpl)
                if reader.IsEmptyElement():
                    self._cont_handler.endElement(element_name)
            elif kind == 15:
                # EndElement
                self._cont_handler.endElement(reader.Name())
            elif kind in (3, 14):
                # Text and SignificantWhitespace are both character data.
                self._cont_handler.characters(reader.Value())
            elif kind == 5:
                # EntityReference: passed on as an entity, not expanded.
                # NOTE(review): ``entity`` is not a standard SAX
                # ContentHandler method -- confirm the handler provides it.
                self._cont_handler.entity(reader.Name())
            elif kind == 10:
                # DocumentType: its serialised text is run through a push
                # parser that reports to the same content handler.
                doctype_text = str(reader.CurrentNode())
                doctype_parser = libxml2.createPushParser(
                    self._cont_handler, doctype_text, len(doctype_text),
                    "doctype")
                doctype_parser.parseChunk("", 0, 1)
            # All other node types are ignored.

        if r == 0:
            self._cont_handler.endDocument()
        reader.Close()
    finally:
        self.__parsing = 0
# this tests the basic APIs of the XmlTextReader interface # import libxml2 import sys try: import StringIO str_io = StringIO.StringIO except: import io str_io = io.StringIO # Memory debug specific libxml2.debugMemory(1) f = str_io("""<a><b b1="b1"/><c>content of c</c></a>""") input = libxml2.inputBuffer(f) reader = input.newTextReader("test1") ret = reader.Read() if ret != 1: print("test1: Error reading to first element") sys.exit(1) if reader.Name() != "a" or reader.IsEmptyElement() != 0 or \ reader.NodeType() != 1 or reader.HasAttributes() != 0: print("test1: Error reading the first element") sys.exit(1) ret = reader.Read() if ret != 1: print("test1: Error reading to second element") sys.exit(1) if reader.Name() != "b" or reader.IsEmptyElement() != 1 or \ reader.NodeType() != 1 or reader.HasAttributes() != 1:
#!/usr/bin/python -u
# Leak check: create and drop many libxml2 input buffers, then confirm that
# libxml2's memory debugging reports no outstanding allocation.
import sys
import libxml2
try:
    import StringIO
    str_io = StringIO.StringIO
except ImportError:
    # No StringIO module available; fall back to io.StringIO.
    import io
    str_io = io.StringIO

# Memory debug specific
libxml2.debugMemory(1)

# Each iteration rebinds f and buf, releasing the previous pair.
for _ in range(5000):
    f = str_io("foobar")
    buf = libxml2.inputBuffer(f)

# Drop the last pair so nothing created above is still referenced.
del f
del buf

# Memory debug specific
libxml2.cleanupParser()
if libxml2.debugMemory(1) == 0:
    print("OK")
else:
    print("Memory leak %d bytes" % (libxml2.debugMemory(1)))
    libxml2.dumpMemory()
def parse(self, source):
    """Parse *source* with a libxml2 text reader and emit SAX events.

    *source* is opened directly by file name when its type is one of
    ``StringTypes``; otherwise it is normalised with
    ``saxutils.prepare_input_source`` and read from its byte stream.  The
    reader loads the DTD, applies default attributes, substitutes entities
    and validates according to ``self.__validate``.

    Depending on ``self.__ns`` either the namespace-aware events
    (``startElementNS`` / ``endElementNS`` / prefix mappings) or the plain
    ones (``startElement`` / ``endElement``) are sent to
    ``self._cont_handler``.  CDATA boundaries and comments go to
    ``self.__lex_handler`` when one is set.

    ``endDocument`` is only sent when the reader finished with status 0.
    ``self.__parsing`` is 1 during the call and reset to 0 afterwards, even
    when an exception escapes.

    :raises SAXException: if the reader reports a node type this loop does
        not know about.
    """
    self.__parsing = 1
    try:
        # prepare source and create reader
        if type(source) in StringTypes:
            reader = libxml2.newTextReaderFilename(source)
        else:
            source = saxutils.prepare_input_source(source)
            input_buffer = libxml2.inputBuffer(source.getByteStream())
            reader = input_buffer.newTextReader(source.getSystemId())
        reader.SetErrorHandler(self._errorHandler, None)
        # configure reader
        reader.SetParserProp(libxml2.PARSER_LOADDTD, 1)
        reader.SetParserProp(libxml2.PARSER_DEFAULTATTRS, 1)
        reader.SetParserProp(libxml2.PARSER_SUBST_ENTITIES, 1)
        reader.SetParserProp(libxml2.PARSER_VALIDATE, self.__validate)
        # we reuse attribute maps (for a slight performance gain)
        if self.__ns:
            attributesNSImpl = xmlreader.AttributesNSImpl({}, {})
        else:
            attributesImpl = xmlreader.AttributesImpl({})
        # One list of declared prefixes per open element, popped on the
        # matching end-element to send endPrefixMapping.
        prefixes = []
        # start loop
        self._cont_handler.startDocument()
        while True:
            r = reader.Read()
            # check for errors
            if r == 1:
                if self.__errors is not None:
                    self._reportErrors(0)
            elif r == 0:
                if self.__errors is not None:
                    self._reportErrors(0)
                break  # end of parse
            else:
                if self.__errors is not None:
                    self._reportErrors(1)
                else:
                    self._err_handler.fatalError(
                        SAXException("Read failed (no details available)"))
                break  # fatal parse error
            # get node type
            nodeType = reader.NodeType()
            # Element
            if nodeType == 1:
                if self.__ns:
                    eltName = (_d(reader.NamespaceUri()),
                               _d(reader.LocalName()))
                    eltQName = _d(reader.Name())
                    attributesNSImpl._attrs = attrs = {}
                    attributesNSImpl._qnames = qnames = {}
                    newPrefixes = []
                    while reader.MoveToNextAttribute():
                        qname = _d(reader.Name())
                        value = _d(reader.Value())
                        # Only "xmlns" and "xmlns:<prefix>" declare a
                        # namespace.  A bare startswith("xmlns") test would
                        # also match ordinary attributes such as
                        # "xmlnsfoo" and invent a bogus prefix mapping.
                        if qname == "xmlns" or qname.startswith("xmlns:"):
                            if len(qname) > 5:
                                newPrefix = qname[6:]
                            else:
                                newPrefix = None  # default namespace
                            newPrefixes.append(newPrefix)
                            self._cont_handler.startPrefixMapping(
                                newPrefix, value)
                            if not self.__nspfx:
                                continue  # don't report xmlns attribute
                        attName = (_d(reader.NamespaceUri()),
                                   _d(reader.LocalName()))
                        qnames[attName] = qname
                        attrs[attName] = value
                    reader.MoveToElement()
                    self._cont_handler.startElementNS(
                        eltName, eltQName, attributesNSImpl)
                    if reader.IsEmptyElement():
                        # No end-element event will follow: close the
                        # element and its prefix mappings right away.
                        self._cont_handler.endElementNS(eltName, eltQName)
                        for newPrefix in newPrefixes:
                            self._cont_handler.endPrefixMapping(newPrefix)
                    else:
                        prefixes.append(newPrefixes)
                else:
                    eltName = _d(reader.Name())
                    attributesImpl._attrs = attrs = {}
                    while reader.MoveToNextAttribute():
                        attName = _d(reader.Name())
                        attrs[attName] = _d(reader.Value())
                    reader.MoveToElement()
                    self._cont_handler.startElement(eltName, attributesImpl)
                    if reader.IsEmptyElement():
                        self._cont_handler.endElement(eltName)
            # EndElement
            elif nodeType == 15:
                if self.__ns:
                    self._cont_handler.endElementNS(
                        (_d(reader.NamespaceUri()), _d(reader.LocalName())),
                        _d(reader.Name()))
                    for prefix in prefixes.pop():
                        self._cont_handler.endPrefixMapping(prefix)
                else:
                    self._cont_handler.endElement(_d(reader.Name()))
            # Text
            elif nodeType == 3:
                self._cont_handler.characters(_d(reader.Value()))
            # Whitespace
            elif nodeType == 13:
                self._cont_handler.ignorableWhitespace(_d(reader.Value()))
            # SignificantWhitespace
            elif nodeType == 14:
                self._cont_handler.characters(_d(reader.Value()))
            # CDATA
            elif nodeType == 4:
                if self.__lex_handler is not None:
                    self.__lex_handler.startCDATA()
                self._cont_handler.characters(_d(reader.Value()))
                if self.__lex_handler is not None:
                    self.__lex_handler.endCDATA()
            # EntityReference
            elif nodeType == 5:
                # NOTE(review): guarded by the lexical handler but called on
                # self -- looks like this was meant to be
                # self.__lex_handler.startEntity; confirm against the class.
                if self.__lex_handler is not None:
                    self.startEntity(_d(reader.Name()))
                reader.ResolveEntity()
            # EndEntity
            elif nodeType == 16:
                # NOTE(review): same question as for startEntity above.
                if self.__lex_handler is not None:
                    self.endEntity(_d(reader.Name()))
            # ProcessingInstruction
            elif nodeType == 7:
                self._cont_handler.processingInstruction(
                    _d(reader.Name()), _d(reader.Value()))
            # Comment
            elif nodeType == 8:
                if self.__lex_handler is not None:
                    self.__lex_handler.comment(_d(reader.Value()))
            # DocumentType
            elif nodeType == 10:
                pass  # TODO (startDTD; how to detect endDTD? on first non-dtd event?)
            # XmlDeclaration
            elif nodeType == 17:
                pass  # TODO
            # Entity
            elif nodeType == 6:
                pass  # TODO (entity decl)
            # Notation (decl)
            elif nodeType == 12:
                pass  # TODO
            # Not handled on purpose: Attribute (2, never in this loop),
            # Document (9, not exposed), DocumentFragment (11, never
            # returned by XmlReader) and None (0).
            else:
                raise SAXException("Unexpected node type %d" % nodeType)
        if r == 0:
            self._cont_handler.endDocument()
        reader.Close()
    finally:
        self.__parsing = 0
import libxml2 docstr = """<?xml version='1.0'?> <!DOCTYPE doc [ <!ENTITY tst "<p>test</p>"> ]> <doc>&tst;</doc>""" # Memory debug specific libxml2.debugMemory(1) # # First test, normal don't substitute entities. # f = StringIO.StringIO(docstr) input = libxml2.inputBuffer(f) reader = input.newTextReader("test_noent") ret = reader.Read() if ret != 1: print "Error reading to root" sys.exit(1) if reader.Name() == "doc" or reader.NodeType() == 10: ret = reader.Read() if ret != 1: print "Error reading to root" sys.exit(1) if reader.Name() != "doc" or reader.NodeType() != 1: print "test_normal: Error reading the root element" sys.exit(1) ret = reader.Read() if ret != 1:
str_io = io.StringIO docstr = """<?xml version='1.0'?> <!DOCTYPE doc [ <!ENTITY tst "<p>test</p>"> ]> <doc>&tst;</doc>""" # Memory debug specific libxml2.debugMemory(1) # # First test, normal don't substitute entities. # f = str_io(docstr) input = libxml2.inputBuffer(f) reader = input.newTextReader("test_noent") ret = reader.Read() if ret != 1: print("Error reading to root") sys.exit(1) if reader.Name() == "doc" or reader.NodeType() == 10: ret = reader.Read() if ret != 1: print("Error reading to root") sys.exit(1) if reader.Name() != "doc" or reader.NodeType() != 1: print("test_normal: Error reading the root element") sys.exit(1) ret = reader.Read() if ret != 1:
1,x 1,c 3,#text 15,c 15,x 14,#text 1,b 3,#text 15,b 14,#text 15,test """ res="" err="" input = libxml2.inputBuffer(StringIO.StringIO(s)) reader = input.newTextReader("test2") reader.SetParserProp(libxml2.PARSER_LOADDTD,1) reader.SetParserProp(libxml2.PARSER_DEFAULTATTRS,1) reader.SetParserProp(libxml2.PARSER_SUBST_ENTITIES,1) reader.SetParserProp(libxml2.PARSER_VALIDATE,1) while reader.Read() == 1: res = res + "%s,%s\n" % (reader.NodeType(),reader.Name()) if res != expect: print "test2 failed: unexpected output" print res sys.exit(1) if err != "": print "test2 failed: validation error found" print err
def parse(self, source):
    """Parse *source* with a libxml2 text reader and emit SAX events.

    *source* is opened directly by file name when its type is one of
    ``StringTypes``; otherwise it is normalised with
    ``saxutils.prepare_input_source`` and read from its byte stream.  The
    reader loads the DTD, applies default attributes, substitutes entities
    and validates according to ``self.__validate``.

    Depending on ``self.__ns`` either the namespace-aware events
    (``startElementNS`` / ``endElementNS`` / prefix mappings) or the plain
    ones (``startElement`` / ``endElement``) are sent to
    ``self._cont_handler``.  CDATA boundaries and comments go to
    ``self.__lex_handler`` when one is set.

    ``endDocument`` is only sent when the reader finished with status 0.
    ``self.__parsing`` is 1 during the call and reset to 0 afterwards, even
    when an exception escapes.

    :raises SAXException: if the reader reports a node type this loop does
        not know about.
    """
    self.__parsing = 1
    try:
        # prepare source and create reader
        if type(source) in StringTypes:
            reader = libxml2.newTextReaderFilename(source)
        else:
            source = saxutils.prepare_input_source(source)
            input_buffer = libxml2.inputBuffer(source.getByteStream())
            reader = input_buffer.newTextReader(source.getSystemId())
        reader.SetErrorHandler(self._errorHandler, None)
        # configure reader
        reader.SetParserProp(libxml2.PARSER_LOADDTD, 1)
        reader.SetParserProp(libxml2.PARSER_DEFAULTATTRS, 1)
        reader.SetParserProp(libxml2.PARSER_SUBST_ENTITIES, 1)
        reader.SetParserProp(libxml2.PARSER_VALIDATE, self.__validate)
        # we reuse attribute maps (for a slight performance gain)
        if self.__ns:
            attributesNSImpl = xmlreader.AttributesNSImpl({}, {})
        else:
            attributesImpl = xmlreader.AttributesImpl({})
        # One list of declared prefixes per open element, popped on the
        # matching end-element to send endPrefixMapping.
        prefixes = []
        # start loop
        self._cont_handler.startDocument()
        while True:
            r = reader.Read()
            # check for errors
            if r == 1:
                if self.__errors is not None:
                    self._reportErrors(0)
            elif r == 0:
                if self.__errors is not None:
                    self._reportErrors(0)
                break  # end of parse
            else:
                if self.__errors is not None:
                    self._reportErrors(1)
                else:
                    self._err_handler.fatalError(
                        SAXException("Read failed (no details available)"))
                break  # fatal parse error
            # get node type
            nodeType = reader.NodeType()
            # Element
            if nodeType == 1:
                if self.__ns:
                    eltName = (_d(reader.NamespaceUri()),
                               _d(reader.LocalName()))
                    eltQName = _d(reader.Name())
                    attributesNSImpl._attrs = attrs = {}
                    attributesNSImpl._qnames = qnames = {}
                    newPrefixes = []
                    while reader.MoveToNextAttribute():
                        qname = _d(reader.Name())
                        value = _d(reader.Value())
                        # Only "xmlns" and "xmlns:<prefix>" declare a
                        # namespace.  A bare startswith("xmlns") test would
                        # also match ordinary attributes such as
                        # "xmlnsfoo" and invent a bogus prefix mapping.
                        if qname == "xmlns" or qname.startswith("xmlns:"):
                            if len(qname) > 5:
                                newPrefix = qname[6:]
                            else:
                                newPrefix = None  # default namespace
                            newPrefixes.append(newPrefix)
                            self._cont_handler.startPrefixMapping(
                                newPrefix, value)
                            if not self.__nspfx:
                                continue  # don't report xmlns attribute
                        attName = (_d(reader.NamespaceUri()),
                                   _d(reader.LocalName()))
                        qnames[attName] = qname
                        attrs[attName] = value
                    reader.MoveToElement()
                    self._cont_handler.startElementNS(
                        eltName, eltQName, attributesNSImpl)
                    if reader.IsEmptyElement():
                        # No end-element event will follow: close the
                        # element and its prefix mappings right away.
                        self._cont_handler.endElementNS(eltName, eltQName)
                        for newPrefix in newPrefixes:
                            self._cont_handler.endPrefixMapping(newPrefix)
                    else:
                        prefixes.append(newPrefixes)
                else:
                    eltName = _d(reader.Name())
                    attributesImpl._attrs = attrs = {}
                    while reader.MoveToNextAttribute():
                        attName = _d(reader.Name())
                        attrs[attName] = _d(reader.Value())
                    reader.MoveToElement()
                    self._cont_handler.startElement(eltName, attributesImpl)
                    if reader.IsEmptyElement():
                        self._cont_handler.endElement(eltName)
            # EndElement
            elif nodeType == 15:
                if self.__ns:
                    self._cont_handler.endElementNS(
                        (_d(reader.NamespaceUri()), _d(reader.LocalName())),
                        _d(reader.Name()))
                    for prefix in prefixes.pop():
                        self._cont_handler.endPrefixMapping(prefix)
                else:
                    self._cont_handler.endElement(_d(reader.Name()))
            # Text
            elif nodeType == 3:
                self._cont_handler.characters(_d(reader.Value()))
            # Whitespace
            elif nodeType == 13:
                self._cont_handler.ignorableWhitespace(_d(reader.Value()))
            # SignificantWhitespace
            elif nodeType == 14:
                self._cont_handler.characters(_d(reader.Value()))
            # CDATA
            elif nodeType == 4:
                if self.__lex_handler is not None:
                    self.__lex_handler.startCDATA()
                self._cont_handler.characters(_d(reader.Value()))
                if self.__lex_handler is not None:
                    self.__lex_handler.endCDATA()
            # EntityReference
            elif nodeType == 5:
                # NOTE(review): guarded by the lexical handler but called on
                # self -- looks like this was meant to be
                # self.__lex_handler.startEntity; confirm against the class.
                if self.__lex_handler is not None:
                    self.startEntity(_d(reader.Name()))
                reader.ResolveEntity()
            # EndEntity
            elif nodeType == 16:
                # NOTE(review): same question as for startEntity above.
                if self.__lex_handler is not None:
                    self.endEntity(_d(reader.Name()))
            # ProcessingInstruction
            elif nodeType == 7:
                self._cont_handler.processingInstruction(
                    _d(reader.Name()), _d(reader.Value()))
            # Comment
            elif nodeType == 8:
                if self.__lex_handler is not None:
                    self.__lex_handler.comment(_d(reader.Value()))
            # DocumentType
            elif nodeType == 10:
                pass  # TODO (startDTD; how to detect endDTD? on first non-dtd event?)
            # XmlDeclaration
            elif nodeType == 17:
                pass  # TODO
            # Entity
            elif nodeType == 6:
                pass  # TODO (entity decl)
            # Notation (decl)
            elif nodeType == 12:
                pass  # TODO
            # Not handled on purpose: Attribute (2, never in this loop),
            # Document (9, not exposed), DocumentFragment (11, never
            # returned by XmlReader) and None (0).
            else:
                raise SAXException("Unexpected node type %d" % nodeType)
        if r == 0:
            self._cont_handler.endDocument()
        reader.Close()
    finally:
        self.__parsing = 0
#!/usr/bin/python -u
# Leak check: create and drop many libxml2 input buffers, then confirm that
# libxml2's memory debugging reports no outstanding allocation.
import sys
import libxml2
try:
    import StringIO
    str_io = StringIO.StringIO
except ImportError:
    # No StringIO module available; fall back to io.StringIO.
    import io
    str_io = io.StringIO

# Memory debug specific
libxml2.debugMemory(1)

# Each iteration rebinds f and buf, releasing the previous pair.
for _ in range(5000):
    f = str_io("foobar")
    buf = libxml2.inputBuffer(f)

# Drop the last pair so nothing created above is still referenced.
del f
del buf

# Memory debug specific
libxml2.cleanupParser()
# print(...) with a single argument behaves the same as a statement and as
# a function call, so the script runs under either interpreter generation.
if libxml2.debugMemory(1) == 0:
    print("OK")
else:
    print("Memory leak %d bytes" % (libxml2.debugMemory(1)))
    libxml2.dumpMemory()