def xml(xmlin):
    """
    Parse some XML.

    Argument xmlin can be a string, the filename of some XML;
    or an open file, from which xml is read.

    The return value is the parsed XML as DOM nodes: a HandyXmlWrapper
    around the document element.  Results parsed from a file name are
    stored in _xmlcache (keyed by the name _findFile returned) and reused
    on later calls; XML read from an open file is never cached.

    Raises IOError when xmlin is a string and _findFile cannot locate it.
    """
    filename = None

    # A string argument is a file name.
    if isinstance(xmlin, types.StringTypes):
        filename = _findFile(xmlin)
        if not filename:
            # A real exception class is required here: string exceptions
            # cannot be raised on Python 2.6 and later.
            raise IOError("Couldn't find XML to parse: %s" % xmlin)

    if filename:
        if filename in _xmlcache:
            return _xmlcache[filename]
        # We opened this file ourselves, so we are responsible for closing it.
        stream = open(filename)
        try:
            xmldata = stream.read()
        finally:
            stream.close()
    else:
        # The caller owns an already-open file; just read from it.
        xmldata = xmlin.read()

    if bDomlette:
        doc = NonvalidatingReader.parseString(xmldata, filename or ' ')
    else:
        doc = PyExpat.Reader().fromString(xmldata)

    parsedxml = HandyXmlWrapper(doc.documentElement)

    if filename:
        _xmlcache[filename] = parsedxml

    return parsedxml
def link_title_invert():
    """
    Swap each XHTML <h2> with the <a> element it contains, then print the doc.

    The document is parsed from the module-level test_doc string.  For every
    h2 whose direct children include an <a> element, the anchor's children
    are moved into the h2, the anchor takes the h2's place in its parent,
    and the h2 becomes the anchor's child.  Headings with no <a> child are
    left untouched.  The resulting document is written with ext.Print.
    """
    xhtml_ns = 'http://www.w3.org/1999/xhtml'

    # build a DOM tree from the string
    reader = PyExpat.Reader()
    doc = reader.fromString(test_doc)
    try:
        for heading in doc.getElementsByTagNameNS(xhtml_ns, 'h2'):
            anchors = [child for child in heading.childNodes
                       if child.nodeType == Node.ELEMENT_NODE
                       and child.localName == 'a']
            if not anchors:
                # Nothing to invert for a heading without a link.
                continue
            anchor = anchors[0]
            parent = heading.parentNode

            heading.removeChild(anchor)
            # appendChild automatically also removes the child from the
            # anchor, so walk a snapshot rather than the list being emptied.
            for node in list(anchor.childNodes):
                heading.appendChild(node)
            parent.replaceChild(anchor, heading)
            anchor.appendChild(heading)

        ext.Print(doc)
    finally:
        # reclaim the object; not necessary with Python 2.0
        reader.releaseNode(doc)
def test_local_serialize_schema(self):
    """
    Round-trip a DSQueryRegistrySoapOut message that carries a DOM element.

    The Schema field is set to an element dug out of wsdl/nvo-admin.wsdl
    (first child element, three levels down from the document).  The message
    is serialized with SoapWriter, parsed back with ParsedSoap, and the test
    checks that Any is unchanged and that Schema comes back as an element
    named ('http://www.w3.org/2001/XMLSchema', 'schema').
    """
    from ZSI import SoapWriter
    from ZSI import _child_elements
    from xml.dom.ext.reader import PyExpat

    msg = self.client_module.DSQueryRegistrySoapOut()
    msg.DSQueryRegistryResult = msg.new_DSQueryRegistryResult()
    msg.DSQueryRegistryResult.Any = 'hi'

    # Close the WSDL file as soon as it has been read.
    with open('wsdl/nvo-admin.wsdl') as wsdl_file:
        wsdl_text = wsdl_file.read()

    reader = PyExpat.Reader()
    dom = reader.fromString(wsdl_text)

    # Descend three levels of first child elements to reach the schema node.
    dnode = _child_elements(dom)[0]
    tnode = _child_elements(dnode)[0]
    snode = _child_elements(tnode)[0]

    msg.DSQueryRegistryResult.Schema = snode

    sw = SoapWriter()
    sw.serialize(msg)
    soap = str(sw)
    print(soap)

    ps = ParsedSoap(soap)
    pyobj = ps.Parse(msg.typecode)
    self.assertEqual(pyobj.DSQueryRegistryResult.Any,
                     msg.DSQueryRegistryResult.Any)
    self.assertTrue(_is_element(pyobj.DSQueryRegistryResult.Schema))
    print(_get_element_nsuri_name(pyobj.DSQueryRegistryResult.Schema))
    self.assertEqual(
        _get_element_nsuri_name(pyobj.DSQueryRegistryResult.Schema),
        ('http://www.w3.org/2001/XMLSchema', 'schema'))
class HtmlDom:
    """
    DOM view of an HTML document loaded from a local path or a URL.

    Attributes set by the constructor:
      dom     -- document produced by PyExpat.Reader from the tidied markup
      nss     -- namespace map binding the 'html' prefix to XHTML_NAMESPACE
      context -- xml.xpath Context over dom using nss
    """

    def __init__(self, url):
        """
        Load url, tidy it and build the DOM plus an XPath context.

        url is first tried as a local file; if that raises IOError it is
        fetched with fetch() using MOZILLA_AGENT.

        Raises IOError('invalid URL') when the fetch fails as well.
        """
        try:
            f = open(url)
            try:
                data = f.read()
            finally:
                # Close the file even when the read itself fails.
                f.close()
        except IOError:
            try:
                result = fetch(url, agent=MOZILLA_AGENT)
                data = result['data']
            except Exception:
                # Deliberately not a bare except: KeyboardInterrupt and
                # SystemExit must not be reported as a bad URL.
                raise IOError('invalid URL')

        # create parser
        parser = tidy.TreeBuilder()
        parser.feed(data)
        xmlText = _etree.tostring(parser.close())

        # create the DOM
        reader = PyExpat.Reader()
        self.dom = reader.fromString(xmlText)

        self.nss = {u'html': XHTML_NAMESPACE}
        self.context = xml.xpath.Context.Context(self.dom,
                                                 processorNss=self.nss)
def Test(tester):
    """
    Run the 'Basic test' once with the PyExpat reader and once with Sax2.

    Each run parses source_1, prints the document into a string buffer,
    echoes that text to stdout and releases the document.  The text is only
    printed; it is not compared against an expected value.

    Returns whatever tester.groupDone() returns for the Sax2 group.
    """
    def run_basic_group(title, make_reader):
        # The reader is created after the group is opened, hence a factory.
        tester.startGroup(title)
        reader = make_reader()

        tester.startTest('Basic test')
        doc = reader.fromString(source_1)
        buf = cStringIO.StringIO()
        Print(doc, stream=buf)
        text = buf.getvalue()
        print(text)
        reader.releaseNode(doc)

        return tester.groupDone()

    run_basic_group("Testing PyExpat", lambda: PyExpat.Reader())
    return run_basic_group("Testing Sax2", lambda: Sax2.Reader())
def getXmlDom(xml_string):
    """Parse xml_string with a PyExpat reader and return its root element."""
    return PyExpat.Reader().fromString(xml_string).documentElement
def read_xml_from_file(fileName):
    """
    Parse the XML at fileName and print the resulting document.

    fileName is handed to PyExpat.Reader.fromUri.  The document is written
    with ext.Print and then released, even if printing raises.
    """
    # build a DOM tree from the file
    reader = PyExpat.Reader()
    xml_dom_object = reader.fromUri(fileName)
    try:
        ext.Print(xml_dom_object)
    finally:
        # reclaim the object
        reader.releaseNode(xml_dom_object)
def checkt5(self): istr = StringIO.StringIO(intext) m = mimetools.Message(istr) if m.gettype()[0:10] == "multipart/": cid = resolvers.MIMEResolver(m['content-type'], istr) xml = cid.GetSOAPPart() print 'xml=', xml.getvalue() for h, b in cid.parts: print h, b.read() dom = PyExpat.Reader().fromStream(xml) print dom
def __init__(self, uri):
    """
    Read the XML file at uri and keep its parsed document on the instance.

    Sets self.reader (the PyExpat reader), self.doc (the parsed document)
    and self.documentElement (its root element).

    Raises ExpatError, with the uri added to the message, when the file is
    not well-formed.  Errors from opening or reading the file propagate
    unchanged.
    """
    try:
        xmlfile = open(uri, 'r')
        try:
            xmlstr = xmlfile.read()
        finally:
            xmlfile.close()
        # PyExpat can't handle Windows-1252, but it's all ASCII anyway,
        # so claim utf-8.
        xmlstr = xmlstr.replace('encoding = "Windows-1252"',
                                'encoding="UTF-8"')
        xmlstm = StringIO.StringIO(xmlstr)
        self.reader = PyExpat.Reader()
        self.doc = self.reader.fromStream(xmlstm)
        self.documentElement = self.doc.documentElement
    except ExpatError as msg:
        # Raise a real exception: string exceptions cannot be raised on
        # Python 2.6 and later.
        raise ExpatError("XML error: %s in %s" % (msg, uri))
def getKopeteHistory(xml_string):
    """
    Extract the messages from a Kopete chat-history XML string.

    Returns a list with one dict per <msg> element.  Each dict may hold:
      'user' -- value of the msg 'from' attribute
      'date' -- datetime built from the year/month found on the <date>
                child of the first <head> element and the day and
                hour:minute:second found in the msg 'time' attribute
      'text' -- value of the last text node directly inside the msg

    If the document has no DOCTYPE, or its DOCTYPE name is not
    'kopete-history', an error is reported through output() and None is
    returned.
    """
    # Create the PyExpat reader and the DOM tree from the xml string
    reader = PyExpat.Reader()
    dom_tree = reader.fromString(xml_string)

    # Check the type of the file.  A document without any DOCTYPE is
    # rejected the same way instead of failing on a missing attribute.
    doctype = dom_tree.doctype
    if doctype is None or doctype.name != 'kopete-history':
        # NOTE(review): file_path is not defined in this function;
        # presumably a module-level name -- confirm.
        output("ERROR: " + file_path + " is not a kopete chat history file.")
        return None

    dom_root = dom_tree.documentElement

    # Get month and year
    year = None
    month = None
    headers = dom_root.getElementsByTagName("head")
    for header_item in headers[0].childNodes:
        if header_item.nodeName == 'date':
            for attribute in header_item.attributes:
                if attribute.nodeName == 'year':
                    year = int(attribute.nodeValue)
                if attribute.nodeName == 'month':
                    month = int(attribute.nodeValue)

    # Get all the messages
    history = []
    for message in dom_root.getElementsByTagName("msg"):
        msg_item = {}
        for attribute in message.attributes:
            if attribute.nodeName == 'from':
                msg_item['user'] = attribute.nodeValue
            if attribute.nodeName == 'time':
                # The value is split into "<day> <hour>:<minute>:<second>".
                raw_str = attribute.nodeValue.split(' ')
                day = int(raw_str[0])
                clock = raw_str[1].split(':')
                hour = int(clock[0])
                minute = int(clock[1])
                second = int(clock[2])
                msg_item['date'] = datetime(year, month, day,
                                            hour, minute, second)
        for child in message.childNodes:
            if child.nodeType == Node.TEXT_NODE:
                msg_item['text'] = child.nodeValue
        history.append(msg_item)

    return history
def parse_propfind(xml_doc):
    """
    Parse a propfind XML document and collect the properties it names.

    Returns a 3-tuple:
      request_type -- RT_ALLPROP, RT_PROPNAME or RT_PROP, whichever marker
                      element was seen last; None if none was seen
      props        -- dict mapping a namespace URI to the list of
                      lower-cased local element names found in it
      namespaces   -- namespace URIs in order of first appearance
    """
    # Marker elements that select the request type rather than name a property.
    request_kinds = {
        "prop": RT_PROP,
        "allprop": RT_ALLPROP,
        "propname": RT_PROPNAME,
    }

    doc = PyExpat.Reader().fromString(xml_doc)
    walker = doc.createNodeIterator(doc, NodeFilter.NodeFilter.SHOW_ELEMENT,
                                    None, None)

    request_type = None
    props = {}
    namespaces = []

    while True:
        element = walker.nextNode()
        if not element:
            break

        qualified = lower(element.nodeName)
        local = qualified
        if ":" in qualified:
            # Drop the namespace prefix.
            local = split(qualified, ":")[1]

        if local in request_kinds:
            request_type = request_kinds[local]
        elif local != "propfind":
            # rest should be names of attributes
            ns = element.namespaceURI
            if ns not in props:
                props[ns] = []
                namespaces.append(ns)
            props[ns].append(local)

    return request_type, props, namespaces
def ReadDoc():
    """
    Parse TEST_FILE and publish parts of it through module-level globals.

    doc is the parsed document, ADDRBOOK its root element and ENTRIES the
    root's element children.  PA is ENTRIES[0], EN is ENTRIES[2] and VZ is
    ENTRIES[3].  For PA and EN the first six element children are exposed
    as *_NAME, *_ADDR, *_WORK, *_FAX, *_PAGER and *_EMAIL, in that order.
    """
    global doc, ADDRBOOK, ENTRIES
    global PA, PA_NAME, PA_ADDR, PA_WORK, PA_FAX, PA_PAGER, PA_EMAIL
    global EN, EN_NAME, EN_ADDR, EN_WORK, EN_FAX, EN_PAGER, EN_EMAIL
    global VZ

    def element_children(node):
        # Only element nodes count; other node types are filtered out.
        return [child for child in node.childNodes
                if child.nodeType == Node.ELEMENT_NODE]

    # Read in a doc
    doc = PyExpat.Reader().fromUri(TEST_FILE)
    ADDRBOOK = doc.documentElement
    ENTRIES = element_children(ADDRBOOK)

    PA = ENTRIES[0]
    fields = element_children(PA)
    PA_NAME = fields[0]
    PA_ADDR = fields[1]
    PA_WORK = fields[2]
    PA_FAX = fields[3]
    PA_PAGER = fields[4]
    PA_EMAIL = fields[5]

    EN = ENTRIES[2]
    fields = element_children(EN)
    EN_NAME = fields[0]
    EN_ADDR = fields[1]
    EN_WORK = fields[2]
    EN_FAX = fields[3]
    EN_PAGER = fields[4]
    EN_EMAIL = fields[5]

    VZ = ENTRIES[3]
def test(file=None):
    """
    Show a DOM tree in a Tk window.

    file -- optional path of an XML file; when it is omitted or does not
            exist, addresses.xmlstream is parsed instead.

    Returns (win, tt): the Tk root window and the TextTree packed into it.
    """
    reader = PyExpat.Reader()
    # Test the parameter that is actually opened (the original checked an
    # unrelated name, xml_file, before calling open(file)).
    if file and os.path.exists(file):
        s = open(file)
    else:
        s = addresses.xmlstream
    try:
        struct = reader.fromStream(s)
    finally:
        s.close()

    # This icon map or something like it should probably be the default
    # for trees of DOMTreeNode's, but there's no mechanism for setting a
    # default yet.
    iconmap = {
        'type': 'photo',
        Node.DOCUMENT_TYPE_NODE: ('doctype', 'doctypeplus', 'doctypeminus'),
        Node.COMMENT_NODE: ('comment', 'commentplus', 'commentminus'),
        Node.ELEMENT_NODE: ('xmlelt', 'xmleltplus', 'xmleltminus'),
        Node.ATTRIBUTE_NODE: ('xmlatt', 'xmlattplus', 'xmlattminus'),
        Node.TEXT_NODE: ('xmltext', 'xmltextplus', 'xmltextminus'),
        'default': ('node', 'nodeplus', 'nodeminus')
    }

    win = Tk()
    win.title("DOM Tree")
    tt = TextTree(win, win, icons=iconmap,
                  funcs={
                      'showContent': dummy.showContent,
                      'showAtts': dummy.showAtts,
                      'glimpse': dummy.glimpse,
                      'unGlimpse': dummy.unGlimpse
                  })
    tt.pack(expand=YES, fill=BOTH)
    tt.showTree(struct, DT_XMLDOM, 1,
                props=NP_AUTOBUILD | NP_ALLOW_CHILDREN | NP_ABSTRACT,
                state=NS_EXPANDED)
    return (win, tt)
# Accept only a bare "<word characters>.xml" name.  \Z is used instead of $
# because $ also matches just before a trailing newline, which would let a
# name ending in "\n" through to open() below.
if not re.match(r"\w+\.xml\Z", newFile):
    print("Location: /error.html\n")
    sys.exit()

# create forum files from xml files
try:
    newForumFile = open("../htdocs/XML/" + newFile, "w")
    forumsFile = open("../htdocs/XML/forums.xml", "r+")
    templateFile = open("../htdocs/XML/template.xml")
except IOError:
    print("Location: /error.html\n")
    sys.exit()

# parse forums document
reader = PyExpat.Reader()
document = reader.fromStream(forumsFile)

# add new forum element: <forum filename="..."><name>...</name></forum>
forum = document.createElement("forum")
forum.setAttribute("filename", newFile)

name = document.createElement("name")
nameText = document.createTextNode(form["name"].value)
name.appendChild(nameText)
forum.appendChild(name)

# obtain root element of forum and its first existing <forum> entry
documentNode = document.documentElement
firstForum = documentNode.getElementsByTagName("forum")[0]
# Using 4DOM to traverse an XML Document.
#
# Opens article2.xml, parses it with the PyExpat reader, then prints the
# name of the root element followed by the name of each of its child nodes.

import sys

from xml.dom.ext import StripXml
from xml.dom.ext.reader import PyExpat
from xml.parsers.expat import ExpatError

# open XML file
try:
    file = open("article2.xml")
except IOError:
    sys.exit("Error opening file")

# parse contents of XML file
try:
    reader = PyExpat.Reader()
    document = reader.fromStream(file)
    file.close()
except ExpatError:
    sys.exit("Error processing XML file")

# get root element, passed through StripXml first
rootElement = StripXml(document.documentElement)
root_line = "Here is the root element of the document: %s" % \
    rootElement.nodeName
print(root_line)

# traverse all child nodes of root element
print("The following are its child elements:")

for node in rootElement.childNodes:
    print(node.nodeName)
def searchPathXml(search_path, xmlfile):
    """
    Evaluate an XPath expression against an XML file under PATH.

    search_path -- XPath expression handed to Evaluate
    xmlfile     -- file name, joined onto the module-level PATH

    Returns the result of Evaluate, run with the document element as the
    context node.
    """
    reader = PyExpat.Reader()
    xml_file = open(os.path.join(PATH, xmlfile), "r")
    try:
        dom = reader.fromStream(xml_file)
    finally:
        # Close the file once it has been parsed, or if parsing fails.
        xml_file.close()
    return Evaluate(search_path, dom.documentElement)
def load4DOM():
    """
    Import the xml.dom pieces on demand and return a new PyExpat Reader.

    The imports happen inside the function, so an ImportError surfaces here
    rather than when the enclosing module is loaded.
    """
    from xml.dom import Node
    from xml.dom.ext.reader import PyExpat

    return PyExpat.Reader()
def DOMParseString(self, xml):
    """Return the document a fresh PyExpat reader builds from the string xml."""
    return PyExpat.Reader().fromString(xml)
def _bb_read_file(path):
    """Return the whole content of the file at path, closing it afterwards."""
    handle = open(path)
    try:
        return handle.read()
    finally:
        handle.close()


def bb_enable_iface_with_config(devcf):
    """
    Configuration of eth interfaces via system-tools-backends.

    devcf.param supplies the settings:
      'dhcp'          -- the string 'True' selects DHCP; anything else
                         selects a static configuration
      'eth_to_conf'   -- interface name, used for <file> and <dev>
      'ip_computer', 'mask_computer', 'gw_computer', 'net_computer'
                      -- address, netmask, gateway and network; read only
                         for a static configuration.  The broadcast address
                         comes from ipBroadcast(ip_computer, mask_computer).

    Returns (sysret, xmlcfg, xmlcfgout):
      sysret    -- BBERRCFGDEV when the <success> text in the backend reply
                   is '0', BBNOERR otherwise
      xmlcfg    -- the XML request that was sent
      xmlcfgout -- the backend output with empty lines filtered out

    * Example of a static request passed to system-tools-backends:

      <?xml version='1.0' encoding='UTF-8' standalone='yes'?>
      <interface type='ethernet'>
        <configuration>
          <address>10.0.0.8</address>
          <auto>1</auto>
          <bootproto>none</bootproto>
          <broadcast>10.0.0.255</broadcast>
          <file>eth1</file>
          <gateway>10.0.0.1</gateway>
          <netmask>255.255.255.0</netmask>
          <network>10.0.0.0</network>
        </configuration>
        <dev>eth1</dev>
        <enabled>1</enabled>
      </interface>
      <!-- GST: end of request -->

    * Example of a dhcp request passed to system-tools-backends:

      <interface type='ethernet'>
        <configuration>
          <auto>1</auto>
          <bootproto>dhcp</bootproto>
          <file>eth0</file>
        </configuration>
        <dev>eth0</dev>
        <enabled>1</enabled>
      </interface>
      <!-- GST: end of request -->
    """
    netcfg_xmlfile = tempfile.NamedTemporaryFile()
    netcfgout_xmlfile = tempfile.NamedTemporaryFile()

    writer = MarkupWriter(netcfg_xmlfile, encoding='UTF-8', indent=u"yes",
                          standalone="yes")
    writer.startDocument()
    writer.startElement(u'interface')
    writer.attribute(u'type', u'ethernet')

    writer.startElement(u'configuration')
    if devcf.param['dhcp'] == 'True':
        conf = {'auto': '1',
                'bootproto': 'dhcp',
                'file': devcf.param['eth_to_conf']}
    else:
        broadcast = ipBroadcast(devcf.param['ip_computer'],
                                devcf.param['mask_computer'])
        conf = {'address': devcf.param['ip_computer'],
                'auto': '1',
                'bootproto': 'none',
                'broadcast': broadcast,
                'file': devcf.param['eth_to_conf'],
                'gateway': devcf.param['gw_computer'],
                'netmask': devcf.param['mask_computer'],
                'network': devcf.param['net_computer']}

    for confparam in conf.keys():
        writer.startElement(unicode(confparam))
        writer.text(unicode(conf[confparam]))
        writer.endElement(unicode(confparam))
    writer.endElement(u'configuration')

    writer.startElement(u'dev')
    writer.text(unicode(devcf.param['eth_to_conf']))
    writer.endElement(u'dev')

    writer.startElement(u'enabled')
    writer.text(u'1')
    # The end tag must name the element opened above ('enabled').
    writer.endElement(u'enabled')

    writer.endElement(u'interface')
    writer.text(u'\n')
    writer.comment(u' GST: end of request ')
    writer.endDocument()
    # Make sure the request is on disk before the backend reads it by name.
    netcfg_xmlfile.flush()

    xmlcfg = _bb_read_file(netcfg_xmlfile.name)

    # Feed the request to the backend; grep drops empty lines from its reply.
    net_cfg_cmd = "cat " + netcfg_xmlfile.name + " | /usr/share/setup-tool-backends/scripts/network-conf -d enable_iface_with_config | grep -vE \"^$\" > " + netcfgout_xmlfile.name
    os.system(net_cfg_cmd)

    xmlcfgout = _bb_read_file(netcfgout_xmlfile.name)

    reader = PyExpat.Reader()
    reply = open(netcfgout_xmlfile.name, "r")
    try:
        dom = reader.fromStream(reply)
    finally:
        reply.close()

    successCfg = Evaluate("success/text( )",
                          dom.documentElement)[0].nodeValue
    if successCfg == '0':
        sysret = BBERRCFGDEV  # Error configuration dev
    else:
        sysret = BBNOERR  # Ok

    return (sysret, xmlcfg, xmlcfgout)
def processOper(fin, fout):
    """
    Run the operation described by an XML document over telnet or serial.

    fin  -- sys.stdin, or an object whose .name is the path of the XML file
    fout -- handed to child.setlog()

    The document supplies the initial function (initial_func), a default
    timeout for commands (default_timeout), a send delay (send_delay) and
    the link parameters.  When eth_params has a non-empty 'dev' attribute
    the session is a telnet to its 'ip' and 'port'; otherwise the port
    described by serial_params is opened, guarded by the lock file
    /var/lock/LCK..ttyS<tty>.

    Returns BBERRLOCK when that lock file already exists, otherwise the
    value returned by func_parse.

    Raises ValueError when the serial parity is not "N", "E" or "O".
    """
    reader = PyExpat.Reader()
    if fin is not sys.stdin:
        source = open(fin.name, "r")
        try:
            dom = reader.fromStream(source)
        finally:
            source.close()
    else:
        dom = reader.fromStream(sys.stdin)

    root = dom.documentElement
    cmd_ini = Evaluate("initial_func/text( )", root)[0].nodeValue
    default_timeout = Evaluate("default_timeout/text( )", root)[0].nodeValue
    sdelay = int(Evaluate("send_delay/text( )", root)[0].nodeValue)

    ethnode = Evaluate("eth_params", root)
    eth_dev = ethnode[0].getAttribute('dev')

    if len(eth_dev) != 0:
        # Ethernet: drive a telnet session; nothing to lock or close.
        ip = ethnode[0].getAttribute('ip')
        port = ethnode[0].getAttribute('port')
        child = pexpect.spawn('telnet ' + ip + " " + port)
        child.setlog(fout)
        return func_parse(dom, child, cmd_ini, default_timeout, sdelay)

    serial_node = Evaluate("serial_params", root)[0]
    tty_read = serial_node.getAttribute('tty')
    baudrate_read = serial_node.getAttribute('baudrate')
    bits_read = serial_node.getAttribute('bits')
    parity_read = serial_node.getAttribute('parity')
    stopbits_read = serial_node.getAttribute('stopbits')
    xonxoff_read = serial_node.getAttribute('xonxoff')
    rtscts_read = serial_node.getAttribute('rtscts')

    parities = {"N": PARITY_NONE, "E": PARITY_EVEN, "O": PARITY_ODD}
    if parity_read not in parities:
        # Fail with a clear message instead of an unbound-variable error.
        raise ValueError("unsupported parity: %r" % (parity_read,))

    # Check the lock before touching the port, so a port that is in use is
    # neither opened nor left open when we bail out.
    lock_path = "/var/lock/LCK..ttyS" + tty_read
    if os.path.exists(lock_path):
        return BBERRLOCK

    ser = serial.Serial(
        port=int(tty_read),
        baudrate=int(baudrate_read),
        bytesize=int(bits_read),
        parity=parities[parity_read],
        stopbits=int(stopbits_read),
        timeout=None,               # set a timeout value, None to wait forever
        xonxoff=int(xonxoff_read),  # enable software flow control
        rtscts=int(rtscts_read),    # enable RTS/CTS flow control
        writeTimeout=None,          # set a timeout for writes
    )
    try:
        lock = open(lock_path, "w")
        try:
            try:
                lock.write(str(os.getpid()))
            finally:
                # Closing flushes the pid into the lock file.
                lock.close()
            child = pexpect.spawn(ser.fd)
            child.setlog(fout)
            return func_parse(dom, child, cmd_ini, default_timeout, sdelay)
        finally:
            os.remove(lock_path)
    finally:
        ser.close()
# Write the text of every //conclusion/assigned/name element found in the
# XML document named by argv[1] into the file named by argv[2].
import sys

from xml.dom.ext.reader import PyExpat
from xml.xpath import Evaluate

subtype = "//conclusion/assigned/name"

subtype_file = open(sys.argv[2], 'w')
dom = PyExpat.Reader().fromUri(sys.argv[1])
elements = Evaluate(subtype, dom.documentElement)

# Each match contributes the data of its first child node, with no separator.
subtype_file.writelines(element.childNodes[0].data for element in elements)
subtype_file.close()