def searchDocuments(self, user, url="", namespaces={}, xpath_="", range_=slice(0, -1), depth=-1, version="", restore=False):
    """
    Return the documents selected for ``user`` as a tuple of elements.

    The search starts at ``self.data[user]``. When ``url`` is given, the
    start node is narrowed to the first node matching
    ``//*[xmlu:src='<url>']``. When ``xpath_`` is given it is evaluated
    against the start node and the matches are sliced with ``range_``;
    otherwise the start node itself is the only result. Each result is
    re-parsed from its serialized XML with ``et.XML``.

    Note that the default ``range_`` of ``slice(0, -1)`` leaves out the last
    match (the second example below returns two of the three ``<spam />``
    nodes). ``depth``, ``version`` and ``restore`` are accepted but not used
    here.

    >>> store = XMLStorage(spock=minidom.parseString("<restaurant><meal><eggs /><spam /></meal><meal><eggs /><spam /><spam /></meal><foo bar='42'>6*7</foo></restaurant>"))
    >>> et.tostring(store.searchDocuments("spock")[0])
    '<restaurant><meal><eggs /><spam /></meal><meal><eggs /><spam /><spam /></meal><foo bar="42">6*7</foo></restaurant>'
    >>> list(et.tostring(el) for el in store.searchDocuments("spock", xpath_="//spam"))
    ['<spam />', '<spam />']
    """
    root = self.data[user]
    context = xpath.XPathContext()

    if url:
        # Only the first node carrying this source URL is used.
        root = context.find("//*[xmlu:src='{}']".format(url), root)[0]

    if xpath_:
        # NOTE(review): the mapping is expanded into keyword arguments of
        # find() -- confirm this is how the xpath library expects prefixes.
        matches = context.find(xpath_, root, **namespaces)[range_]
    else:
        matches = (root, )

    # Materialize eagerly: returning a generator here causes issues.
    converted = []
    for node in matches:
        converted.append(et.XML(node.toxml()))
    return tuple(converted)
def multitest(self, expr, **kwargs):
    """Evaluate ``expr`` through every entry point and check they agree.

    For each of ``find``, ``findnode``, ``findvalue`` and ``findvalues`` the
    expression is run against ``self.doc`` three ways: through the
    module-level function, through a pre-compiled ``xpath.XPath`` object,
    and through an ``xpath.XPathContext`` built from ``kwargs``. The three
    outcomes are asserted equal.

    An ``xpath.XPathError`` raised by a call is recorded as the exception's
    class, so failures are compared like ordinary results.

    Returns a dict mapping each function name to the module-level outcome.
    """
    def outcome(target, method_name, *call_args, **call_kwargs):
        # Convert an XPathError into its class so assertEqual can compare it.
        try:
            return getattr(target, method_name)(*call_args, **call_kwargs)
        except xpath.XPathError as error:
            return error.__class__

    ctx = xpath.XPathContext(**kwargs)
    precompiled = xpath.XPath(expr)

    results = {}
    for name in ('find', 'findnode', 'findvalue', 'findvalues'):
        expected = outcome(xpath, name, expr, self.doc, **kwargs)
        results[name] = expected
        self.assertEqual(
            expected, outcome(precompiled, name, self.doc, **kwargs))
        self.assertEqual(
            expected, outcome(ctx, name, expr, self.doc, **kwargs))
    return results
def setUp(self):
    """Parse both fixture documents and build a namespace-aware context.

    ``self.doc`` comes from ``self.xml`` and ``self.docns`` from
    ``self.xmlns``. The context uses ``http://a.example.com`` as default
    namespace and maps the prefix ``b`` to ``http://b.example.com``.
    """
    parse = xml.dom.minidom.parseString
    self.doc = parse(self.xml)
    self.docns = parse(self.xmlns)

    prefixes = {'b': 'http://b.example.com'}
    self.context = xpath.XPathContext(
        default_namespace='http://a.example.com',
        namespaces=prefixes)
def run_test():
    """Print the file identifier of a fixed metadata file, queried two ways.

    The same XPath is evaluated once through the module-level
    ``xpath.findvalues`` and once through an ``XPathContext`` that has the
    same prefixes registered; each result is printed.
    """
    source = '/homespace/gaubert/RODD/src-data/130810-vprodnav/3.xml'
    doc = xml.dom.minidom.parse(source).documentElement

    namespaces = {
        'gmi': "http://www.isotc211.org/2005/gmi",
        'eum': "http://www.eumetsat.int/2008/gmi",
        'gco': "http://www.isotc211.org/2005/gco",
        'gmd': "http://www.isotc211.org/2005/gmd",
        "xsi": "http://www.w3.org/2001/XMLSchema-instance",
    }

    # Register the same prefixes on the context as are passed per call.
    context = xpath.XPathContext()
    for prefix, uri in namespaces.items():
        context.namespaces[prefix] = uri

    query = '/gmi:MI_Metadata/gmd:fileIdentifier/gco:CharacterString'
    for finder in (xpath.findvalues, context.findvalues):
        result = finder(query, doc, namespaces=namespaces)
        print("Result = %s\n" % (result))
def setUp(self):
    """Parse the fixture document and prepare a context with variables.

    The context holds an unqualified ``start`` (the integer 2) and ``end``
    (the string ``'4'``), plus a second ``start`` (3) keyed by the
    ``(namespace URI, name)`` tuple for ``http://anaconda.python.org``,
    which is also registered under the prefix ``ana``.
    """
    self.doc = xml.dom.minidom.parseString(self.xml)

    ctx = xpath.XPathContext()
    ctx.namespaces['ana'] = 'http://anaconda.python.org'
    ctx.variables['start'] = 2
    ctx.variables['end'] = '4'
    ctx.variables[('http://anaconda.python.org', 'start')] = 3
    self.context = ctx
def get(self, *args):
    """Respond with information about the Unicode text given in ``args[1]``.

    ``args[1]`` is URL-unquoted and decoded as UTF-8. If it decodes to more
    than one character it is treated as a search term: a web search limited
    to fileformat.info is fetched and the first hit's title and URL are sent
    back through ``self.ok``. For a single character its name is looked up
    with ``unicodedata``; when that fails, the character's page is fetched
    (``URI % code point``) and the name is taken from the page title.

    Every early exit answers through ``self.ok`` with a short message.
    """
    character = args[1] or ""
    if not character:
        return self.ok("Please provide a UTF-8 character.")

    character = urllib.unquote(character)
    try:
        unic = character.decode("utf8")
    except UnicodeError:
        return self.ok("Error decoding UTF-8 character.")

    if len(unic) > 1:
        # Search for the page. The term is encoded to UTF-8 bytes first:
        # quoting a unicode string with non-ASCII characters fails.
        query = urllib.quote(unic.encode("utf8"))
        rv = api.urlfetch.fetch(
            "http://ajax.googleapis.com/ajax/services/search/web?v=1.0&safe=off&q=site:fileformat.info%%20%s" % query).content
        js = simplejson.loads(rv)
        results = (js.get('responseData') or {}).get('results') or []
        if not results:
            # Nothing to index into; answer instead of raising.
            return self.ok("No results found.")
        first_hit = results[0]
        # NOTE(review): str() may fail on a non-ASCII title -- confirm.
        rv = str(first_hit['titleNoFormatting'])
        # Expand decimal character references left in the title.
        rv = re.sub(r'&#(\d+);', (lambda m: unichr(int(m.group(1)))), rv)
        m = re.search(r'U\+([0-9a-fA-F]+)', rv)
        rv += " - " + first_hit['url']
        if m:
            # Append the character itself for the code point in the title.
            rv += ' (%s)' % (unichr(int(m.group(1), 16)))
        return self.ok(rv)

    try:
        name = unicodedata.name(unic[0])
    except ValueError:
        # Our unicodedata was lacking; let's ask fileformat.info
        import html5lib
        import xpath
        import StringIO
        url = URI % ord(unic[0])
        payload = api.urlfetch.fetch(url).content
        fp = StringIO.StringIO(payload)
        try:
            pars = html5lib.parse(fp, treebuilder="dom")
        except Exception:
            # Best effort: any parser failure is treated as "no document".
            pars = None
        if not pars:
            name = "(No name found)"
        else:
            con = xpath.XPathContext()
            con.namespaces["x"] = "http://www.w3.org/1999/xhtml"
            it = con.find("//x:title//text()", pars.documentElement)
            if not it:
                name = "(No name found)"
            else:
                # Join the title's text nodes and collapse whitespace.
                name = "".join(node.data for node in it)
                name = " ".join(name.split())
                # Prefer the part of the title inside single quotes.
                m = re.search(r"'(.*?)'", name)
                if m:
                    name = m.group(1)
    # NOTE(review): `name` is computed but not used or returned in the code
    # visible here -- the tail of this handler may be missing; confirm.
def getXpathList(node, queryList):
    """Evaluate groups of XPath queries against ``node`` and tabulate them.

    ``queryList`` is an iterable of query groups. Within a group every query
    yields one column: a single-item column when the result is a unicode
    string, otherwise the ``nodeValue`` of each returned item. The columns
    of a group are zipped into rows, padding short columns with ``u''``.

    Returns a list with one list of rows per query group.
    """
    ctx = xpath.XPathContext(node)

    def column_for(expression):
        # One query -> one column of values.
        found = ctx.find(expression, node)
        if isinstance(found, unicode):
            return [found]
        return [item.nodeValue for item in found]

    table = []
    for group in queryList:
        columns = [column_for(expression) for expression in group]
        rows = izip_longest(*columns, fillvalue=u'')
        table.append([list(row) for row in rows])
    return table
def __init__(self, filename, base_xpath="//entries"):
    """Parse ``filename`` into a DOM tree and select the entry nodes.

    The parsed tree is kept in ``self.tree``, an XPath context built from it
    in ``self.context``, and the nodes matching ``base_xpath`` in
    ``self.entries``.

    Raises:
        XmlError: the file is not well-formed XML. The message (also
            logged) carries the line, the expat error text and the offset.
        IOError: re-raised unchanged after being logged.
    """
    # parse the document into a DOM tree
    # Error handling here is based on: http://stackoverflow.com/questions/192907/xml-parsing-elementtree-vs-sax-and-dom
    try:
        self.tree = rdf_tree = MD.parse(filename)
    except xml.parsers.expat.ExpatError as e:
        # Report the human-readable expat message rather than the bare
        # numeric code, and keep each part of the report on its own line.
        msg = (
            "Error: The data file is not proper XML! Cannot continue.\n"
            "[XML] Error (line {}): {}\n"
            "[XML] Offset: {}"
        ).format(e.lineno, xml.parsers.expat.ErrorString(e.code), e.offset)
        L.error(msg)
        raise XmlError(msg)
    except IOError as e:
        msg = "[IO] I/O Error {}: {}".format(e.errno, e.strerror)
        L.error(msg)
        raise

    # read the default namespace and prefix from the root node
    self.context = xpath.XPathContext(rdf_tree)
    self.entries = self.context.find(base_xpath, rdf_tree)
def setUp(self):
    """Parse the fixture and build a context mapping ``a`` to its URI."""
    namespace_map = {'a': 'http://www.example.com/a'}
    self.doc = xml.dom.minidom.parseString(self.xml)
    self.context = xpath.XPathContext(namespaces=namespace_map)
from google.appengine import api import base def do_generic_parse(url, xpth): url=url.replace(' ','%20') thepage=api.urlfetch.fetch(url).content fp=StringIO.StringIO(thepage) try: pars=html5lib.parse(fp, treebuilder="dom") except Exception, e: return "something failed: %s (%s)."%(str(e),str(type(e))) if not pars: return "Parsing failed for some reason" con=xpath.XPathContext() con.namespaces["x"]="http://www.w3.org/1999/xhtml" it=con.find(xpth+"//text()", pars.documentElement) if not it: return "ENOTFOUND" stuff=reduce((lambda x,y: x+y), map((lambda x: x.data), it)) stuff=stuff.replace("\n"," ").strip() return stuff[:200] class Main(base.RequestHandler): def get(self,*args): stuff=os.environ['PATH_INFO'] a=stuff.split('/',2) stuff=a[2] try:
def setUpClass(cls):
    """Create the class-wide XPath context with ``gml`` and ``gsf`` bound."""
    ctx = xpath.XPathContext()
    bindings = (
        ('gml', 'http://www.opengis.net/gml'),
        ('gsf', 'http://geoscript.org/feature'),
    )
    for prefix, uri in bindings:
        ctx.namespaces[prefix] = uri
    cls.xpathctx = ctx
def __init__(self, test, node):
    """Keep ``test`` and ``node`` and build an XPath context from ``node``."""
    self.test, self.node = test, node
    self.ctx = xpath.XPathContext(node)
def gettranscode(self):
    """Return the value of the first child of the first transcode match.

    The XPath comes from ``self._config.trancodexpath`` and is evaluated
    against ``self.dom`` with a fresh, empty context. The indexing fails if
    nothing matches or the matched node has no children.
    """
    matches = xpath.XPathContext().find(self._config.trancodexpath, self.dom)
    first_match = matches[0]
    first_child = first_match.childNodes[0]
    return first_child.nodeValue
def setUp(self): self.doc = xml.dom.minidom.parseString(self.xml) self.context = xpath.XPathContext()
def test_explicit_document_context_prefix(self):
    """A prefix declared on a separate document resolves for ``self.doc``.

    The context is built from a document whose root declares ``pork``;
    querying ``//pork:item`` on ``self.doc`` must then give ``porcupine``.
    """
    declaring_doc = xml.dom.minidom.parseString(
        """<doc xmlns:pork="http://porcupine.example.org/" />""")
    ctx = xpath.XPathContext(declaring_doc)
    values = ctx.findvalues('//pork:item', self.doc)
    self.assertEqual(values, ['porcupine'])
def test_empty_context(self):
    """With an empty context, ``//item`` yields no values for ``self.doc``."""
    values = xpath.XPathContext().findvalues('//item', self.doc)
    self.assertEqual(values, [])
def getserialno(self):
    """Return the value of the first child of the first serial-number match.

    The XPath comes from ``self._config.serialnoxpath`` and is evaluated
    against ``self.dom`` with a fresh, empty context. The indexing fails if
    nothing matches or the matched node has no children.
    """
    matches = xpath.XPathContext().find(self._config.serialnoxpath, self.dom)
    first_match = matches[0]
    first_child = first_match.childNodes[0]
    return first_child.nodeValue