def GetPlayblastResolution(self, projectName):
    """Look up the playblast resolution configured for a project.

    Parses the shared ``projects.xml`` file and returns ``[width, height]``
    as integers. ``[0, 0]`` is returned unless the project named
    ``projectName`` yields exactly one ``playblast/width`` value and exactly
    one ``playblast/height`` value.
    """
    import xpath

    root = xml.dom.minidom.parse(
        r'\\file-cluster\GDC\Resource\Support\bin\projects.xml'
    ).documentElement

    width_values = xpath.findvalues(
        "//project[name='%s']/playblast/width" % projectName, root)
    height_values = xpath.findvalues(
        "//project[name='%s']/playblast/height" % projectName, root)

    # Anything other than a single match for both dimensions is treated
    # as "no resolution configured".
    if len(width_values) != 1 or len(height_values) != 1:
        return [0, 0]
    return [int(width_values[0]), int(height_values[0])]
def run_test():
    """Print the file identifier of one sample metadata file, two ways.

    The same XPath is evaluated first with the module-level
    ``xpath.findvalues`` and then with an ``XPathContext`` whose namespace
    table has been filled in explicitly; each result is printed.
    """
    root = xml.dom.minidom.parse(
        '/homespace/gaubert/RODD/src-data/130810-vprodnav/3.xml'
    ).documentElement

    namespace_map = {
        'gmi': "http://www.isotc211.org/2005/gmi",
        'eum': "http://www.eumetsat.int/2008/gmi",
        'gco': "http://www.isotc211.org/2005/gco",
        'gmd': "http://www.isotc211.org/2005/gmd",
        "xsi": "http://www.w3.org/2001/XMLSchema-instance",
    }

    # Register the same prefixes on a dedicated context object.
    ctx = xpath.XPathContext()
    for prefix in ('gmi', 'eum', 'gco', 'gmd', 'xsi'):
        ctx.namespaces[prefix] = namespace_map[prefix]

    identifier_query = '/gmi:MI_Metadata/gmd:fileIdentifier/gco:CharacterString'

    via_module = xpath.findvalues(identifier_query, root,
                                  namespaces=namespace_map)
    print("Result = %s\n" % (via_module))

    via_context = ctx.findvalues(identifier_query, root,
                                 namespaces=namespace_map)
    print("Result = %s\n" % (via_context))
def GetSetting(self, projectName, keyName):
    """Return the value of ``keyName`` for a project, with a default fallback.

    The shared ``projects.xml`` file is searched for the project called
    ``projectName`` first and for the project called ``'Default'`` second.
    The first of those that yields exactly one value wins; if neither does,
    an empty string is returned.
    """
    import xpath

    root = xml.dom.minidom.parse(
        r'\\file-cluster\GDC\Resource\Support\bin\projects.xml'
    ).documentElement

    for candidate in (projectName, 'Default'):
        matches = xpath.findvalues(
            "//project[name='%s']/%s" % (candidate, keyName), root)
        if len(matches) == 1:
            return matches[0]
    return ''
def get_value_from_xpath(xpath_sel, xml_doc):
    """Test helper: return the first value selected by an XPath expression.

    ``xml_doc`` is an XML string; it is parsed and ``xpath_sel`` is
    evaluated against the resulting document.

    Raises ``Exception`` when the expression selects nothing, or when the
    XPath evaluation raises ``XPathTypeError``.
    """
    document = parseString(xml_doc)
    try:
        matches = findvalues(xpath_sel, document)
        first_match = matches[0]
    except XPathTypeError:
        raise Exception("Invalid Xpath syntax")
    except IndexError:
        raise Exception("%s not found in xml_document" % xpath_sel)
    return first_match
def run_matcher():
    """Print the file identifier of every XML file under the sample directory.

    For each file the path is printed, then the identifier is looked up and
    reported twice in a row.
    """
    namespaces = {
        'gmi': "http://www.isotc211.org/2005/gmi",
        'eum': "http://www.eumetsat.int/2008/gmi",
        'gco': "http://www.isotc211.org/2005/gco",
        'gmd': "http://www.isotc211.org/2005/gmd",
        "xsi": "http://www.w3.org/2001/XMLSchema-instance",
    }
    identifier_query = '/gmi:MI_Metadata/gmd:fileIdentifier/gco:CharacterString'

    for xml_path in fs.dirwalk(
            '/homespace/gaubert/RODD/src-data/130810-vprodnav/', "*.xml"):
        print("file = %s\n" % (xml_path))
        root = xml.dom.minidom.parse(xml_path).documentElement

        # Two identical lookup-and-print passes per file.
        for _ in range(2):
            identifiers = xpath.findvalues(identifier_query, root,
                                           namespaces=namespaces)
            print("[id:%s , path:%s]\n" % (identifiers[0], xml_path))
def GetTextureFormat(self, projectName):
    """Return the texture format configured for a project.

    Parses the shared ``projects.xml`` file. When the project named
    ``projectName`` has exactly one ``texture/format`` value, that value is
    returned; otherwise the result is ``'iff'``.
    """
    import xpath

    root = xml.dom.minidom.parse(
        r'\\file-cluster\GDC\Resource\Support\bin\projects.xml'
    ).documentElement

    matches = xpath.findvalues(
        "//project[name='%s']/texture/format" % projectName, root)
    return matches[0] if len(matches) == 1 else 'iff'
def test_not_escape(self):
    """A widget built with ``escape=False`` renders the ``<i>`` markup as an element."""

    class F(Form):
        fields = [
            Field('name',
                  conv=convs.Char(),
                  widget=self.widget(escape=False)),
        ]

    widget = F(self.env).get_field('name').widget
    document = self.parse(widget.render('<i>char display</i>'))

    # The text is only reachable under an <i> element if the markup was
    # emitted unescaped.
    italic_text = xpath.findvalues('.//*:%s/*:i/text()' % self.tag, document)
    self.assertEqual(''.join(italic_text), 'char display')
def find(self, query, context, get_value, charset, result_list):
    """
    Evaluate the XPath 'query' against 'context' and append the outcome to
    'result_list'.

    When 'get_value' is true the module-level 'findvalues' is used and every
    value is appended stripped and encoded with 'charset'.

    Otherwise the module-level 'find' is used and each node contributes its
    stripped, encoded text (attribute value, character data, or serialized
    XML, depending on the node type), or None when that text is empty.

    In both modes a single None is appended when the query matches nothing.
    """
    # DOM nodeType codes that get special treatment.
    attribute_type = 2          # ATTRIBUTE_NODE
    data_types = (3, 4, 6, 7)   # TEXT, CDATA_SECTION, ENTITY, PROCESSING_INSTRUCTION

    if get_value:
        values = findvalues(query, context)
        if not values:
            result_list.append(None)
            return
        for value in values:
            result_list.append(value.strip().encode(charset))
        return

    nodes = find(query, context)
    if not nodes:
        result_list.append(None)
        return

    for node in nodes:
        if node.nodeType == attribute_type:
            text = node.value
        elif node.nodeType in data_types:
            text = node.data
        else:
            text = node.toxml()
        text = text.strip()
        result_list.append(text.encode(charset) if text else None)
def get_tgs_codecover_raw(tar):
    """Extract per-line coverage facts from a CodeCover report archive.

    Reads ``coverage/report.csv`` and
    ``coverage/report_html/report_single.html`` out of ``tar`` (assumed to be
    an open ``tarfile.TarFile``-like object providing ``extractfile`` --
    TODO confirm against caller).

    Returns a list with one tuple per source line found in the HTML report:

        ((full_file_name, line_number),
         fully_cvrd, partially_cvrd, not_cvrd,
         terms_only, branches_only, is_unreachable_in_bytecode)

    NOTE(review): this block uses Python 2 only constructs (builtin
    ``reduce``, tuple-parameter ``lambda``, list-returning ``zip``/``map``).
    """
    ## overview / filenames information
    f = tar.extractfile('coverage/report.csv')
    reader = csv.reader(f)
    packages = set([])
    name_to_full_name_map = {}  ## short class name -> list of full '.java' paths
    for row in reader:
        # Column 0 holds the dotted name, column 2 the kind of the row.
        if row[2] == 'package':
            packages.add(row[0])
        # Only keep classes whose dotted prefix is a package already seen.
        if row[2] == 'class' and '.'.join(row[0].split('.')[:-1]) in packages:
            name = row[0].split('.')[-1]
            if name in name_to_full_name_map:
                l = name_to_full_name_map[name]
            else:
                l = []
            l.append(row[0].replace('.', '/') + '.java')
            name_to_full_name_map[name] = l
    ## code coverage information
    f = tar.extractfile('coverage/report_html/report_single.html')
    # `parse` is defined outside this block; presumably an XML/HTML DOM parser.
    tree = parse(f)
    ## next, read hyperlinking information from the overview table!
    tbody = xpath.find('//tbody[@class="overview"]', tree)[0]
    trs = [elem for elem in tbody.getElementsByTagName("tr")]
    first_tds = [tr.getElementsByTagName("td")[0] for tr in trs]
    # Flatten to (href, link text) pairs for every <a> in the first column.
    first_tds_names = reduce(
        lambda a, b: a + b,
        [[(a.getAttribute("href"), a.firstChild.nodeValue.strip())
          for a in td.getElementsByTagName("a")] for td in first_tds])
    filtered_tds_names = [(x, y) for (x, y) in first_tds_names
                          if y in name_to_full_name_map]
    # name[1:] drops the first character of the href (presumably the '#').
    xrefs = [
        xpath.findnode('//a[@name="%s"]' % name[1:], tree)
        for (name, _) in filtered_tds_names
    ]
    code_hash = [myx.parentNode.parentNode.getAttribute('id') for myx in xrefs]
    regexp_match = [re.match('F(\d+)(L\d+)?', x) for x in code_hash]
    # File index taken from the 'F<n>' id prefix; 0 when the id has none.
    regexp_numbers = [
        int(match.group(1)) if match else 0 for match in regexp_match
    ]
    # Pairs of (file index, short name).
    zipped_numbers = zip(regexp_numbers,
                         map(lambda (_, x): x, filtered_tds_names))

    def relevant_numbers(fn):
        # All file indices recorded for the short name `fn`.
        return [x for (x, y) in zipped_numbers if y == fn]

    ## next build up this map: short name -> [(full name, file index), ...]
    fmap = {
        name: zip(name_to_full_name_map[name], relevant_numbers(name))
        for name in name_to_full_name_map
    }
    ## and the short fname map (header text of each code table, '.java' removed)
    short_name_elems = [
        s.replace('.java', '') for s in
        xpath.findvalues('//thead[@class="code"]/tr/th/text()', tree)
    ]

    ## parse lines
    def get_lines():
        # Collect every <td class="code text"> inside a <tr class="code">
        # of any <tbody class="code">.
        tbodys = xpath.find('//tbody[@class="code"]', tree)
        trs = reduce(lambda a, b: a + b, [[
            elem for elem in tbody.getElementsByTagName("tr")
            if elem.getAttribute('class') == 'code'
        ] for tbody in tbodys])
        tds = reduce(lambda a, b: a + b, [[
            elem for elem in tr.getElementsByTagName("td")
            if elem.getAttribute('class') == 'code text'
        ] for tr in trs])
        return tds

    lines = get_lines()
    result = []
    for line in lines:
        lnumberStr = line.parentNode.getAttribute('id')
        # Ids without an 'F' prefix are treated as belonging to file 0.
        if not lnumberStr.startswith('F'):
            lnumberStr = 'F0' + lnumberStr
        fnumber, lnumber = map(int, re.match(r'F(\d+)L(\d+)', lnumberStr).groups())
        text = []

        def get_text_nodes(n):
            # Depth-first collection of all text-node values under `n`.
            if n.nodeType == line.TEXT_NODE:
                text.append(n.nodeValue)
            for child in n.childNodes:
                get_text_nodes(child)

        get_text_nodes(line)
        code = ''.join(text).strip()
        is_unreachable_in_bytecode = code in ["continue;", "break;"]
        # One flag per coverage class found on a direct <span> child.
        fully_cvrd, partially_cvrd, not_cvrd = [
            len(xpath.find('span[contains(@class, "%s")]' % token, line)) > 0
            for token in ("fullyCovered", "partlyCovered", "notCovered")
        ]
        # True when the line has no Loop/Branch/Statement/Operator span.
        terms_only = all(
            len(
                xpath.find('span[contains(@class, "%s_Coverage")]' % token,
                           line)) == 0
            for token in ("Loop", "Branch", "Statement", "Operator"))
        # Same check without "Branch".
        ## Terms are allowed too, e.g., } else if { ...
        branches_only = all(
            len(
                xpath.find('span[contains(@class, "%s_Coverage")]' % token,
                           line)) == 0
            for token in ("Loop", "Statement", "Operator"))
        ## ok now, this is ugly::::
        this_line_short_fname = short_name_elems[fnumber]
        ## search in the fmap for the last item that has idx <= this fnumber!!!!
        this_line_full_name = [
            full for (full, idx) in fmap[this_line_short_fname]
            if idx <= fnumber
        ][-1]
        result.append(
            ((this_line_full_name, lnumber), fully_cvrd, partially_cvrd,
             not_cvrd, terms_only, branches_only, is_unreachable_in_bytecode))
    return result
stopwords.update(nltk_stopwords) for event, node in events: if event == 'START_ELEMENT' and node.tagName == 'page': x += 1 events.expandNode(node) # node now contains a dom fragment title = xpath.findvalue('title', node) title = re.sub("[\(|].*?[\)]", "", title).strip().lower() if len(title.split()) > 1 or any(bad in title for bad in bad_list): continue title = re.sub(r'[\W]+', "", title) revision = xpath.findvalue('revision', node) text = xpath.findvalues('revision/text', node) wiki_parsed = wtp.parse(text[0]).sections[0] wiki_parsed_str = str(wiki_parsed) for table in wiki_parsed.tables: wiki_parsed_str = wiki_parsed_str.replace(str(table), "") for tmpl in wiki_parsed.templates: wiki_parsed_str = wiki_parsed_str.replace(str(tmpl), "") for ref in wiki_parsed.get_tags(): wiki_parsed_str = wiki_parsed_str.replace(str(ref), '') for link in wiki_parsed.wikilinks: wiki_parsed_str = wiki_parsed_str.replace(str(link), link.title) wiki_parts = wiki_parsed_str.strip().split('\n') for part in wiki_parts: if any(sep in part for sep in sep_list): definition = custom_split(sep_list, part)[1]
def get_value(self, html):
    """Return the joined text nodes found directly under ``self.tag`` elements in ``html``."""
    text_query = './/*:%s/text()' % self.tag
    return ''.join(xpath.findvalues(text_query, html))
def print_filetype_tree():
    """
       Write a text tree of digital transfers and their formats for every
       XML file under the sample directory.

       Transfers whose availability passes the ``contains`` check against
       EUMETCAST/GTS/DIRECT go to /tmp/dissemination-tree.txt, followed by a
       summary of all availability strings seen; all other transfers go to
       /tmp/filtered-tree.txt.

       Raises Exception when a transfer has more than one availability node.
    """
    context = { 'gmi': "http://www.isotc211.org/2005/gmi", 'eum': "http://www.eumetsat.int/2008/gmi", 'gco': "http://www.isotc211.org/2005/gco", 'gmd': "http://www.isotc211.org/2005/gmd", "xsi": "http://www.w3.org/2001/XMLSchema-instance" }

    # In-memory buffers for the two output files.
    out = StringIO.StringIO()
    filtered = StringIO.StringIO()

    different_availabilities = set()

    for file in fs.dirwalk('/homespace/gaubert/RODD/src-data/130810-vprodnav/',"*.xml"):

        doc = xml.dom.minidom.parse(file).documentElement

        fileidentifier = xpath.findvalues('/gmi:MI_Metadata/gmd:fileIdentifier/gco:CharacterString', doc, namespaces=context)

        filename_written = False

        digitaltransfers = xpath.find('//eum:digitalTransfers/eum:MD_EUMDigitalTransfer', doc, namespaces=context)

        for elem in digitaltransfers:

            # get availability value
            list_of_elems = get_nodes_with("/availability/MD_EUMDigitalTransferOptions/availability/CharacterString", elem.childNodes)
            if len(list_of_elems) > 1:
                raise Exception("Error too many elements found")

            # NOTE(review): an empty list_of_elems raises IndexError here.
            availability_type = " ".join(t.nodeValue for t in list_of_elems[0].childNodes if t.nodeType == t.TEXT_NODE)
            different_availabilities.add(availability_type.strip())

            # get list of channels
            list_of_channels = get_nodes_with("/availability/MD_EUMDigitalTransferOptions/eumetcastChannels/CharacterString", elem.childNodes)
            chans = ""
            for ch in list_of_channels:
                chans += " ".join(t.nodeValue for t in ch.childNodes if t.nodeType == t.TEXT_NODE)

            if contains(availability_type, ['EUMETCAST','GTS','DIRECT']):
                # write name
                # NOTE(review): the flag is reset to False, never set to True,
                # so this header is written for every transfer -- confirm
                # whether True was intended.
                if not filename_written:
                    out.write("+-%s:%s:ch=[%s]\n" % (fileidentifier[0], os.path.basename(file),chans))
                    filename_written = False

                # get associated formats to this type
                format_list = get_nodes_with("/format/MD_EUMFormat", elem.childNodes)
                for e in format_list:
                    # Only the first name node is used.
                    dummy_list = get_nodes_with("/name/CharacterString", e.childNodes)
                    dum_node = dummy_list[0]
                    name = " ".join(t.nodeValue for t in dum_node.childNodes if t.nodeType == t.TEXT_NODE)

                    dummy_list = get_nodes_with("/typicalFilename/CharacterString", e.childNodes)
                    typicalfilenames = []
                    for dum_node in dummy_list:
                        typicalfilenames.append(" ".join(t.nodeValue for t in dum_node.childNodes if t.nodeType == t.TEXT_NODE))

                    out.write(" \__(%s:%s)\n" % (availability_type, name.strip()) )
                    for n in typicalfilenames:
                        out.write(" \__%s\n" % (n.strip()))
            else:
                # Same reporting as above, but into the 'filtered' buffer and
                # without the channel list in the header.
                # write name
                # NOTE(review): same never-set flag as in the branch above.
                if not filename_written:
                    filtered.write("+-%s:%s\n" % (fileidentifier[0], os.path.basename(file)))
                    filename_written = False

                # get associated formats to this type
                format_list = get_nodes_with("/format/MD_EUMFormat", elem.childNodes)
                for e in format_list:
                    dummy_list = get_nodes_with("/name/CharacterString", e.childNodes)
                    dum_node = dummy_list[0]
                    name = " ".join(t.nodeValue for t in dum_node.childNodes if t.nodeType == t.TEXT_NODE)

                    dummy_list = get_nodes_with("/typicalFilename/CharacterString", e.childNodes)
                    typicalfilenames = []
                    for dum_node in dummy_list:
                        typicalfilenames.append(" ".join(t.nodeValue for t in dum_node.childNodes if t.nodeType == t.TEXT_NODE))

                    filtered.write(" \__(%s:%s)\n" % (availability_type, name.strip()) )
                    for n in typicalfilenames:
                        filtered.write(" \__%s\n" % (n.strip()))

    # Summary of every distinct availability string encountered.
    out.write("-------------------------------------------------------------------\n")
    out.write("Availabilities type:\n")
    for av in different_availabilities:
        out.write("- %s\n" % av)

    # Flush both buffers to their files.
    o_file= open("/tmp/dissemination-tree.txt", "w")
    o_file.write(out.getvalue())
    o_file.close()

    o_file= open("/tmp/filtered-tree.txt", "w")
    o_file.write(filtered.getvalue())
    o_file.close()