Exemplo n.º 1
0
    def GetPlayblastResolution(self, projectName):
        """Return the playblast resolution configured for a project.

        Parses the shared ``projects.xml`` file and reads the
        ``playblast/width`` and ``playblast/height`` values of the
        ``project`` element whose ``name`` child equals *projectName*.

        Returns a two-item list ``[width, height]`` of ints. ``[0, 0]`` is
        returned unless exactly one width and exactly one height are found.
        """
        import xpath

        root = xml.dom.minidom.parse(
            r'\\file-cluster\GDC\Resource\Support\bin\projects.xml'
        ).documentElement

        # Both lookups share the same project-selecting prefix.
        prefix = "//project[name='%s']/playblast/" % projectName
        found_widths = xpath.findvalues(prefix + "width", root)
        found_heights = xpath.findvalues(prefix + "height", root)

        # Anything other than a single unambiguous match falls back to 0x0.
        if len(found_widths) != 1 or len(found_heights) != 1:
            return [0, 0]

        return [int(found_widths[0]), int(found_heights[0])]
Exemplo n.º 2
0
def run_test():
    """Query the file identifier of one sample metadata file in two ways.

    The same XPath expression is evaluated first through the module-level
    ``xpath.findvalues`` and then through an ``XPathContext`` instance; each
    result is printed.
    """
    sample_path = '/homespace/gaubert/RODD/src-data/130810-vprodnav/3.xml'
    doc = xml.dom.minidom.parse(sample_path).documentElement

    # create context
    context = xpath.XPathContext()

    # Prefix -> namespace URI bindings used by the query below.
    c = {
        'gmi': "http://www.isotc211.org/2005/gmi",
        'eum': "http://www.eumetsat.int/2008/gmi",
        'gco': "http://www.isotc211.org/2005/gco",
        'gmd': "http://www.isotc211.org/2005/gmd",
        "xsi": "http://www.w3.org/2001/XMLSchema-instance",
    }

    # Register the same bindings on the context, one prefix at a time.
    for prefix in ('gmi', 'eum', 'gco', 'gmd', 'xsi'):
        context.namespaces[prefix] = c[prefix]

    identifier_query = '/gmi:MI_Metadata/gmd:fileIdentifier/gco:CharacterString'

    # Module-level helper.
    module_result = xpath.findvalues(identifier_query, doc, namespaces=c)
    print("Result = %s\n" % (module_result))

    # Same query through the prepared context object.
    context_result = context.findvalues(identifier_query, doc, namespaces=c)
    print("Result = %s\n" % (context_result))
Exemplo n.º 3
0
    def GetSetting(self, projectName, keyName):
        """Return the value of setting *keyName* for a project.

        Parses the shared ``projects.xml`` file and looks up the *keyName*
        child of the ``project`` element named *projectName*. If that lookup
        does not yield exactly one value, the project named ``'Default'`` is
        tried instead.

        Returns the matched value, or ``''`` when neither lookup yields
        exactly one value.
        """
        import xpath

        root = xml.dom.minidom.parse(
            r'\\file-cluster\GDC\Resource\Support\bin\projects.xml'
        ).documentElement

        # Requested project first, then the 'Default' project as fallback.
        for candidate in (projectName, 'Default'):
            matches = xpath.findvalues(
                "//project[name='%s']/%s" % (candidate, keyName), root)
            if len(matches) == 1:
                return matches[0]

        return ''
Exemplo n.º 4
0
def get_value_from_xpath(xpath_sel, xml_doc):
    """Test helper: return the first value selected by an XPath expression.

    *xml_doc* is an XML string; it is parsed and *xpath_sel* is evaluated
    against the resulting document.

    Raises ``Exception`` when the expression selects nothing, or when the
    XPath evaluation raises ``XPathTypeError``.
    """
    document = parseString(xml_doc)
    try:
        matches = findvalues(xpath_sel, document)
        first_match = matches[0]
    except IndexError:
        raise Exception("%s not found in xml_document" % xpath_sel)
    except XPathTypeError:
        raise Exception("Invalid Xpath syntax")
    return first_match
Exemplo n.º 5
0
def run_matcher():
    """Print the file identifier of every XML file in the sample directory.

    For each file the identifier query is evaluated twice and the first
    value of each evaluation is printed together with the file path.
    """
    namespaces = {
        'gmi': "http://www.isotc211.org/2005/gmi",
        'eum': "http://www.eumetsat.int/2008/gmi",
        'gco': "http://www.isotc211.org/2005/gco",
        'gmd': "http://www.isotc211.org/2005/gmd",
        "xsi": "http://www.w3.org/2001/XMLSchema-instance",
    }
    identifier_query = '/gmi:MI_Metadata/gmd:fileIdentifier/gco:CharacterString'
    source_dir = '/homespace/gaubert/RODD/src-data/130810-vprodnav/'

    for xml_path in fs.dirwalk(source_dir, "*.xml"):
        print("file = %s\n" % (xml_path))
        root = xml.dom.minidom.parse(xml_path).documentElement

        # The lookup is deliberately run and reported twice per file.
        for _ in range(2):
            identifiers = xpath.findvalues(
                identifier_query, root, namespaces=namespaces)
            print("[id:%s , path:%s]\n" % (identifiers[0], xml_path))
Exemplo n.º 6
0
    def GetTextureFormat(self, projectName):
        """Return the texture format configured for a project.

        Parses the shared ``projects.xml`` file and reads ``texture/format``
        of the ``project`` element whose ``name`` child equals *projectName*.

        Returns the configured value when exactly one is found, otherwise
        the fallback ``'iff'``.
        """
        import xpath

        tree = xml.dom.minidom.parse(
            r'\\file-cluster\GDC\Resource\Support\bin\projects.xml')
        query = "//project[name='%s']/texture/format" % projectName
        matches = xpath.findvalues(query, tree.documentElement)

        return matches[0] if len(matches) == 1 else 'iff'
Exemplo n.º 7
0
    def test_not_escape(self):
        """With ``escape=False`` the widget renders markup without escaping it.

        Renders a value containing an ``<i>`` element and checks that the
        parsed output contains that element, with its text, inside the
        widget's tag.
        """
        unescaped_widget = self.widget(escape=False)

        class F(Form):
            fields = [
                Field('name', conv=convs.Char(), widget=unescaped_widget)
            ]

        name_field = F(self.env).get_field('name')
        rendered = name_field.widget.render('<i>char display</i>')
        parsed = self.parse(rendered)

        # The <i> element must survive as a real child node of the tag.
        italic_texts = xpath.findvalues(
            './/*:%s/*:i/text()' % self.tag, parsed)
        self.assertEqual(''.join(italic_texts), 'char display')
Exemplo n.º 8
0
	def find(self, query, context, get_value, charset, result_list):
		"""
		Run the XPath query 'query' against the minidom Document 'context' and
		append the outcome to 'result_list'.

		When 'get_value' is true the query is evaluated with 'findvalues' and
		every value is stripped, encoded and appended. Otherwise 'find' is
		used and each node is converted to text according to its nodeType:
		type 2 uses '.value', types 3, 4, 6 and 7 use '.data', and any other
		node is serialised with 'toxml()'.

		Every appended string is encoded with 'charset'. A single None is
		appended when the query matches nothing, and None replaces any node
		whose text is empty after stripping.
		"""

		def encoded_or_none(raw_text):
			# Whitespace-only content is reported as None, not as an empty string.
			stripped = raw_text.strip()
			return stripped.encode(charset) if stripped else None

		if get_value:
			values = findvalues(query, context)
			if not values:
				result_list.append(None)
				return
			for value in values:
				result_list.append(value.strip().encode(charset))
			return

		nodes = find(query, context)
		if not nodes:
			result_list.append(None)
			return

		for node in nodes:
			if node.nodeType == 2:
				raw_text = node.value
			elif node.nodeType in (3, 4, 6, 7):
				raw_text = node.data
			else:
				raw_text = node.toxml()
			result_list.append(encoded_or_none(raw_text))
Exemplo n.º 9
0
def get_tgs_codecover_raw(tar):
    """Extract per-line coverage facts from a CodeCover report archive.

    Reads ``coverage/report.csv`` and
    ``coverage/report_html/report_single.html`` from the open tar archive
    *tar* and returns one tuple per source line found in the HTML report::

        ((full_file_name, line_number), fully_cvrd, partially_cvrd,
         not_cvrd, terms_only, branches_only, is_unreachable_in_bytecode)

    The first item identifies the line; the remaining items are booleans.

    This version avoids Python-2-only constructs (tuple-parameter lambdas,
    builtin ``reduce``, re-iterated ``zip`` results) so the module also
    compiles on Python 3, and it no longer raises ``TypeError`` when one of
    the flattened element lists is empty.
    """
    ## overview / filenames information
    # NOTE(review): on Python 3 ``extractfile`` yields a binary stream while
    # ``csv.reader`` expects text; wrap the stream if this must run there.
    f = tar.extractfile('coverage/report.csv')
    reader = csv.reader(f)
    packages = set()
    name_to_full_name_map = {}  ## short name -> array of full names

    for row in reader:
        if row[2] == 'package':
            packages.add(row[0])
        package_name, _, short_name = row[0].rpartition('.')
        if row[2] == 'class' and package_name in packages:
            name_to_full_name_map.setdefault(short_name, []).append(
                row[0].replace('.', '/') + '.java')

    ## code coverage information
    f = tar.extractfile('coverage/report_html/report_single.html')
    tree = parse(f)

    ## next, read hyperlinking information from the overview table!
    tbody = xpath.find('//tbody[@class="overview"]', tree)[0]
    first_tds = [
        tr.getElementsByTagName("td")[0]
        for tr in tbody.getElementsByTagName("tr")
    ]

    # (href, link text) of every anchor in the first column, document order.
    first_tds_names = [
        (a.getAttribute("href"), a.firstChild.nodeValue.strip())
        for td in first_tds
        for a in td.getElementsByTagName("a")
    ]

    filtered_tds_names = [(x, y) for (x, y) in first_tds_names
                          if y in name_to_full_name_map]

    # The href is a fragment ("#anchor"); drop the leading character to get
    # the anchor name it points at.
    xrefs = [
        xpath.findnode('//a[@name="%s"]' % name[1:], tree)
        for (name, _) in filtered_tds_names
    ]
    code_hash = [myx.parentNode.parentNode.getAttribute('id') for myx in xrefs]
    regexp_match = [re.match(r'F(\d+)(L\d+)?', x) for x in code_hash]
    regexp_numbers = [
        int(match.group(1)) if match else 0 for match in regexp_match
    ]
    # Materialised as a list because it is scanned once per short name.
    zipped_numbers = list(
        zip(regexp_numbers, [y for (_, y) in filtered_tds_names]))

    def relevant_numbers(fn):
        return [x for (x, y) in zipped_numbers if y == fn]

    ## next build up this map: short name -> [(full name, file index), ...]
    # Lists (not zip iterators) because entries are re-read for every line.
    fmap = {
        name: list(zip(name_to_full_name_map[name], relevant_numbers(name)))
        for name in name_to_full_name_map
    }

    ## and the short fname map
    short_name_elems = [
        s.replace('.java', '')
        for s in xpath.findvalues('//thead[@class="code"]/tr/th/text()', tree)
    ]

    def get_lines():
        # All <td class="code text"> cells inside <tr class="code"> rows of
        # every <tbody class="code">, in document order.
        return [
            td
            for code_tbody in xpath.find('//tbody[@class="code"]', tree)
            for tr in code_tbody.getElementsByTagName("tr")
            if tr.getAttribute('class') == 'code'
            for td in tr.getElementsByTagName("td")
            if td.getAttribute('class') == 'code text'
        ]

    def collect_text(node, text_node_type, chunks):
        # Depth-first gather of the values of all text nodes below *node*.
        if node.nodeType == text_node_type:
            chunks.append(node.nodeValue)
        for child in node.childNodes:
            collect_text(child, text_node_type, chunks)

    def has_span(cell, class_fragment):
        # True when *cell* has a child <span> whose class contains the text.
        return len(
            xpath.find('span[contains(@class, "%s")]' % class_fragment,
                       cell)) > 0

    result = []
    for line in get_lines():
        lnumberStr = line.parentNode.getAttribute('id')
        # Row ids without a file prefix are treated as file index 0.
        if not lnumberStr.startswith('F'):
            lnumberStr = 'F0' + lnumberStr

        fnumber, lnumber = map(int,
                               re.match(r'F(\d+)L(\d+)', lnumberStr).groups())

        text = []
        collect_text(line, line.TEXT_NODE, text)
        code = ''.join(text).strip()
        is_unreachable_in_bytecode = code in ["continue;", "break;"]

        fully_cvrd, partially_cvrd, not_cvrd = [
            has_span(line, token)
            for token in ("fullyCovered", "partlyCovered", "notCovered")
        ]
        terms_only = not any(
            has_span(line, "%s_Coverage" % token)
            for token in ("Loop", "Branch", "Statement", "Operator"))
        branches_only = not any(  ## Terms are allowed too, e.g., } else if { ...
            has_span(line, "%s_Coverage" % token)
            for token in ("Loop", "Statement", "Operator"))

        this_line_short_fname = short_name_elems[fnumber]
        ## search in the fmap for the last item that has idx <= this fnumber
        this_line_full_name = [
            full for (full, idx) in fmap[this_line_short_fname]
            if idx <= fnumber
        ][-1]

        result.append(
            ((this_line_full_name, lnumber), fully_cvrd, partially_cvrd,
             not_cvrd, terms_only, branches_only, is_unreachable_in_bytecode))
    return result
stopwords.update(nltk_stopwords)

# Walk the pull-parser event stream and process every <page> element.
for event, node in events:
    if event == 'START_ELEMENT' and node.tagName == 'page':
        x += 1

        events.expandNode(node)  # node now contains a dom fragment
        title = xpath.findvalue('title', node)
        if title is None:
            # A page without a title cannot be processed; skip it instead
            # of failing inside re.sub.
            continue
        # Drop a bracketed / piped suffix, then normalise case and spacing.
        # Raw string: the pattern is unchanged, but it no longer relies on
        # invalid string escapes ("\(" and "\)").
        title = re.sub(r"[\(|].*?[\)]", "", title).strip().lower()
        # Only single-word titles without any black-listed fragment are kept.
        if len(title.split()) > 1 or any(bad in title for bad in bad_list):
            continue
        title = re.sub(r'[\W]+', "", title)

        revision = xpath.findvalue('revision', node)
        text = xpath.findvalues('revision/text', node)
        if not text:
            # No revision text for this page; nothing to parse.
            continue
        # Only the first section of the first revision text is used.
        wiki_parsed = wtp.parse(text[0]).sections[0]
        wiki_parsed_str = str(wiki_parsed)
        # Strip tables, templates and tags; replace wikilinks by their title.
        for table in wiki_parsed.tables:
            wiki_parsed_str = wiki_parsed_str.replace(str(table), "")
        for tmpl in wiki_parsed.templates:
            wiki_parsed_str = wiki_parsed_str.replace(str(tmpl), "")
        for ref in wiki_parsed.get_tags():
            wiki_parsed_str = wiki_parsed_str.replace(str(ref), '')
        for link in wiki_parsed.wikilinks:
            wiki_parsed_str = wiki_parsed_str.replace(str(link), link.title)

        wiki_parts = wiki_parsed_str.strip().split('\n')
        for part in wiki_parts:
            if any(sep in part for sep in sep_list):
                definition = custom_split(sep_list, part)[1]
Exemplo n.º 11
0
 def get_value(self, html):
     """Return the concatenated text nodes found directly under ``self.tag`` elements in *html*."""
     text_query = './/*:%s/text()' % self.tag
     text_parts = xpath.findvalues(text_query, html)
     return ''.join(text_parts)
Exemplo n.º 12
0
def print_filetype_tree():
    """
       Print all filetypes as a tree

       Every XML file in the sample directory is scanned for
       ``eum:MD_EUMDigitalTransfer`` elements. Transfers whose availability
       contains EUMETCAST, GTS or DIRECT are written to
       ``/tmp/dissemination-tree.txt``; all others go to
       ``/tmp/filtered-tree.txt``. The first file also ends with the list of
       distinct availability values encountered.

       Raises ``Exception`` when a transfer has more than one availability
       element.
    """
    context = {'gmi': "http://www.isotc211.org/2005/gmi",
               'eum': "http://www.eumetsat.int/2008/gmi",
               'gco': "http://www.isotc211.org/2005/gco",
               'gmd': "http://www.isotc211.org/2005/gmd",
               "xsi": "http://www.w3.org/2001/XMLSchema-instance"
               }

    out = StringIO.StringIO()
    filtered = StringIO.StringIO()

    different_availabilities = set()

    def text_of(node):
        # Space-joined values of the direct text children of *node*.
        return " ".join(t.nodeValue for t in node.childNodes
                        if t.nodeType == t.TEXT_NODE)

    def write_formats(stream, transfer, availability_type):
        # Write every format of *transfer* with its typical file names.
        for fmt in get_nodes_with("/format/MD_EUMFormat", transfer.childNodes):
            name_nodes = get_nodes_with("/name/CharacterString", fmt.childNodes)
            name = text_of(name_nodes[0])

            filename_nodes = get_nodes_with("/typicalFilename/CharacterString",
                                            fmt.childNodes)
            typicalfilenames = [text_of(n) for n in filename_nodes]

            # Backslashes are escaped explicitly; the emitted text is the
            # same as before (" \__(...)"), without an invalid "\_" escape.
            stream.write(" \\__(%s:%s)\n" % (availability_type, name.strip()))
            for n in typicalfilenames:
                stream.write("     \\__%s\n" % (n.strip()))

    for xml_path in fs.dirwalk('/homespace/gaubert/RODD/src-data/130810-vprodnav/', "*.xml"):

        doc = xml.dom.minidom.parse(xml_path).documentElement

        fileidentifier = xpath.findvalues('/gmi:MI_Metadata/gmd:fileIdentifier/gco:CharacterString', doc, namespaces=context)

        digitaltransfers = xpath.find('//eum:digitalTransfers/eum:MD_EUMDigitalTransfer', doc, namespaces=context)

        for elem in digitaltransfers:

            # get availability value
            list_of_elems = get_nodes_with("/availability/MD_EUMDigitalTransferOptions/availability/CharacterString", elem.childNodes)

            if len(list_of_elems) > 1:
                raise Exception("Error too many elements found")

            availability_type = text_of(list_of_elems[0])
            different_availabilities.add(availability_type.strip())

            # get list of channels
            list_of_channels = get_nodes_with("/availability/MD_EUMDigitalTransferOptions/eumetcastChannels/CharacterString", elem.childNodes)
            chans = "".join(text_of(ch) for ch in list_of_channels)

            # NOTE(review): the original "filename_written" flag was only
            # ever assigned False, so the "+-" header line has always been
            # written once per transfer. That behaviour is kept here;
            # confirm whether a once-per-file header was intended.
            if contains(availability_type, ['EUMETCAST', 'GTS', 'DIRECT']):
                out.write("+-%s:%s:ch=[%s]\n" % (fileidentifier[0], os.path.basename(xml_path), chans))
                write_formats(out, elem, availability_type)
            else:
                filtered.write("+-%s:%s\n" % (fileidentifier[0], os.path.basename(xml_path)))
                write_formats(filtered, elem, availability_type)

    out.write("-------------------------------------------------------------------\n")
    out.write("Availabilities type:\n")
    for av in different_availabilities:
        out.write("- %s\n" % av)

    # Context managers make sure the files are closed even if a write fails.
    with open("/tmp/dissemination-tree.txt", "w") as o_file:
        o_file.write(out.getvalue())

    with open("/tmp/filtered-tree.txt", "w") as o_file:
        o_file.write(filtered.getvalue())