示例#1
0
 def from_dataframe(cls, df, **kwargs):
     """Build an element tree from a pandas ``DataFrame`` or ``Styler``.

     A ``DataFrame`` is rendered with ``df.to_html(**kwargs)`` and parsed
     with ``cls`` as the element factory.

     A ``Styler`` is rendered to HTML, wrapped in a ``<div>`` and parsed the
     same way.  If that markup is not well-formed XML, a fallback ``div`` is
     returned that holds the unstyled table (``df.data.to_html(**kwargs)``),
     the raw rendered markup and the parse error message.

     Returns ``None`` when ``df`` is neither of the two supported types.
     """
     if isinstance(df, pandas.DataFrame):
         return xml.etree.ElementTree.fromstring(
             df.to_html(**kwargs),
             parser=XMLParser(target=TreeBuilder(element_factory=cls)))
     elif isinstance(df, pandas.io.formats.style.Styler):
         # ``Styler.render`` was removed in pandas 2.0; ``to_html`` is its
         # replacement.  Fall back to ``render`` on versions without it.
         renderer = getattr(df, "to_html", None) or df.render
         render = renderer()
         # Quote bare numeric colspan values so the markup is valid XML.
         # Raw strings keep ``\g<1>`` a regex group reference rather than an
         # invalid string escape sequence.
         render = re.sub(r"colspan=([0-9]*)>", r'colspan="\g<1>">', render)
         try:
             return xml.etree.ElementTree.fromstring(
                 f"<div>{render}</div>",
                 parser=XMLParser(target=TreeBuilder(element_factory=cls)))
         except Exception as parse_err:
             # The error is matched by class name rather than by class.
             # NOTE(review): presumably because the raised ParseError class
             # may differ from the importable one -- confirm.
             if type(parse_err).__name__ != 'ParseError':
                 raise
             x = Elem('div')
             x << xml.etree.ElementTree.fromstring(
                 df.data.to_html(**kwargs),
                 parser=XMLParser(target=TreeBuilder(element_factory=cls)))
             x << Elem('pre', text=render)
             x << Elem('pre', text=str(parse_err))
             return x
示例#2
0
def compare_vtk(vtk1,
                vtk2,
                absolute=1.2e-7,
                relative=1e-2,
                zeroValueThreshold=None,
                verbose=True):
    """ Take two vtk files and fuzzy compare them. Returns an exit key as return value.

    :param vtk1: The filename of the vtk files to compare
    :type vtk1: string
    :param vtk2: The filename of the vtk files to compare
    :type vtk2: string

    :param absolute: The epsilon used for comparing numbers with an absolute criterion
    :type absolute: float

    :param relative: The epsilon used for comparing numbers with a relative criterion
    :type relative: float

    :param zeroValueThreshold: A dictionary of parameter value pairs that set the threshold under
                               which a number is treated as zero for a certain parameter. Use this parameter if
                               you have to avoid comparisons of very small numbers for a certain parameter.
                               Defaults to an empty dictionary.
    :type zeroValueThreshold: dict

    :param verbose: If the script should produce informative output. Enabled by default as the details
                    give the tester a lot more information on why tests fail.
    :type verbose: bool

    :returns: 0 if the two files compare as fuzzy equal, 1 otherwise
    :rtype: int
    """
    # Use a fresh dict per call instead of one mutable default object that
    # would be shared between all calls.
    if zeroValueThreshold is None:
        zeroValueThreshold = {}

    # construct element tree from vtk file
    root1 = ET.parse(vtk1, parser=XMLParser(target=VTKTreeBuilder())).getroot()
    root2 = ET.parse(vtk2, parser=XMLParser(target=VTKTreeBuilder())).getroot()

    # sort the vtk file in case nodes appear in different positions
    # e.g. because of minor changes in the output code
    sortedroot1 = sort_vtk(root1)
    sortedroot2 = sort_vtk(root2)

    if verbose:
        print("Comparing {} and {}".format(vtk1, vtk2))
        print(
            "...with a maximum relative error of {} and a maximum absolute error of {}*p_max, where p_max is highest absolute parameter value."
            .format(relative, absolute))

    # sort the vtk file so that the comparison is independent of the
    # index numbering (coming e.g. from different grid managers)
    sortedroot1, sortedroot2 = sort_vtk_by_coordinates(sortedroot1,
                                                       sortedroot2, verbose)

    # do the fuzzy compare and translate the result into an exit key
    is_equal = is_fuzzy_equal_node(sortedroot1, sortedroot2, absolute,
                                   relative, zeroValueThreshold, verbose)
    return 0 if is_equal else 1
def main():
    """Swap the substitution models of an XML file for per-entry models.

    Reads the input XML named by ``options.input``, deletes the existing
    model nodes and their kappa/frequency parameters, reads a tab-separated
    model assignment file (``options.subs``), pulls the matching model
    sections out of ``options.params``, inserts them into the tree and
    writes the result to ``options.output``.
    """
    super_concat = False
    options, args = interface()
    xml = ElementTree().parse(options.input, parser=XMLParser(target=MyTreeBuilder()))
    # delete the older subs. models from the xml file
    for node in ['HKYModel', 'siteModel']:
        xml = delete_node(xml, node, 1)
    if super_concat:
        # NOTE(review): 'treeLikehood' looks like a misspelling of
        # 'treeLikelihood' -- confirm against the XML schema before changing.
        xml = delete_node(xml, 'treeLikehood', 1)
    # delete the kappa and frequency parameters in 'operators'
    for parameter in ['kappa', 'frequencies']:
        xml = delete_children_from_node(xml, 'operators', parameter)
        xml = delete_children_from_node(xml, 'prior', parameter, 2)
        xml = delete_children_from_log_node(xml, 'log', parameter)
    # jettison some comments
    xml = comment_remover(xml, ['HKY substitution model', 'site model'])
    # get our subs model information: column 0 is the key, the model name is
    # the part after the first '-' in column 1.
    # The file is opened in a ``with`` block so it is always closed, and
    # without the 'U' mode flag, which Python 3.11 removed.
    sub_models_from_modeltest = {}
    with open(options.subs) as subs_file:
        for line in subs_file:
            fields = line.strip().split('\t')
            sub_models_from_modeltest[fields[0]] = fields[1].split('-')[1]
    model_names, site_names = get_xml_model_names(set(sub_models_from_modeltest.values()))
    model_data = ElementTree().parse(options.params, parser=XMLParser(target=MyTreeBuilder()))

    # get the xml data that we need to add for the models and their parameters
    models_to_add = get_generic_section_to_add(model_names, model_data, 'models')
    sites_to_add = get_generic_section_to_add(site_names, model_data, 'sites')
    operators_to_add = get_generic_section_children_to_add(model_names, model_data, 'operators')
    log_entries_to_add = get_log_entries_to_add(model_names.union(site_names), model_data)
    priors_to_add = get_generic_section_children_to_add(model_names, model_data, 'priors')
    if super_concat:
        likelihood_framework_to_add = get_generic_section_to_add(model_names, model_data, 'likelihoods')

    # get the last position of the strictClockBranchRates
    insert_position = get_position(xml, 'strictClockBranchRates')
    # insert the models and sites we need
    insert_position = insert_models_and_sites(xml, insert_position, models_to_add, sites_to_add)
    # modify the tree likelihood statements
    if not super_concat:
        xml = update_tree_likelihoods(xml, sub_models_from_modeltest)
    else:
        insert_position = get_position(xml, 'siteModel')
        xml = insert_tree_likelihoods(xml, sub_models_from_modeltest)
    # insert the operators we need
    xml = insert_to_generic_sections(xml, operators_to_add, 'operators', 'operators')
    # insert the priors we need
    xml = insert_to_generic_sections(xml, priors_to_add, 'prior', 'prior')
    # alter the log node to collect data
    xml = insert_to_generic_sections(xml, log_entries_to_add, 'log', 'fileLog')
    # write to the output file
    write(xml, options.output)
示例#4
0
 def __init__(self, xml_encode):
     """Run ``xml_encode`` through a ``MaxDepth`` parser and keep the input.

     ``self.resul_property`` is printed but not assigned in this method;
     presumably it is provided elsewhere on the class -- confirm.
     """
     super().__init__()
     depth_parser = XMLParser(target=MaxDepth())
     # The return values of feed() and close() are not used.
     depth_parser.feed(xml_encode)
     depth_parser.close()
     print(self.resul_property)
     self.xml_encode = xml_encode
示例#5
0
    def load(self, oFileBrowser: FileBrowser) -> None:
        """Load the selected codeset file and display it before and after conversion.

        The first entry of the browser selection (if any) becomes
        ``self.uCodesetFileName``. The file is parsed with a
        ``CommentedTreeBuilder`` target, its serialised form is put into
        ``self.oTextInput``, every ``cmd`` attribute of the root's children
        is passed through ``GlobalCacheToKeene``, and the converted tree is
        put into ``self.oTextInput2``. Errors are logged, not raised; the
        popup is dismissed in every case.
        """

        oXMLRoot: Element
        oCode: Element
        uCmd: str

        if len(oFileBrowser.selection) > 0:
            self.uCodesetFileName = oFileBrowser.selection[0]

        try:
            oParser = XMLParser(target=CommentedTreeBuilder())
            oFnCodeset = cFileName().ImportFullPath(
                uFnFullName=self.uCodesetFileName)
            oXMLRoot = LoadXMLFile(oFile=oFnCodeset, oParser=oParser)

            if oXMLRoot is not None:
                # Original content first, converted content second.
                self.oTextInput.text = tostring(oXMLRoot)
                for oCode in oXMLRoot:
                    uCmd = oCode.get('cmd')
                    if uCmd is None:
                        continue
                    oCode.set('cmd', GlobalCacheToKeene(uCmd))
                self.AdjustRepManagerITachToKeene(oXMLRoot)
                self.oTextInput2.text = tostring(oXMLRoot)
        except ParseError as uErrMsg:
            Logger.error('Parse Error ' + str(uErrMsg))
        except Exception as e:
            Logger.error('General Error ' + str(e))

        self.dismiss_popup()
示例#6
0
def test_name(xml_data, login_strings):
    """Feed each login string to a parser targeting ``xml_data``, then check the name."""
    for chunk in login_strings:
        try:
            parser = XMLParser(target=xml_data)
            parser.feed(chunk)
        except ParseError:
            # Strings that do not parse are skipped.
            pass
    assert xml_data.name == "Crannach"
示例#7
0
def test_instance(xml_data, login_strings):
    """Feed each login string to a parser targeting ``xml_data``, then check the game."""
    for chunk in login_strings:
        try:
            parser = XMLParser(target=xml_data)
            parser.feed(chunk)
        except ParseError:
            # Strings that do not parse are skipped.
            pass
    assert xml_data.game == "DR"
示例#8
0
    def _parse_doc(self, raw_doc) -> bytes:
        """Parse ``raw_doc`` with lxml and return the serialised document.

        ``raw_doc`` is resolved through ``get_data_from_filepath`` using the
        instance's encoding, compression and storage options. ``StringIO``
        content is encoded with ``self.encoding`` and parsed from bytes;
        anything else is handed to lxml's ``parse`` directly.

        Raises:
            TypeError: if the input is a ``StringIO`` and ``self.encoding``
                is ``None``.
        """
        from lxml.etree import (
            XMLParser,
            fromstring,
            parse,
            tostring,
        )

        handle_data = get_data_from_filepath(
            filepath_or_buffer=raw_doc,
            encoding=self.encoding,
            compression=self.compression,
            storage_options=self.storage_options,
        )

        with preprocess_data(handle_data) as xml_data:
            curr_parser = XMLParser(encoding=self.encoding)

            if isinstance(xml_data, io.StringIO):
                # str.encode(None) would fail with an obscure TypeError;
                # raise the same exception type with a clear message.
                if self.encoding is None:
                    raise TypeError(
                        "Can not pass encoding None when input is StringIO."
                    )

                doc = fromstring(
                    xml_data.getvalue().encode(self.encoding), parser=curr_parser
                )
            else:
                doc = parse(xml_data, parser=curr_parser)

        return tostring(doc)
示例#9
0
def convert_jpr(filename, outfilename):
    """Replace mapped internal library definitions in a project file.

    The file is only processed (and ``outfilename`` only written) when the
    root tag is ``{http://xmlns.oracle.com/ide/project}project``. Every
    entry of the ``libraryDefinitions`` list whose id has a non-``None``
    entry in ``mapping`` is removed from that list; its id is dropped from
    the ``exportedReferences`` and ``libraryReferences`` lists and the
    mapped library is added to both instead.

    :param filename: path of the project file to read
    :param outfilename: path the converted document is written to
    """
    parser = XMLParser(encoding='UTF-8')
    xml = ET.parse(filename, parser)
    root = xml.getroot()
    ET.register_namespace('jpr', "http://xmlns.oracle.com/ide/project")
    if root.tag != "{http://xmlns.oracle.com/ide/project}project":
        return

    to_be_added_library = []
    to_be_removed_library = []
    internalDefinition = root.find(".//hash[@n='internalDefinitions']")
    if internalDefinition is not None:
        libraryDefinitions = internalDefinition.find(
            "./list[@n='libraryDefinitions']")
        # Iterate over a snapshot: removing a child from the element that is
        # being iterated makes the loop skip the sibling after each removal.
        # A missing list is treated like an empty one.
        definitions = ([] if libraryDefinitions is None
                       else list(libraryDefinitions))
        for libraryDefinition in definitions:
            id_element = libraryDefinition.find("./value[@n='id']")
            library_id = id_element.get("v")
            lib = mapping[library_id]
            if lib is not None:
                to_be_added_library.append(lib)
                libraryDefinitions.remove(libraryDefinition)
                to_be_removed_library.append(library_id)
    exportedReferences = root.find(".//list[@n='exportedReferences']")
    libraryReferences = root.find(".//list[@n='libraryReferences']")
    removeLibraryReferences(exportedReferences, to_be_removed_library)
    removeLibraryReferences(libraryReferences, to_be_removed_library)
    for lib in to_be_added_library:
        addLibraryReference(exportedReferences, lib)
        addLibraryReference(libraryReferences, lib)

    xml.write(outfilename, encoding="UTF-8", xml_declaration=True)
示例#10
0
    def parse_impl(self):
        """Stream-parse ``self.handle`` and collect the chat threads.

        The first ``h1`` element supplies ``self.user`` if it is not set
        yet. Anchors are ignored until a ``div`` whose class contains
        ``content`` starts; every later anchor is passed to
        ``process_thread`` with its participants and thread path.

        Returns:
            ``FacebookChatHistory(self.user, self.chat_threads)``.

        Raises:
            UnsuitableParserError: if no anchor was processed.
        """

        ignore_anchors = True
        saw_anchor = False

        # Cast to str to ensure not unicode under Python 2, as the parser
        # doesn't like that.
        parser = XMLParser(encoding=str('UTF-8'))
        element_iter = ET.iterparse(self.handle, events=("start", "end"), parser=parser)
        for pos, element in element_iter:
            tag, class_attr = _tag_and_class_attr(element)
            if tag == "h1" and pos == "end":
                if not self.user:
                    self.user = element.text.strip()
            elif tag == "div" and "content" in class_attr and pos == "start":
                ignore_anchors = False
            elif tag == "a" and pos == "start":
                if ignore_anchors:
                    continue
                saw_anchor = True
                participants = self.parse_participants(element)
                # Strip a leading "../" only. The dots are escaped so the
                # pattern cannot match two arbitrary characters followed
                # by a slash.
                thread_path = re.sub(r'^\.\./', '', element.attrib['href'])
                if using_windows():
                    thread_path = thread_path.replace('/', '\\')
                self.process_thread(participants, thread_path)

        if not saw_anchor:
            # Indicator of a `messages.htm` file that is probably in the legacy format.
            raise UnsuitableParserError

        self._clear_output()
        return FacebookChatHistory(self.user, self.chat_threads)
示例#11
0
def parse_xml(source):
    """Parse ``source`` (a file-like object) into an element tree.

    A parser targeting a fresh ``MeldTreeBuilder`` is handed to
    ``do_parse`` together with ``source``; its result is returned.
    """
    meld_parser = XMLParser(target=MeldTreeBuilder())
    return do_parse(source, meld_parser)
示例#12
0
	def from_string(cls, xml_as_string):
		"""Parse an XML string (or bytes) into a tree of ``cls`` elements.

		If parsing fails, every ``<style scoped>`` is rewritten to
		``<style scoped='1'>`` and parsing is retried once via
		``cls.from_string``.

		Raises:
			xml.etree.ElementTree.ParseError: if the input cannot be
				parsed and the rewrite does not change it.
		"""
		if isinstance(xml_as_string, bytes):
			xml_as_string = xml_as_string.decode()
		try:
			return xml.etree.ElementTree.fromstring(xml_as_string, parser=XMLParser(target=TreeBuilder(element_factory=cls)))
		except xml.etree.ElementTree.ParseError:
			repaired = xml_as_string.replace("<style scoped>", "<style scoped='1'>")
			if repaired == xml_as_string:
				# Nothing to repair: retrying the identical input would
				# recurse until RecursionError, so report the parse error.
				raise
			return cls.from_string(repaired)
def getQueryResult(query, detailedLog=True):
    """Fetch ``query`` and parse the answer with ``ProcessCSQueryResult``.

    :param query: URL passed to ``urllib.urlopen``
    :param detailedLog: when true, the raw answer is passed to
        ``log_CSQuery`` before it is parsed
    :return: whatever ``parser.close()`` returns for the
        ``ProcessCSQueryResult`` target
    """
    parser = XMLParser(target=ProcessCSQueryResult())
    # NOTE(review): ``urllib.urlopen`` only exists on Python 2; on Python 3
    # the equivalent is ``urllib.request.urlopen`` -- confirm the target
    # interpreter before changing it.
    response = urllib.urlopen(query)
    try:
        queryAnswerXML = response.read()
    finally:
        # Release the connection even if reading fails.
        response.close()
    if detailedLog:
        log_CSQuery(queryAnswerXML)
    parser.feed(queryAnswerXML)
    return parser.close()
示例#14
0
文件: xml.py 项目: tnir/pandas
    def _parse_doc(
        self, raw_doc: FilePath | ReadBuffer[bytes] | ReadBuffer[str]
    ) -> _Element:
        """Parse ``raw_doc`` with lxml and return the parsed document.

        ``raw_doc`` is resolved through ``get_data_from_filepath`` using the
        instance's encoding, compression and storage options. ``StringIO``
        content is encoded with ``self.encoding`` and parsed from bytes;
        anything else is handed to lxml's ``parse`` directly.

        Raises:
            TypeError: if the input is a ``StringIO`` and ``self.encoding``
                is ``None``.
        """
        from lxml.etree import (
            XMLParser,
            fromstring,
            parse,
        )

        handle_data = get_data_from_filepath(
            filepath_or_buffer=raw_doc,
            encoding=self.encoding,
            compression=self.compression,
            storage_options=self.storage_options,
        )

        with preprocess_data(handle_data) as xml_data:
            curr_parser = XMLParser(encoding=self.encoding)

            if not isinstance(xml_data, io.StringIO):
                doc = parse(xml_data, parser=curr_parser)
            elif self.encoding is None:
                raise TypeError(
                    "Can not pass encoding None when input is StringIO."
                )
            else:
                encoded = xml_data.getvalue().encode(self.encoding)
                doc = fromstring(encoded, parser=curr_parser)

        return doc
示例#15
0
def get_ebelge_users():
    """Parse the site's ``newUserPkList.xml`` with an ``EbelgeUsers`` target.

    Returns whatever ``parser.close()`` returns for that target.
    """
    parser = XMLParser(target=EbelgeUsers())
    user_list_path = frappe.get_site_path(
        "private", "files", "KullaniciListesiXml", "newUserPkList.xml")
    user_list_xml = frappe.read_file(user_list_path)
    parser.feed(user_list_xml)
    return parser.close()
示例#16
0
def get_max_depth(exampleXml):
    """Feed ``exampleXml`` to a ``MaxDepth`` parser and return its result.

    The returned value is whatever ``parser.close()`` yields for the
    ``MaxDepth`` target.
    """
    depth_parser = XMLParser(target=MaxDepth())
    depth_parser.feed(exampleXml)
    return depth_parser.close()
示例#17
0
 def from_string(cls, xml_as_string, _fix_step=0):
     """Parse an XML string (or bytes) into a tree of ``cls`` elements.

     On a parse error the input is repaired step by step and re-parsed via
     ``cls.from_string``; ``_fix_step`` tracks which repair comes next:

     0. rewrite ``<style scoped>`` to ``<style scoped='1'>``
     1. the same rewrite, additionally wrapped in a ``<div>``
     2. replace ``&nbsp;`` with a space

     When all repairs are exhausted, the line-numbered input is logged and
     the parse error is re-raised. Other exceptions propagate unchanged.
     """
     if isinstance(xml_as_string, bytes):
         xml_as_string = xml_as_string.decode()
     builder = TreeBuilder(element_factory=cls)
     parser = XMLParser(target=builder)
     try:
         return xml.etree.ElementTree.fromstring(xml_as_string,
                                                 parser=parser)
     except Exception as err:  # xml.etree.ElementTree.ParseError
         # The error is matched by class name, see:
         # https://stackoverflow.com/questions/47917787/xml-etree-elementtree-parseerror-exception-handling-not-catching-errors
         if type(err).__name__ != 'ParseError':
             raise
         if _fix_step == 0:
             scoped = xml_as_string.replace("<style scoped>",
                                            "<style scoped='1'>")
             return cls.from_string(scoped, _fix_step=1)
         if _fix_step == 1:
             scoped = xml_as_string.replace("<style scoped>",
                                            "<style scoped='1'>")
             return cls.from_string("<div>" + scoped + "</div>",
                                    _fix_step=2)
         if _fix_step == 2:
             return cls.from_string(xml_as_string.replace("&nbsp;", " "),
                                    _fix_step=3)
         import logging
         numbered_lines = enumerate(xml_as_string.split("\n"), start=1)
         lined = "\n".join(f"{n: 4d} | {line}" for n, line in numbered_lines)
         logging.getLogger("").error(f"ParseError in:\n{lined}")
         raise
示例#18
0
def process_ant_segments(ant_file) -> List[tuple]:
    """Extract the phrase-layer segments from an annotation archive.

    ``annotation.xml`` is read from the zip file ``ant_file``. The id of the
    layer named ``phrase`` and the ``Samplerate`` configuration value are
    looked up; every segment of that layer becomes a
    ``(label, start, end)`` tuple, with start and duration divided by the
    sample rate and rounded to two decimals.

    Returns the tuples sorted by start.
    """
    with ZipFile(ant_file) as archive, archive.open('annotation.xml') as xml_file:
        annot = ElementTree.parse(
            xml_file, parser=XMLParser(encoding='utf-8')).getroot()

    ns = {'a': 'http://tempuri.org/AnnotationSystemDataSet.xsd'}

    def child_text(node, path):
        # Text of the first child matching the namespaced path.
        return node.find(path, ns).text

    phrase_layer = annot.find("a:Layer[a:Name='phrase']", ns)
    phrase_id = child_text(phrase_layer, 'a:Id')

    log.info(f'Found phrase layer id: {phrase_id}')

    rate_config = annot.find("a:Configuration[a:Key='Samplerate']", ns)
    samplerate = float(child_text(rate_config, 'a:Value'))

    log.info(f'Sample rate: {samplerate}')

    segments = []
    for segment in annot.findall(f"a:Segment[a:IdLayer='{phrase_id}']", ns):
        text = child_text(segment, 'a:Label')
        start = round(float(child_text(segment, 'a:Start')) / samplerate, 2)
        length = float(child_text(segment, 'a:Duration')) / samplerate
        # The end is derived from the already rounded start.
        end = round(start + length, 2)
        segments.append((text, start, end))

    segments.sort(key=lambda item: item[1])
    return segments
示例#19
0
    def Parse(self, data):
        """Parse a binary CryXmlB buffer.

        Data that does not carry the ``CryXmlB`` signature but starts with
        ``<`` is fed to a regular ``XMLParser`` on ``self.target`` and then
        signalled by raising ``_StandardXmlFile``.

        Raises:
            ValueError: if ``data`` is shorter than the header structure.
            ParseError: if the signature is neither ``CryXmlB`` nor XML-like.
            _StandardXmlFile: after the data was parsed as regular XML.
        """
        if len(data) < sizeof(CryXMLBHeader):
            raise ValueError(
                "File is not a binary XML file (file size is too small).")

        self._data = data
        self._header = CryXMLBHeader.from_buffer(data, 0)

        # TODO: actually do header validation - see references
        signature = self._header.signature
        if signature != b"CryXmlB":
            if not signature.startswith(b"<"):
                raise ParseError("Invalid CryXmlB Signature")
            # try parsing as a normal xml file
            text_parser = XMLParser(target=self.target)
            text_parser.feed(self._data)
            raise _StandardXmlFile()

        # Tables are read in the same order as before: attributes, child
        # indices, nodes.
        header = self._header
        self._attributes = list(
            map(self._read_attribute, range(header.attributes_count)))
        self._child_indices = list(
            map(self._read_child_index, range(header.child_table_count)))
        self._nodes = list(map(self._read_node, range(header.node_count)))

        root_node = self._read_node(0)
        assert root_node.parent_index == CRYXML_NO_PARENT
        self._iter_parse_nodes(root_node)
示例#20
0
    def _parse_content(self, use_bs4):
        """
        Parses the HTML content as a stream. This is far less memory
        intensive than loading the entire HTML file into memory, like
        BeautifulSoup does.

        :param use_bs4: when true, read the whole of ``self.stream`` with
            BeautifulSoup and replay its elements through
            ``_process_element``; otherwise stream it with
            ``ET.iterparse``.
        """
        if not use_bs4:
            for pos, element in ET.iterparse(
                    self.stream,
                    events=("start", "end"),
                    parser=XMLParser(encoding=str('UTF-8'))):
                self._process_element(pos, element)
        else:
            # Although apparently uncommon, some users have message logs that
            # may not conform to strict XML standards. We will fall back to
            # the BeautifulSoup parser in that case.
            from bs4 import BeautifulSoup
            # Close the file as soon as its content has been read.
            with open(self.stream, 'r') as html_file:
                soup = BeautifulSoup(html_file.read(), 'html.parser')
            self._process_element('end', soup.find('h1'))
            for thread_element in soup.find_all('div', class_='thread'):
                self._process_element('start', thread_element)
                for e in thread_element:
                    if e.name == 'div':
                        user = e.find('span', class_='user')
                        meta = e.find('span', class_='meta')
                        self._process_element('end', user)
                        self._process_element('end', meta)
                    elif e.name == 'p':
                        self._process_element('end', e)
                self._process_element('end', thread_element)

        self._clear_output()
示例#21
0
    def build(self, root=None):
        """Emit this builder and its children into a tree builder.

        Without ``root`` a new ``TreeBuilder`` is created and the finished
        tree is returned serialised as an HTML string. With ``root`` the
        element is emitted into it and ``None`` is returned.

        Children that are ``HTMLBuilder`` instances are built recursively.
        Children whose index is in ``self._formatted`` are parsed as markup
        through a ``TreeProxy``; if that fails, a message is printed and
        the child is added as plain text. All other children are added as
        plain text.
        """
        was_root = root is None
        if was_root:
            root = TreeBuilder()

        root.start(self.tagname(), self.attrs())
        for index, child in enumerate(self.children):
            if isinstance(child, HTMLBuilder):
                child.build(root=root)
                continue
            if index not in self._formatted:
                root.data(str(child))
                continue
            try:
                proxy = TreeProxy(root)
                markup_parser = XMLParser(html=True, target=proxy)
                markup_parser.feed(child)
                proxy.cleanup()
            except Exception as e:
                print("Bad formatting", e)
                root.data(str(child))
        root.end(self.tagname())

        if not was_root:
            return None
        root = root.close()
        return str(tostring(root, method="html").decode('utf-8'))
示例#22
0
 def __init__(self, stream, *pos, **kw):
     """Set up the parser over ``stream`` and read the first builder call.

     Extra positional and keyword arguments are forwarded to ``XMLParser``,
     whose target is a ``_QueueBuilder`` over ``self._pending``. The first
     item from ``self._read()`` is applied to the initial ``TreeBuilder``
     and its result stored as ``self.element``.
     """
     self._stream = stream
     self._pending = deque()
     queue_builder = _QueueBuilder(self._pending)
     self._parser = XMLParser(*pos, target=queue_builder, **kw)
     self._builders = [TreeBuilder()]
     method, call_args, call_kwargs = self._read()
     handler = getattr(self._builders[-1], method)
     self.element = handler(*call_args, **call_kwargs)
示例#23
0
def test_server_time(xml_data, login_strings):
    """Feed each login string to a parser targeting ``xml_data``, then check the server time."""
    # Here I have just manually plucked the last prompt time from the log
    for chunk in login_strings:
        try:
            parser = XMLParser(target=xml_data)
            parser.feed(chunk)
        except ParseError:
            # Strings that do not parse are skipped.
            pass
    assert xml_data.server_time == 1626783177
示例#24
0
def test_player_id(xml_data, login_strings):
    """Feed each login string to a parser targeting ``xml_data``, then check the player id."""
    # TODO: Make the XMLParser for loop DRY, its duplicated in every test and in core.
    for chunk in login_strings:
        try:
            parser = XMLParser(target=xml_data)
            parser.feed(chunk)
        except ParseError:
            # Strings that do not parse are skipped.
            pass
    assert xml_data.player_id == "440984"
示例#25
0
 def get_entries(self):
     """Parse ``self.filename`` into ``self.tree`` and collect its entries.

     Every ``entry`` element found in the tree is wrapped in ``Entry`` and
     appended to ``self.entries``. Nothing is returned.
     """
     utf8_parser = XMLParser(encoding='utf-8')
     self.tree.parse(self.filename, utf8_parser)
     for node in self.tree.iter("entry"):
         wrapped = Entry(node)
         self.entries.append(wrapped)
 def parse(self, fIn, oHolder):
     """Parse an XML file into the card set holder.

     Each item yielded by ``fIn`` is fed to a parser whose target is
     ``self._cState(oHolder)``. A ``ParseError`` is re-raised as
     ``IOError``, chained to the original error.
     """
     oState = self._cState(oHolder)
     oXMLParser = XMLParser(target=oState)
     try:
         for sChunk in fIn:
             oXMLParser.feed(sChunk)
     except ParseError as oErr:
         raise IOError('Not an valid XML file') from oErr
示例#27
0
def HTMLOfENML(text, resources=None):
    """Convert ``text`` to HTML using an ``HTMLCreatorTarget``.

    :param text: markup fed to the parser
    :param resources: mapping handed to ``HTMLCreatorTarget``; a new empty
        dict is used when omitted
    :return: ``target.root`` serialised with ``method='html'`` and
        ``encoding='utf8'``
    """
    # A fresh dict per call avoids sharing one mutable default object
    # between all calls.
    if resources is None:
        resources = {}

    target = HTMLCreatorTarget(resources)
    parser = XMLParser(target=target)
    parser.feed(text)
    parser.close()

    return tostring(target.root, encoding='utf8', method='html')
示例#28
0
def new_parsetree_from_xml(xml):
    """Parse the XML string ``xml`` and wrap its root in a ``ParseTree``."""
    # For some reason this does not work with cElementTree.XMLBuilder ...
    from xml.etree.ElementTree import XMLParser
    from zim.formats import ParseTree
    parser = XMLParser()
    parser.feed(xml)
    return ParseTree(parser.close())
    def find(self):
        """Look up the button element for the configured walk id.

        The user mappings file named by the config is parsed with a parser
        targeting ``self.parser``; the first ``button`` below
        ``self.x_axis_xpath`` whose ``modID`` equals ``config.walk_id()``
        is returned (``None`` if there is no match).
        """
        mappings_path = self.config.get_input_user_mappings_path()
        tree = ElementTree.parse(mappings_path,
                                 parser=XMLParser(target=self.parser))
        mod_id = self.config.walk_id()

        button_xpath = '{0}//button[@modID="{1}"]'.format(self.x_axis_xpath, mod_id)
        return tree.find(button_xpath)
示例#30
0
 def _parse_xml(self, xml_data):
   """
   Parse the xml into a python dictionary

   The root element produced by the parser is wrapped in ``XmlDictConfig``
   and returned.
   """
   xml_parser = XMLParser()
   xml_parser.feed(xml_data)
   return XmlDictConfig(xml_parser.close())