def from_dataframe(cls, df, **kwargs):
    """Build an element tree (with *cls* as the element factory) from a pandas object.

    :param df: a ``pandas.DataFrame`` or a ``pandas.io.formats.style.Styler``.
    :param kwargs: forwarded to ``DataFrame.to_html``.
    :return: root element of the parsed HTML, or a fallback ``<div>`` wrapping
        the DataFrame's HTML plus debug info when the Styler output is unparseable.
    """
    if isinstance(df, pandas.DataFrame):
        return xml.etree.ElementTree.fromstring(
            df.to_html(**kwargs),
            parser=XMLParser(target=TreeBuilder(element_factory=cls)))
    elif isinstance(df, pandas.io.formats.style.Styler):
        render = df.render()
        # Styler.render() emits unquoted attribute values (colspan=2), which
        # is not well-formed XML; quote them.  Raw strings avoid the invalid
        # "\g" escape the original non-raw replacement string produced.
        render = re.sub(r'colspan=(\d+)>', r'colspan="\g<1>">', render)
        try:
            return xml.etree.ElementTree.fromstring(
                f"<div>{render}</div>",
                parser=XMLParser(target=TreeBuilder(element_factory=cls)))
        except xml.etree.ElementTree.ParseError as parse_err:
            # Catch ParseError directly instead of matching the exception's
            # class name; any other exception propagates as before.
            # Fall back to the underlying DataFrame's HTML and attach the
            # unparseable render plus the error text for debugging.
            x = Elem('div')
            x << xml.etree.ElementTree.fromstring(
                df.data.to_html(**kwargs),
                parser=XMLParser(target=TreeBuilder(element_factory=cls)))
            x << Elem('pre', text=render)
            x << Elem('pre', text=str(parse_err))
            return x
def compare_vtk(vtk1, vtk2, absolute=1.2e-7, relative=1e-2,
                zeroValueThreshold=None, verbose=True):
    """ Take two vtk files and fuzzy compare them. Returns an exit key as return value.

    :param vtk1: The filename of the vtk files to compare
    :type vtk1: string
    :param vtk2: The filename of the vtk files to compare
    :type vtk2: string
    :param absolute: The epsilon used for comparing numbers with an absolute criterion
    :type absolute: float
    :param relative: The epsilon used for comparing numbers with a relative criterion
    :type relative: float
    :param zeroValueThreshold: A dictionary of parameter value pairs that set the threshold under
                               which a number is treated as zero for a certain parameter. Use this
                               parameter if you have to avoid comparisons of very small numbers for
                               a certain parameter. Defaults to an empty dict.
    :type zeroValueThreshold: dict
    :param verbose: If the script should produce informative output. Enabled by default as the
                    details give the tester a lot more information on why tests fail.
    :type verbose: bool
    """
    # None sentinel instead of a mutable default argument ({} shared across calls).
    if zeroValueThreshold is None:
        zeroValueThreshold = {}

    # construct element tree from vtk file
    root1 = ET.parse(vtk1, parser=XMLParser(target=VTKTreeBuilder())).getroot()
    root2 = ET.parse(vtk2, parser=XMLParser(target=VTKTreeBuilder())).getroot()

    # sort the vtk file in case nodes appear in different positions
    # e.g. because of minor changes in the output code
    sortedroot1 = sort_vtk(root1)
    sortedroot2 = sort_vtk(root2)

    if verbose:
        print("Comparing {} and {}".format(vtk1, vtk2))
        print(
            "...with a maximum relative error of {} and a maximum absolute error of {}*p_max, where p_max is highest absolute parameter value."
            .format(relative, absolute))

    # sort the vtk file so that the comparison is independent of the
    # index numbering (coming e.g. from different grid managers)
    sortedroot1, sortedroot2 = sort_vtk_by_coordinates(sortedroot1, sortedroot2, verbose)

    # do the fuzzy compare
    if is_fuzzy_equal_node(sortedroot1, sortedroot2, absolute, relative,
                           zeroValueThreshold, verbose):
        return 0
    else:
        return 1
def main():
    """Rewrite a BEAST XML configuration: strip the old HKY/site models and
    splice in per-partition substitution models read from a modeltest table."""
    super_concat = False
    options, args = interface()
    xml = ElementTree().parse(options.input,
                              parser=XMLParser(target=MyTreeBuilder()))
    # delete the older subs. models from the xml file
    for node in ['HKYModel', 'siteModel']:
        xml = delete_node(xml, node, 1)
    if super_concat:
        # NOTE(review): tag is spelled 'treeLikehood' (sic) in the original —
        # confirm against the input XML schema before renaming it.
        xml = delete_node(xml, 'treeLikehood', 1)
    # delete the kappa and frequency parameters in 'operators'
    for parameter in ['kappa', 'frequencies']:
        xml = delete_children_from_node(xml, 'operators', parameter)
        xml = delete_children_from_node(xml, 'prior', parameter, 2)
        xml = delete_children_from_log_node(xml, 'log', parameter)
    # jettison some comments
    xml = comment_remover(xml, ['HKY substitution model', 'site model'])
    # get our subs model information.  'rU' mode was removed in Python 3.11;
    # plain 'r' already gives universal newlines, and the context manager
    # closes the handle the original leaked.
    with open(options.subs, 'r') as subs_file:
        sub_models_from_modeltest = {
            line.strip().split('\t')[0]: line.strip().split('\t')[1].split('-')[1]
            for line in subs_file
        }
    model_names, site_names = get_xml_model_names(
        set(sub_models_from_modeltest.values()))
    model_data = ElementTree().parse(options.params,
                                     parser=XMLParser(target=MyTreeBuilder()))
    # get the xml data that we need to add for the models and their parameters
    models_to_add = get_generic_section_to_add(model_names, model_data, 'models')
    sites_to_add = get_generic_section_to_add(site_names, model_data, 'sites')
    operators_to_add = get_generic_section_children_to_add(
        model_names, model_data, 'operators')
    log_entries_to_add = get_log_entries_to_add(
        model_names.union(site_names), model_data)
    priors_to_add = get_generic_section_children_to_add(
        model_names, model_data, 'priors')
    if super_concat:
        likelihood_framework_to_add = get_generic_section_to_add(
            model_names, model_data, 'likelihoods')
    # get the last position of the strictClockBranchRates
    insert_position = get_position(xml, 'strictClockBranchRates')
    # insert the models and sites we need
    insert_position = insert_models_and_sites(
        xml, insert_position, models_to_add, sites_to_add)
    # modify the tree likelihood statements
    if not super_concat:
        xml = update_tree_likelihoods(xml, sub_models_from_modeltest)
    else:
        insert_position = get_position(xml, 'siteModel')
        xml = insert_tree_likelihoods(xml, sub_models_from_modeltest)
    # insert the operators we need
    xml = insert_to_generic_sections(xml, operators_to_add, 'operators', 'operators')
    # insert the priors we need
    xml = insert_to_generic_sections(xml, priors_to_add, 'prior', 'prior')
    # alter the log node to collect data
    xml = insert_to_generic_sections(xml, log_entries_to_add, 'log', 'fileLog')
    # write to the output file
    write(xml, options.output)
def __init__(self, xml_encode):
    """Parse *xml_encode* at construction time and keep the raw string.

    :param xml_encode: XML document as a string.
    """
    super().__init__()
    parser = XMLParser(target=MaxDepth())
    # feed() returns None; the original bound it to an unused local ('pepe').
    parser.feed(xml_encode)
    parser.close()
    # NOTE(review): resul_property is not assigned in this method — presumably
    # it is set by the base class or by the MaxDepth target via shared state;
    # confirm before relying on it.
    print(self.resul_property)
    self.xml_encode = xml_encode
def load(self, oFileBrowser: FileBrowser) -> None:
    """Load the codeset file selected in *oFileBrowser*, show its raw XML in
    the first text input, convert every code's 'cmd' attribute from the
    global-cache format to the Keene format, and show the converted XML in
    the second text input.  Always dismisses the popup at the end.

    :param oFileBrowser: file browser widget; its first selection (if any)
        is taken as the codeset file to load.
    """
    oCode: Element
    oXMLRoot: Element
    uCmd: str
    # No selection means the user cancelled; just close the popup below.
    if len(oFileBrowser.selection) != 0:
        self.uCodesetFileName = oFileBrowser.selection[0]
        try:
            # CommentedTreeBuilder keeps XML comments in the parsed tree.
            oParser = XMLParser(target=CommentedTreeBuilder())
            oXMLRoot = LoadXMLFile(oFile=cFileName().ImportFullPath(
                uFnFullName=self.uCodesetFileName), oParser=oParser)
            if oXMLRoot is not None:
                # Show the untouched source first.
                self.oTextInput.text = tostring(oXMLRoot)
                # Convert each code's 'cmd' attribute in place.
                for oCode in oXMLRoot:
                    uCmd = oCode.get('cmd')
                    if uCmd is not None:
                        oCode.set('cmd', GlobalCacheToKeene(uCmd))
                self.AdjustRepManagerITachToKeene(oXMLRoot)
                self.oTextInput2.text = tostring(oXMLRoot)
        except ParseError as uErrMsg:
            uMsg = 'Parse Error ' + str(uErrMsg)
            Logger.error(uMsg)
        except Exception as e:
            # Best-effort UI handler: log and continue so the popup still closes.
            uMsg = 'General Error ' + str(e)
            Logger.error(uMsg)
    self.dismiss_popup()
def test_name(xml_data, login_strings): for string in login_strings: try: XMLParser(target=xml_data).feed(string) except ParseError: continue assert xml_data.name == "Crannach"
def test_instance(xml_data, login_strings): for string in login_strings: try: XMLParser(target=xml_data).feed(string) except ParseError: continue assert xml_data.game == "DR"
def _parse_doc(self, raw_doc) -> bytes:
    """Read *raw_doc*, parse it with lxml, and return the document
    re-serialized as bytes.

    :param raw_doc: path or buffer accepted by ``get_data_from_filepath``.
    :raises TypeError: if the input decodes to a StringIO but ``self.encoding``
        is None (consistent with the sibling ``_parse_doc`` implementation).
    """
    from lxml.etree import (
        XMLParser,
        fromstring,
        parse,
        tostring,
    )

    handle_data = get_data_from_filepath(
        filepath_or_buffer=raw_doc,
        encoding=self.encoding,
        compression=self.compression,
        storage_options=self.storage_options,
    )

    with preprocess_data(handle_data) as xml_data:
        curr_parser = XMLParser(encoding=self.encoding)

        if isinstance(xml_data, io.StringIO):
            # A StringIO must be re-encoded for lxml; fail with a clear
            # message instead of letting str.encode(None) raise.
            if self.encoding is None:
                raise TypeError(
                    "Can not pass encoding None when input is StringIO."
                )
            doc = fromstring(
                xml_data.getvalue().encode(self.encoding), parser=curr_parser
            )
        else:
            doc = parse(xml_data, parser=curr_parser)

    return tostring(doc)
def convert_jpr(filename, outfilename):
    """Convert a JDeveloper .jpr project file: replace internally defined
    libraries with their mapped equivalents and rewrite the library
    reference lists, then write the result to *outfilename*.

    :param filename: path of the input .jpr file.
    :param outfilename: path of the converted output file.
    """
    parser = XMLParser(encoding='UTF-8')
    xml = ET.parse(filename, parser)
    xmlDocType = xml.getroot().tag
    ET.register_namespace('jpr', "http://xmlns.oracle.com/ide/project")
    if xmlDocType == "{http://xmlns.oracle.com/ide/project}project":
        to_be_added_library = []
        to_be_removed_library = []
        internalDefinition = xml.getroot().find(
            ".//hash[@n='internalDefinitions']")
        if internalDefinition is not None:
            libraryDefinitions = internalDefinition.find(
                "./list[@n='libraryDefinitions']")
            # Iterate over a snapshot: removing children from the element
            # being iterated silently skips every other definition.
            for libraryDefinition in list(libraryDefinitions):
                lib_id = libraryDefinition.find("./value[@n='id']")
                # .get(): unmapped ids are kept untouched instead of raising
                # KeyError (the original 'is not None' check implies this intent).
                lib = mapping.get(lib_id.get("v"))
                if lib is not None:
                    to_be_added_library.append(lib)
                    libraryDefinitions.remove(libraryDefinition)
                    to_be_removed_library.append(lib_id.get("v"))
        exportedReferences = xml.getroot().find(
            ".//list[@n='exportedReferences']")
        libraryReferences = xml.getroot().find(
            ".//list[@n='libraryReferences']")
        removeLibraryReferences(exportedReferences, to_be_removed_library)
        removeLibraryReferences(libraryReferences, to_be_removed_library)
        for lib in to_be_added_library:
            addLibraryReference(exportedReferences, lib)
            addLibraryReference(libraryReferences, lib)
    # Write the (possibly unchanged) document so the output file always exists.
    xml.write(outfilename, encoding="UTF-8", xml_declaration=True)
def parse_impl(self):
    """Stream-parse a Facebook `messages.htm` export: pick up the user name
    from the <h1>, then process each thread anchor found inside the content
    div.  Returns a FacebookChatHistory with the collected threads.

    :raises UnsuitableParserError: if no thread anchors were seen, which
        indicates the file is probably in the legacy (non-anchor) format.
    """
    ignore_anchors = True
    saw_anchor = False
    # Cast to str to ensure not unicode under Python 2, as the parser
    # doesn't like that.
    parser = XMLParser(encoding=str('UTF-8'))
    element_iter = ET.iterparse(self.handle, events=("start", "end"),
                                parser=parser)
    for pos, element in element_iter:
        tag, class_attr = _tag_and_class_attr(element)
        if tag == "h1" and pos == "end":
            # The first <h1> carries the exporting user's name.
            if not self.user:
                self.user = element.text.strip()
        elif tag == "div" and "content" in class_attr and pos == "start":
            # Anchors before the content div are navigation, not threads.
            ignore_anchors = False
        elif tag == "a" and pos == "start":
            if ignore_anchors:
                continue
            saw_anchor = True
            participants = self.parse_participants(element)
            # Strip the leading "../" so the href becomes a relative path.
            thread_path = re.sub(r'^../', '', element.attrib['href'])
            if using_windows():
                thread_path = thread_path.replace('/', '\\')
            self.process_thread(participants, thread_path)
    if not saw_anchor:
        # Indicator of a `messages.htm` file that is probably in the legacy format.
        raise UnsuitableParserError
    self._clear_output()
    return FacebookChatHistory(self.user, self.chat_threads)
def parse_xml(source):
    """Parse *source* (a file-like object) into an element tree built with
    the Meld tree builder.

    The previous docstring documented an ``html`` flag this function does not
    accept; this parser handles XML only.
    """
    builder = MeldTreeBuilder()
    parser = XMLParser(target=builder)
    return do_parse(source, parser)
def from_string(cls, xml_as_string): if isinstance(xml_as_string, bytes): xml_as_string = xml_as_string.decode() try: return xml.etree.ElementTree.fromstring(xml_as_string, parser=XMLParser(target=TreeBuilder(element_factory=cls))) except xml.etree.ElementTree.ParseError: return cls.from_string(xml_as_string.replace("<style scoped>","<style scoped='1'>"))
def getQueryResult(query, detailedLog=True):
    """Fetch *query* (a URL), feed the XML answer to a ProcessCSQueryResult
    target, and return what the target produces on close().

    :param query: URL of the content-server query.
    :param detailedLog: when True, log the raw XML answer via log_CSQuery.
    """
    # Python 3: urllib.urlopen no longer exists; use urllib.request and a
    # context manager so the socket is closed deterministically.
    from urllib.request import urlopen

    parser = XMLParser(target=ProcessCSQueryResult())
    with urlopen(query) as response:
        queryAnswerXML = response.read()
    if detailedLog:
        log_CSQuery(queryAnswerXML)
    parser.feed(queryAnswerXML)
    return parser.close()
def _parse_doc(
    self, raw_doc: FilePath | ReadBuffer[bytes] | ReadBuffer[str]
) -> _Element:
    """Load *raw_doc*, run it through an lxml parser configured with this
    object's encoding, and return the resulting document."""
    from lxml.etree import (
        XMLParser,
        fromstring,
        parse,
    )

    buffer = get_data_from_filepath(
        filepath_or_buffer=raw_doc,
        encoding=self.encoding,
        compression=self.compression,
        storage_options=self.storage_options,
    )

    with preprocess_data(buffer) as xml_data:
        lxml_parser = XMLParser(encoding=self.encoding)

        if isinstance(xml_data, io.StringIO):
            # StringIO content must be re-encoded to bytes for lxml, which
            # is impossible without a declared encoding.
            if self.encoding is None:
                raise TypeError(
                    "Can not pass encoding None when input is StringIO."
                )
            payload = xml_data.getvalue().encode(self.encoding)
            document = fromstring(payload, parser=lxml_parser)
        else:
            document = parse(xml_data, parser=lxml_parser)

    return document
def get_ebelge_users():
    """Parse the cached user-list XML (newUserPkList.xml) from the site's
    private files and return whatever the EbelgeUsers target accumulates."""
    xml_path = frappe.get_site_path(
        "private", "files", "KullaniciListesiXml", "newUserPkList.xml")
    parser = XMLParser(target=EbelgeUsers())
    parser.feed(frappe.read_file(xml_path))
    return parser.close()
def get_max_depth(exampleXml):
    """Run *exampleXml* through a MaxDepth parser target and return the
    value the target reports when the parser is closed."""
    parser = XMLParser(target=MaxDepth())
    parser.feed(exampleXml)
    return parser.close()
def from_string(cls, xml_as_string, _fix_step=0):
    """Parse *xml_as_string* (str or bytes) into an element tree built from
    *cls*, retrying with up to three successive repairs when the document is
    not well-formed (valueless ``<style scoped>``, missing single root,
    stray characters).

    :param _fix_step: internal recursion counter selecting the next repair;
        callers should not pass it.
    :raises: re-raises the ParseError after logging when all repairs fail,
        and re-raises any non-ParseError immediately.
    """
    if isinstance(xml_as_string, bytes):
        xml_as_string = xml_as_string.decode()
    parser = XMLParser(target=TreeBuilder(element_factory=cls))
    try:
        return xml.etree.ElementTree.fromstring(xml_as_string, parser=parser)
    except Exception as err:  # xml.etree.ElementTree.ParseError
        # Matching on the class name rather than the class itself:
        # see: https://stackoverflow.com/questions/47917787/xml-etree-elementtree-parseerror-exception-handling-not-catching-errors
        if type(err).__name__ == 'ParseError':
            if _fix_step == 0:
                # Repair 1: quote the valueless 'scoped' attribute.
                return cls.from_string(xml_as_string.replace(
                    "<style scoped>", "<style scoped='1'>"), _fix_step=1)
            elif _fix_step == 1:
                # Repair 2: same, plus wrap in a single <div> root.
                return cls.from_string("<div>" + xml_as_string.replace(
                    "<style scoped>", "<style scoped='1'>") + "</div>",
                    _fix_step=2)
            elif _fix_step == 2:
                # Repair 3: presumably normalizes a non-breaking space
                # (U+00A0) to a regular space — verify the first literal
                # survived copy/paste; as plain-space -> space it is a no-op.
                return cls.from_string(xml_as_string.replace(
                    " ", " "), _fix_step=3)
            else:
                # All repairs exhausted: log the document with line numbers
                # to make the offending line easy to find, then re-raise.
                import logging
                lined = "\n".join(f"{n: 4d} | {line}" for n, line in enumerate(
                    xml_as_string.split("\n"), start=1))
                logging.getLogger("").error(f"ParseError in:\n{lined}")
                raise
        else:
            raise
def process_ant_segments(ant_file) -> List[tuple]:
    """Extract the phrase-layer segments from an .ant annotation archive.

    Returns a list of ``(label, start, end)`` tuples with times in seconds
    (rounded to two decimals), sorted by start time.
    """
    with ZipFile(ant_file) as archive:
        with archive.open('annotation.xml') as handle:
            annot = ElementTree.parse(
                handle, parser=XMLParser(encoding='utf-8')).getroot()

    ns = {'a': 'http://tempuri.org/AnnotationSystemDataSet.xsd'}

    layer = annot.find("a:Layer[a:Name='phrase']", ns)
    phrase_id = layer.find('a:Id', ns).text
    log.info(f'Found phrase layer id: {phrase_id}')

    config = annot.find("a:Configuration[a:Key='Samplerate']", ns)
    samplerate = float(config.find('a:Value', ns).text)
    log.info(f'Sample rate: {samplerate}')

    segments = []
    for seg in annot.findall(f"a:Segment[a:IdLayer='{phrase_id}']", ns):
        label = seg.find('a:Label', ns).text
        start = round(float(seg.find('a:Start', ns).text) / samplerate, 2)
        duration = float(seg.find('a:Duration', ns).text) / samplerate
        segments.append((label, start, round(start + duration, 2)))
    return sorted(segments, key=lambda item: item[1])
def Parse(self, data):
    """Parse *data* as a CryXmlB binary XML blob, populating the attribute,
    child-index and node tables, then walk the tree from the root node.

    If the buffer actually starts with '<', it is treated as a plain-text
    XML file: it is fed to self.target and ``_StandardXmlFile`` is raised as
    a signal to the caller (exception used as control flow, not an error).

    :param data: buffer containing the file contents.
    :raises ValueError: if the buffer is smaller than a CryXMLB header.
    :raises ParseError: if the signature is neither CryXmlB nor plain XML.
    :raises _StandardXmlFile: signal that the data was ordinary XML and has
        already been fed to the target parser.
    """
    if len(data) < sizeof(CryXMLBHeader):
        raise ValueError(
            "File is not a binary XML file (file size is too small).")
    self._data = data
    self._header = CryXMLBHeader.from_buffer(data, 0)
    # TODO: actually do header validation - see references
    if self._header.signature != b"CryXmlB":
        if self._header.signature.startswith(b"<"):
            # try parsing as a normal xml file
            parser = XMLParser(target=self.target)
            parser.feed(self._data)
            raise _StandardXmlFile()
        raise ParseError("Invalid CryXmlB Signature")
    # Materialize the fixed-size tables described by the header.
    self._attributes = [
        self._read_attribute(i)
        for i in range(self._header.attributes_count)
    ]
    self._child_indices = [
        self._read_child_index(i)
        for i in range(self._header.child_table_count)
    ]
    self._nodes = [
        self._read_node(i) for i in range(self._header.node_count)
    ]
    root_node = self._read_node(0)
    # Node 0 must be the root (no parent) for the walk below to be complete.
    assert root_node.parent_index == CRYXML_NO_PARENT
    self._iter_parse_nodes(root_node)
def _parse_content(self, use_bs4):
    """
    Parses the HTML content as a stream. This is far less memory intensive
    than loading the entire HTML file into memory, like BeautifulSoup does.

    :param use_bs4: when True, fall back to BeautifulSoup's lenient
        'html.parser' instead of the strict streaming XML parser.
    """
    if not use_bs4:
        # Strict path: stream start/end events straight into the handler.
        for pos, element in ET.iterparse(
                self.stream, events=("start", "end"),
                parser=XMLParser(encoding=str('UTF-8'))):
            self._process_element(pos, element)
    else:
        # Although apparently uncommon, some users have message logs that
        # may not conform to strict XML standards. We will fall back to
        # the BeautifulSoup parser in that case.
        from bs4 import BeautifulSoup
        # NOTE(review): this open() is never closed — consider a with-block.
        soup = BeautifulSoup(open(self.stream, 'r').read(), 'html.parser')
        # Replay the same event sequence the streaming path would emit.
        self._process_element('end', soup.find('h1'))
        for thread_element in soup.find_all('div', class_='thread'):
            self._process_element('start', thread_element)
            for e in thread_element:
                if e.name == 'div':
                    user = e.find('span', class_='user')
                    meta = e.find('span', class_='meta')
                    self._process_element('end', user)
                    self._process_element('end', meta)
                elif e.name == 'p':
                    self._process_element('end', e)
            self._process_element('end', thread_element)
    self._clear_output()
def build(self, root=None):
    """Serialize this builder (and its children, recursively) into *root*.

    When called without *root* this is the top-level invocation: a fresh
    TreeBuilder is created and the finished tree is returned as an HTML
    string.  Recursive calls pass the shared builder and return None.

    :param root: tree builder to emit into, or None at the top level.
    :return: HTML string at the top level, otherwise None.
    """
    if root is None:
        was_root = True
        root = TreeBuilder()
    else:
        was_root = False
    root.start(self.tagname(), self.attrs())
    for i, child in enumerate(self.children):
        if isinstance(child, HTMLBuilder):
            # Nested builder: recurse into the shared tree.
            child.build(root=root)
        else:
            if i in self._formatted:
                # Children marked as formatted contain markup and are parsed
                # into the tree via a proxy target.
                # NOTE(review): XMLParser(html=True) — the 'html' argument was
                # removed from xml.etree's XMLParser in Python 3.9; confirm
                # which XMLParser is imported here.
                try:
                    proxy = TreeProxy(root)
                    parser = XMLParser(html=True, target=proxy)
                    parser.feed(child)
                    proxy.cleanup()
                except Exception as e:
                    # Best effort: fall back to emitting the raw text.
                    print("Bad formatting", e)
                    root.data(str(child))
            else:
                root.data(str(child))
    root.end(self.tagname())
    if was_root:
        root = root.close()
        return str(tostring(root, method="html").decode('utf-8'))
def __init__(self, stream, *pos, **kw):
    """Set up incremental parsing of *stream*.

    Parser events are queued by a _QueueBuilder into self._pending and then
    replayed onto a stack of TreeBuilders; extra positional/keyword
    arguments are forwarded to XMLParser.
    """
    self._stream = stream
    # Events produced by the parser, consumed by self._read().
    self._pending = deque()
    builder = _QueueBuilder(self._pending)
    self._parser = XMLParser(*pos, target=builder, **kw)
    # Stack of builders; events are replayed onto the innermost one.
    self._builders = [TreeBuilder()]
    # Pull the first queued event and apply it, yielding the root element.
    [method, pos, kw] = self._read()
    self.element = getattr(self._builders[-1], method)(*pos, **kw)
def test_server_time(xml_data, login_strings): # Here I have just manually plucked the last prompt time from the log for string in login_strings: try: XMLParser(target=xml_data).feed(string) except ParseError: continue assert xml_data.server_time == 1626783177
def test_player_id(xml_data, login_strings): # TODO: Make the XMLParser for loop DRY, its duplicated in every test and in core. for string in login_strings: try: XMLParser(target=xml_data).feed(string) except ParseError: continue assert xml_data.player_id == "440984"
def get_entries(self):
    """Parse self.filename into self.tree and append an Entry wrapper for
    every <entry> element to self.entries.

    :return: None; results accumulate in self.entries.
    """
    self.tree.parse(self.filename, XMLParser(encoding='utf-8'))
    self.entries.extend(Entry(node) for node in self.tree.iter("entry"))
def parse(self, fIn, oHolder):
    """Parse XML file into the card set holder.

    :param fIn: iterable of XML lines (typically an open file).
    :param oHolder: card set holder populated by the parser target.
    :raises IOError: if the input is not valid XML (chains the ParseError).
    """
    oParser = XMLParser(target=self._cState(oHolder))
    try:
        for sLine in fIn:
            oParser.feed(sLine)
    except ParseError as oExp:
        # Fixed grammar of the message ("an valid" -> "a valid").
        raise IOError('Not a valid XML file') from oExp
def HTMLOfENML(text, resources=None):
    """Convert an ENML (Evernote markup) document to HTML bytes.

    :param text: ENML source to convert.
    :param resources: optional mapping passed to HTMLCreatorTarget;
        defaults to an empty dict.
    :return: the converted document as utf8-encoded HTML bytes.
    """
    # None sentinel instead of a mutable default argument ({} shared across calls).
    if resources is None:
        resources = {}
    target = HTMLCreatorTarget(resources)
    parser = XMLParser(target=target)
    parser.feed(text)
    parser.close()
    return tostring(target.root, encoding='utf8', method='html')
def new_parsetree_from_xml(xml):
    """Build a zim ParseTree from an XML string."""
    # For some reason this does not work with cElementTree.XMLBuilder ...
    from xml.etree.ElementTree import XMLParser

    from zim.formats import ParseTree

    parser = XMLParser()
    parser.feed(xml)
    return ParseTree(parser.close())
def find(self):
    """Locate the <button> element carrying this mod's id in the user
    mappings file, searching under the x-axis xpath."""
    mappings_file = self.config.get_input_user_mappings_path()
    root = ElementTree.parse(mappings_file,
                             parser=XMLParser(target=self.parser))
    mod_id = self.config.walk_id()
    query = '{0}//button[@modID="{1}"]'.format(self.x_axis_xpath, mod_id)
    return root.find(query)
def _parse_xml(self, xml_data):
    """Parse *xml_data* (an XML string) into a python dictionary.

    :param xml_data: XML document as a string.
    :return: XmlDictConfig built from the document root.
    """
    parser = XMLParser()
    # feed() returns None (the original bound it to an unused 'tree' local);
    # the parsed root comes from close().
    parser.feed(xml_data)
    root = parser.close()
    return XmlDictConfig(root)