def getRTCCTV(self, lng, lat):
    """Fetch CCTV stream URLs near a coordinate from the "ex" and "its" feeds.

    Args:
        lng: longitude of the point of interest.
        lat: latitude of the point of interest.

    Returns:
        list[str]: JSON strings of the form {"content": <cctv url>},
        "ex" feed results first, then "its" feed results.
    """
    # A tiny bounding box around the point selects only cameras at (lng, lat).
    param_cctv = "key=%s&ReqType=2&MinX=%s&MaxX=%s&MinY=%s&MaxY=%s&type=" % (
        self.API_KEY,
        str(lng - 0.000001), str(lng + 0.000001),
        str(lat - 0.000001), str(lat + 0.000001))

    def fetch(feed_type):
        # Download one feed and return its {"content": url} JSON strings.
        response = urllib.request.urlopen(self.cctv_url + param_cctv + feed_type)
        # fromstring: the payload is a single XML document, not a list of
        # fragments (fromstringlist on a str only worked by iterating chars).
        tree = eltree.fromstring(response.read().decode('utf-8'))
        return [json.dumps({"content": node.find('cctvurl').text},
                           ensure_ascii=False)
                for node in tree.findall('data')]

    # Leftover debug print removed; callers receive the list instead.
    return fetch("ex") + fetch("its")
def getAcci(self):
    """Fetch traffic-incident markers from the "ex" and "its" feeds.

    Returns:
        list[str]: JSON strings with title/lat/lng/content keys,
        "ex" feed results first, then "its" feed results.
    """
    def fetch(feed_type):
        # Download one feed and convert each <data> node into a marker dict.
        response = urllib.request.urlopen(self.accid_url + self.params + feed_type)
        # fromstring: the payload is one XML document, not a fragment list.
        tree = eltree.fromstring(response.read().decode('utf-8'))
        markers = []
        for node in tree.findall('data'):
            data = {
                "title": node.find('incidentmsg').text,
                "lat": float(node.find('coordy').text),
                "lng": float(node.find('coordx').text),
                # The incident message doubles as the marker body.
                "content": node.find('incidentmsg').text,
            }
            markers.append(json.dumps(data, ensure_ascii=False))
        return markers

    return fetch("ex") + fetch("its")
def main():
    """Import an XML class dump into IDA, with a repair pass for bad XML."""
    # Mark IDA busy so auto-analysis does not run concurrently with the import.
    ida_auto.set_ida_state(ida_auto.st_Work)
    data = None
    with open(ida_kernwin.ask_file(0, "*.xml", "Select a file to import")) as f:
        data = f.readlines()
    if data is None:
        # NOTE(review): readlines() returns a list, so data is never None after
        # a successful open — this guard looks vestigial; confirm.
        ida_auto.set_ida_state(ida_auto.st_Ready)
        return
    ida_kernwin.show_wait_box("Importing file")
    make_basic_structs()
    try:
        # SM 1.10 <= has bad XML, assume its correct first then try to fix it
        tree = et.fromstringlist(data)
    except:
        fix_xml(data)
        tree = et.fromstringlist(data)
    if tree is None:
        # NOTE(review): fromstringlist raises on failure rather than returning
        # None, so this branch looks unreachable — confirm before relying on it.
        ida_kernwin.hide_wait_box()
        ida_kernwin.warning("Something bad happened :(")
        ida_auto.set_ida_state(ida_auto.st_Ready)
        return
    global IMPORT_VTABLE
    # Ask once whether vtables should be imported; parse_class reads the global.
    IMPORT_VTABLE = ida_kernwin.ask_yn(
        1, "Import virtual tables for classes? (Longer)")
    for i in tree:
        parse_class(i)
    ida_kernwin.hide_wait_box()
    ida_auto.set_ida_state(ida_auto.st_Ready)
def getCCTV(self):
    """Fetch CCTV map markers from the "ex" and "its" feeds.

    Returns:
        list[str]: JSON strings with title/lat/lng/content keys,
        "ex" feed results first, then "its" feed results.
    """
    def fetch(feed_type):
        # Download one feed and convert each <data> node into a marker dict.
        response = urllib.request.urlopen(self.cctv_url + self.params + feed_type)
        # fromstring: the payload is one XML document, not a fragment list
        # (fromstringlist on a str only worked by iterating its characters).
        tree = eltree.fromstring(response.read().decode('utf-8'))
        markers = []
        for node in tree.findall('data'):
            data = {
                "title": node.find('cctvname').text,
                "lat": float(node.find('coordy').text),
                "lng": float(node.find('coordx').text),
                "content": node.find('cctvurl').text,
            }
            markers.append(json.dumps(data, ensure_ascii=False))
        return markers

    return fetch("ex") + fetch("its")
def create_xml_diff_from_files(file1, file2, logger=None):
    """Build an XmlComparator over the root elements of two XML files.

    Raises a generic Exception when either path is missing/empty.
    """
    if not file1 or not file2:
        raise Exception('Expected files path as parameters')

    def _root_of(path):
        # Parse one file's lines into its root element.
        with open(path, 'r') as handle:
            return ET.fromstringlist(handle.readlines())

    return XmlComparator(_root_of(file1), _root_of(file2), logger)
def split_file(filename):
    """Split *filename* into one output file per patent document.

    Each patent starts with its own "<?xml" declaration line.  Every chunk
    is parsed and rewritten to "patent.data-{n}", n counting from 0.
    """
    def _write_chunk(lines, number):
        # Parse the accumulated lines and serialise them as their own document.
        root = ET.fromstringlist(lines)
        ET.ElementTree(root).write("patent.data-{}".format(number),
                                   encoding='UTF-8')

    output = []
    file_number = 0
    # Context manager: the original leaked the open file handle.
    with open(filename) as f:
        output.append(f.readline())
        for line in f:
            if line.startswith("<?xml"):
                # A new declaration marks the next document: flush the current one.
                _write_chunk(output, file_number)
                output = []
                file_number += 1
            output.append(line)
    # Flush the trailing document.
    _write_chunk(output, file_number)
def parse_opml_file(filename):
    """Return the xmlUrl attribute of every RSS outline in an OPML file."""
    with open(filename) as fp:
        root = etree.fromstringlist(fp)
    urls = []
    for outline in root.findall("*/outline/[@type='rss']"):
        url = outline.get('xmlUrl')
        if url is not None:
            urls.append(url)
    return urls
def __parse_linear_base64(self) -> Element:
    """Parse an email whose base64 body is spread across multiple lines.

    The original parser fails on messages like::

        Content-Type: text/plain\r\n
        Content-Transfer-Encoding: base64\r\n\r\n
        PHByZW...==\r\n
        ...
        ...==\r\n

    As a workaround the raw payload is split into lines; each non-empty
    line that looks like a base64 chunk (ends with "==" padding or "+")
    is decoded, anything else passes through, and the pieces are handed
    to the XML parser as a string list.

    :return: Element
    """
    payload = self.__message.get_payload()
    if isinstance(payload, (str, bytes)):
        payload = payload.splitlines()

    def pieces() -> Generator:
        # Lazily decode base64-looking chunks, skipping empty lines.
        for chunk in payload:
            if not chunk:
                continue
            looks_base64 = (chunk[-2:] in ['==', b'=='] or
                            chunk[-1] in ['+', b'+'])
            yield body_decode(chunk) if looks_base64 else chunk

    return ElementTree.fromstringlist(pieces())
def add_data_block(self):
    """2nd step: add a data block with Org as suffix, and put value in it.

    Walks every key of the module-level data_array and, unless the block is
    already present in the CNT or its value is empty, renders the template
    and inserts the new element right after the matching existing block.
    """
    Logger.recordLog("****adding data block*****\r\n")
    parent = self.root.find('MEM')
    datablock_list = parent.findall(DATA_BLOCK)
    # Names of the data blocks already present, used both for the duplicate
    # check and to locate the insertion position.
    datablock_namelist = self._elementNameList(datablock_list, DATA_BLOCK_NAME)
    for key in data_array:
        block_name = key
        string = block_name + TEXT_VALUE_EXSIT
        #if current value of the datablock has been added in cnt (if no data need to be added in the cnt, don't add the corresponding data block)
        if string in datablock_namelist:
            continue
        elif operator.eq(data_array[key][0], None) or operator.eq(data_array[key][0], '') or operator.eq(data_array[key][0], 'EMPTY'):
            # Nothing to add for empty / placeholder values.
            continue
        #else
        else:
            # Render the template with this block's values, then insert the
            # resulting element(s) after the block found by SEARCH_BY_TEXT.
            new_blocks = self._setValueForTemplate(data_array, block_name, data_block_template)
            text = block_name + SEARCH_BY_TEXT
            idx = datablock_namelist.index(text) + 1
            new_elements = ETree.fromstringlist(new_blocks)
            # NOTE(review): element_list is computed but never used — confirm
            # whether it was meant to drive the insertion loop below.
            element_list = new_elements.findall(DATA_BLOCK)
            for j in range(0, 1):
                parent.insert(idx + j + 1, new_elements[j])
    #self._saveCNT()
    Logger.recordLog("****End of adding data block*****\r\n")
def analysis(start, end, path):
    """Scan ArcGIS server log XML files under *path*, keep entries dated
    between *start* and *end*, and write usage statistics to analysis.xlsx.

    start/end are widget-like objects (presumably tkinter entries — confirm)
    whose .get() yields a 'YYYY-MM-DD' string.
    """
    msg_list = []
    set_list = []
    start = datetime.strptime(start.get(), '%Y-%m-%d')
    end = datetime.strptime(end.get(), '%Y-%m-%d')
    if not path:
        path = './data/arcgis_log/'
    # write_only workbook: rows can only be appended, which is all we need.
    wb = openpyxl.Workbook(write_only=True)
    sh_counts_times = wb.create_sheet(title="total count")
    sh_all = wb.create_sheet(title="total table")
    sh_user_usage_count_2 = wb.create_sheet(title="user total count")
    sh_counts_times.append(["source", "count"])
    sh_all.append([
        "time", 'type', 'code', "source", "process", "thread", "methodName",
        "machine", "user", "elapsed", "msg"
    ])
    sh_user_usage_count_2.append(["user", "source", "count"])
    print("start to analysis")
    for fn in os.listdir(path):
        with open('%s/%s' % (path, fn), 'r', encoding='utf-8') as f:
            # The log files have no single root element, so wrap them.
            it = itertools.chain('<root>', f, '</root>')
            datas = ET.fromstringlist(it)
        for data in datas:
            # Entry timestamps look like ISO 'YYYY-MM-DDThh:mm:ss'.
            split_time = datetime.strptime(
                data.attrib['time'].split('T')[0], '%Y-%m-%d')
            if start <= split_time <= end:
                if "Request user" in data.text:
                    msg_list.append(data.text)
                sh_all.append([
                    data.attrib['time'], data.attrib['type'],
                    data.attrib['code'], data.attrib['source'],
                    data.attrib['process'], data.attrib['thread'],
                    data.attrib['methodName'], data.attrib['machine'],
                    data.attrib['user'], data.attrib['elapsed'], data.text
                ])
    # Per-source request counts, parsed out of the "Request user" messages.
    source_list_of_msg_list = []
    for m in msg_list:
        sp1 = m.split(',')
        mapserver = sp1[1].split(':')[1][1:]
        source_list_of_msg_list.append(mapserver)
    from collections import Counter
    counter = Counter(source_list_of_msg_list)
    for i in counter:
        sh_counts_times.append([i, counter[i]])
    # Per-(user, source) counts; "$$$" is a separator unlikely to collide.
    for m in msg_list:
        sp1 = m.split(',')
        user = sp1[0].split(':')[1][1:]
        mapserver = sp1[1].split(':')[1][1:]
        set_list.append(user + "$$$" + mapserver)
    from collections import Counter
    counter = Counter(set_list)
    for i in counter:
        a = i.split('$$$')
        sh_user_usage_count_2.append([a[0], a[1], counter[i]])
    wb.save('analysis.xlsx')
def readXml(filename):
    """Parse an XML file, tolerating a missing document root.

    The file content is wrapped in a synthetic <root> element so that
    fragment files (several top-level elements) still parse.  The returned
    element is that root node, the entry point for walking the tree.
    """
    with open(filename) as fp:
        wrapped = itertools.chain('<root>', fp, '</root>')
        return ET.fromstringlist(wrapped)
def split_file(filename):
    """Split a concatenation of XML documents into one file per document.

    Each document begins with an "<?xml" declaration line.  Chunk i is
    written to "{filename}-{i}" with a fresh XML declaration, i from 0.
    """
    data = list()
    results = list()
    n = 0
    with open(filename, 'rb') as f:
        lines = f.readlines()
    for i in range(0, len(lines)):
        line = lines[i]
        # BUG FIX: the file is opened in binary mode, so the prefix must be
        # bytes — startswith("<?xml") with a str prefix raises TypeError on
        # Python 3 and the split never happened.
        if line.startswith(b"<?xml") and len(data) > 0:
            results.append(data)
            data = list()
        else:
            data.append(line)
        if i == (len(lines) - 1):
            # Flush the final document.
            results.append(data)
    for result in results:
        t = ET.ElementTree(ET.fromstringlist(result))
        new_filename = "{}-{}".format(filename, n)
        n += 1
        # xml_declaration=True restores the declaration dropped on split.
        t.write(new_filename, xml_declaration=True, method="xml",
                encoding="UTF-8")
def parse(self, ofx):
    """Parse an OFX document: read the colon-separated header block, then
    repair the SGML-ish body into well-formed XML and load it."""
    try:
        # Header section: "KEY:VALUE" lines up to the first blank line.
        for line in ofx.splitlines():
            if line.strip() == "":
                break
            header, value = line.split(":")
            self.headers[header] = value
    except ValueError:
        # A malformed header line silently ends header parsing.
        pass
    finally:
        # Fill in defaults for any headers the file did not provide.
        if "OFXHEADER" not in self.headers:
            self.headers["OFXHEADER"] = "100"
        if "VERSION" not in self.headers:
            self.headers["VERSION"] = "102"
        if "SECURITY" not in self.headers:
            self.headers["SECURITY"] = "NONE"
        if "OLDFILEUID" not in self.headers:
            self.headers["OLDFILEUID"] = "NONE"
        if "NEWFILEUID" not in self.headers:
            self.headers["NEWFILEUID"] = "NONE"
    try:
        # Body section: OFX 1.x is SGML where closing tags are optional.
        # Split on '<' so each item is one tag plus any trailing text, then
        # synthesize the missing closing tags while tracking open tags.
        tags = ofx.split("<")
        if len(tags) > 1:
            tags = ["<" + t.strip() for t in tags[1:]]
        heirarchy = []
        can_open = True
        for i, tag in enumerate(tags):
            gt = tag.index(">")
            if tag[1] != "/":
                # Is an opening tag
                if not can_open:
                    # Previous tag carried text, so it was a leaf: close it.
                    tags[i - 1] = tags[i - 1] + "</" + \
                        heirarchy.pop() + ">"
                    can_open = True
                tag_name = tag[1:gt].split()[0]
                heirarchy.append(tag_name)
                if len(tag) > gt + 1:
                    # Text after '>' marks a value-carrying leaf that must be
                    # closed before the next open tag.
                    can_open = False
            else:
                # Is a closing tag
                tag_name = tag[2:gt].split()[0]
                if tag_name not in heirarchy:
                    # Close tag with no matching open, so delete it
                    tags[i] = tag[gt + 1:]
                else:
                    # Close tag with matching open, but other open
                    # tags that need to be closed first
                    while(tag_name != heirarchy[-1]):
                        tags[i - 1] = tags[i - 1] + "</" + \
                            heirarchy.pop() + ">"
                        can_open = True
                    heirarchy.pop()
        self.xml = ET.fromstringlist(tags)
        # NOTE(review): load_from_xml is called with self passed explicitly in
        # addition to the bound receiver — confirm against its signature.
        self.load_from_xml(self, self.xml)
    except Exception:
        raise InvalidOFXStructureException
def parse_XML(self, output, returncode, isTimeout):
    """Map CBMC's XML output to a benchexec result status string.

    Args:
        output: the tool's output lines.
        returncode: the tool's exit code.
        isTimeout: whether the run hit its time limit.

    Returns:
        a status string (result.RESULT_* constant or an ERROR/diagnostic).
    """
    #an empty tag cannot be parsed into a tree
    def sanitizeXML(s):
        return s.replace("<>", "<emptyTag>") \
                .replace("</>", "</emptyTag>")

    try:
        tree = ET.fromstringlist(map(sanitizeXML, output))
        status = tree.findtext('cprover-status')

        if status is None:
            def isErrorMessage(msg):
                return msg.get('type', None) == 'ERROR'

            # Element.iter() replaces getiterator(), which was deprecated
            # and removed in Python 3.9.
            messages = list(filter(isErrorMessage, tree.iter('message')))
            if messages:
                # for now, use only the first error message if there are several
                msg = messages[0].findtext('text')
                if msg == 'Out of memory':
                    status = 'OUT OF MEMORY'
                elif msg:
                    status = 'ERROR ({0})'.format(msg)
                else:
                    status = 'ERROR'
            else:
                status = 'INVALID OUTPUT'

        elif status == "FAILURE":
            assert returncode == 10
            reason = tree.find('goto_trace').find('failure').findtext('reason')
            if not reason:
                reason = tree.find('goto_trace').find('failure').get('reason')
            if 'unwinding assertion' in reason:
                status = result.RESULT_UNKNOWN
            else:
                status = result.RESULT_FALSE_REACH

        elif status == "SUCCESS":
            assert returncode == 0
            if "--no-unwinding-assertions" in self.options:
                # Unwinding assertions disabled: success is not a proof.
                status = result.RESULT_UNKNOWN
            else:
                status = result.RESULT_TRUE_PROP

    except Exception:
        if isTimeout:
            # in this case an exception is expected as the XML is invalid
            status = 'TIMEOUT'
        elif 'Minisat::OutOfMemoryException' in output:
            status = 'OUT OF MEMORY'
        else:
            status = 'INVALID OUTPUT'
            logging.exception(
                "Error parsing CBMC output for returncode %d", returncode)
    return status
def log_response_error(response_error):
    """
    @type response_error: owncloud.ResponseError
    """
    message = response_error.get_resource_body()
    if message[:38] == '<?xml version="1.0" encoding="utf-8"?>':
        import xml.etree.ElementTree as ElementTree
        sabre_exception = ''
        sabre_message = ''
        # Skip the declaration (+ newline) and parse the DAV error body.
        root_element = ElementTree.fromstringlist(message[39:])
        if root_element.tag == '{DAV:}error':
            for child in root_element:
                if child.tag == '{http://sabredav.org/ns}exception':
                    sabre_exception = child.text
                elif child.tag == '{http://sabredav.org/ns}message':
                    sabre_message = child.text
        if sabre_exception != '':
            message = 'SabreDAV Exception: %s - Message: %s' % (
                sabre_exception, sabre_message)
    logger.error('Unexpected response: Status code: %i - %s' % (
        response_error.status_code, message))
    logger.info('Full Response: %s' % (response_error.get_resource_body()))
def main():
    """Entry point: validate the IPPcode19 XML program and interpret it."""
    sourceFile, inputFile = parseArguments()
    if sourceFile:
        sourceLines = getLinesFromFile(sourceFile)
    else:
        # No source file given: read the program from stdin.
        sourceLines = [line.strip() for line in sys.stdin]
    inputLines = getLinesFromFile(inputFile)
    try:
        root = ET.fromstringlist(sourceLines)
    except ET.ParseError:
        # Narrowed from a bare "except:", which also swallowed
        # KeyboardInterrupt/SystemExit; only a parse failure means bad XML.
        exitWithError(errorTypes.xmlNotWellFormated)
    # Root element must be <program language="IPPcode19"> with only the
    # whitelisted attributes.
    customAssert(root.tag == "program", errorTypes.xmlStructureSyntaxLex)
    customAssert(root.get("language") == "IPPcode19",
                 errorTypes.xmlStructureSyntaxLex)
    for atrib in root.attrib:
        customAssert(atrib in ("language", "name", "description"),
                     errorTypes.xmlStructureSyntaxLex)
    checkXmlHeader(sourceLines)
    instructionsList = getInstructionsFromXml(root)
    # Static checks before execution: lexical, syntactic, label semantics.
    checkOperandLexems(instructionsList)
    checkSyntax(instructionsList)
    checkLabelsSematics(instructionsList)
    instructionsList = replaceEscapeSequences(instructionsList)
    interpretCode(instructionsList, inputLines)
def fill_dictionary(cls, result_dir):
    """
    Parsed files.xml and symbols.xml and fill dictionary

    :return:
    """
    XML_FILES = ['files.xml', 'symbols.xml']
    results_dict = {tag: [] for tag in settings.CHECKER_TAGS}
    for base_name in XML_FILES:
        file_name = os.path.join(result_dir, base_name)
        logger.info('Processing %s file.', file_name)
        try:
            # The pkgdiff output has no single root, so wrap it before parsing.
            with open(file_name, "r") as f:
                lines = ['<pkgdiff>'] + f.readlines() + ['</pkgdiff>']
            pkgdiff_tree = ElementTree.fromstringlist(lines)
            for tag in settings.CHECKER_TAGS:
                for pkgdiff in pkgdiff_tree.findall('.//' + tag):
                    results_dict[tag].extend(
                        x.strip() for x in pkgdiff.text.strip().split('\n'))
        except IOError:
            # Missing file: skip it, keep whatever was collected so far.
            continue
    return results_dict
def extractDeletionKeys(file):
    """
    [String] full path of XML file => [Iterable] key values of trades
    """
    lines = addRemoveHeader(fileToLines(file))
    tree = ET.fromstringlist(lines)
    deletions = filter(isDeletion, tree)
    return map(keyValue, deletions)
def log_response_error(response_error):
    """
    @type response_error: owncloud.ResponseError
    """
    body = response_error.get_resource_body()
    message = body
    declaration = '<?xml version="1.0" encoding="utf-8"?>'
    if message[:38] == declaration:
        import xml.etree.ElementTree as ElementTree
        exc_text, msg_text = '', ''
        # Parse everything after the declaration (+ newline).
        root = ElementTree.fromstringlist(message[39:])
        if root.tag == '{DAV:}error':
            for node in root:
                if node.tag == '{http://sabredav.org/ns}exception':
                    exc_text = node.text
                if node.tag == '{http://sabredav.org/ns}message':
                    msg_text = node.text
        if exc_text != '':
            message = 'SabreDAV Exception: %s - Message: %s' % (exc_text, msg_text)
    logger.error('Unexpected response: Status code: %i - %s' % (
        response_error.status_code, message))
    logger.info('Full Response: %s' % (response_error.get_resource_body()))
def getCores(self, xmlData):
    """Return a list with all core names inside a xmlData list.

    Args:
        xmlData: iterable of XML string fragments (fed to fromstringlist).

    Returns:
        list[str]: text of every element carrying the attribute name="name".
    """
    root = ET.fromstringlist(xmlData)
    # BUG FIX: the original used map() purely for its append side effect;
    # map is lazy in Python 3, so the appends never ran and the result was
    # always empty.  A comprehension builds the list eagerly.
    return [node.text for node in root.findall(".//*[@name='name']")]
def main():
    """Read a corpus XML from stdin, ask the local XML-RPC labeller for
    lexical-selection answers, rewrite matching NODEs and print the corpus."""
    s = xmlrpc.client.ServerProxy('http://localhost:8000')
    lines = []
    # Keep only non-blank stdin lines for the XML parser.
    for line in sys.stdin:
        if line.strip():
            lines.append(line.strip())
    corpus = ET.fromstringlist(lines)
    for sentence in corpus:
        sentnum = sentence.attrib['ref']
        tuples = lexsel_util.get_tuples(sentence)
        # tup[1] is the surface form — presumably (something, word, ...);
        # confirm against lexsel_util.get_tuples.
        surface = [tup[1] for tup in tuples]
        dprint("[SURFACE]", " ".join(surface))
        # Remote call: the classifier labels the whole sentence at once.
        answers = s.label_sentence(tuples)
        dprint("[ANSWERS]", answers)
        ## all the NODE elements in the tree that have a SYN underneath
        target_nodes = sentence.findall(".//NODE/SYN/..")
        changed = False
        for node in target_nodes:
            changed_here = make_decision(node, answers)
            if changed_here:
                changed = True
        if changed:
            dprint("[CLASSIFIERSENTENCE]", sentnum)
    # Emit the (possibly modified) corpus on stdout.
    print(ET.tostring(corpus, encoding="unicode"))
def test_variationFontOrigin(self):
    # Glyphs 2.4.1 introduced a custom parameter “Variation Font Origin”
    # to specify which master should be considered the origin.
    # https://glyphsapp.com/blog/glyphs-2-4-1-released
    styles = [
        ("Thin", 100, 26),
        ("Regular", 400, 100),
        ("Medium", 444.4, 111),
        ("Black", 900, 190),
    ]
    masters = [makeMaster("Family", name, weight=internal)
               for name, _, internal in styles]
    instances = {
        # Instances are declared heaviest-first.
        "data": [makeInstance(name, weight=(name, user, internal))
                 for name, user, internal in reversed(styles)],
        "Variation Font Origin": "Medium",
    }
    doc = etree.fromstringlist(self.build_designspace(masters, instances))
    # The designated origin master must be flagged for copying.
    origin = doc.find('sources/source[@stylename="Medium"]')
    self.assertEqual(origin.find("lib").attrib["copy"], "1")
    # And the weight axis default must match the origin's user-space value.
    weightAxis = doc.find('axes/axis[@tag="wght"]')
    self.assertEqual(weightAxis.attrib["default"], "444.4")
def _pdf_get_all_pageinfo(infile, log=None):
    """Collect a PageInfo object for every page of *infile*.

    Ghostscript-extracted text is wrapped in a synthetic <document> root and
    parsed; when the XML is unusable we fall back to "no existing text".
    Returns (pages, pdf).
    """
    if not log:
        log = Mock()
    pdf = pikepdf.open(infile)
    existing_text = ghostscript.extract_text(infile, pageno=None)
    existing_text = regex_remove_char_tags.sub(b' ', existing_text)
    try:
        root = ET.fromstringlist(
            [b'<document>\n', existing_text, b'</document>\n'])
        page_xml = root.findall('page')
    except ET.ParseError as e:
        log.error(
            "An error occurred while attempting to retrieve existing text in "
            "the input file. Will attempt to continue assuming that there is "
            "no existing text in the file. The error was:")
        log.error(e)
        # One placeholder per PDF page: "no text known".
        page_xml = [None] * len(pdf.pages)

    shortfall = len(pdf.pages) - len(page_xml)
    if shortfall != 0:
        log.error("The number of pages in the input file is inconsistent.")
        if shortfall > 0:
            # Pad so every PDF page has an (empty) text entry.
            page_xml.extend([None] * shortfall)

    pages = [PageInfo(pdf, n, infile, page_xml[n])
             for n in range(len(pdf.pages))]
    return pages, pdf
def preprocess(input_fps, output_fp, zero_proportion):
    """Write one "<label>\\t<chars>\\n" line per <sent_tag> in the inputs.

    The binary label is 0 with probability *zero_proportion*, else 1.  The
    characters of each stripped, non-empty line of the sentence are copied
    up to (not including) the first '/' on that line.
    """
    for input_fp in input_fps:
        # these XML files don't have a "root" (outermost) node, so we can add one :)
        wrapped = itertools.chain('<root>', input_fp, '</root>')
        root = ElementTree.fromstringlist(wrapped)
        # assuming the input has the structure specified above, this iterates
        # thru all the <sent_tag>s
        for sent in root.findall('sent/sent_tag'):
            label = 0 if random.random() < zero_proportion else 1
            output_fp.write(str(label))
            output_fp.write('\t')
            for raw_line in sent.text.splitlines():
                stripped = raw_line.strip()
                if not stripped:
                    continue
                for ch in stripped:
                    # stop writing when we encounter the '/' character
                    if ch == '/':
                        break
                    # FIXME: should this check hanzidentifer.has_chinese(ch) ??
                    output_fp.write(ch)
            output_fp.write('\n')
def load_xml_tree(xml_path):
    """Utility function for loading mujoco xml files that may contain
    nested include tags

    Args:

    xml_path: str
        a path to a mujoco xml file with include tags to be expanded

    Returns:

    tree: ElementTree.Element
        an element that represents the root node of an xml tree
    """
    with open(xml_path, "r") as f:
        # Skips the first 16 lines — presumably a fixed-size header block in
        # these generated files; TODO confirm the offset is always correct.
        root = ET.fromstringlist(f.readlines()[16:])
    for c in root.findall(".//include"):
        file = c.attrib['file']
        # Include paths are resolved relative to the including file.
        target = os.path.join(os.path.dirname(xml_path), file)
        # The trailing '...' in the XPath selects the *parent* of the
        # matched include node.
        p = root.find(f".//include[@file='{file}']...")
        i = list(p).index(c)
        p.remove(c)
        # Splice the included file's children in place of the include tag,
        # recursing so nested includes are expanded too; reversed() keeps
        # the original child order after repeated insert-at-i.
        for s in reversed(load_xml_tree(target)):
            p.insert(i, s)
    return root
def from_stringlist(sequence):
    """Parse a sequence of XML fragments into a result object.

    Args:
        sequence: iterable of XML string fragments.

    Returns:
        whatever from_element() builds from the parsed root element.

    Raises:
        VaspParseError: if the XML is malformed.
    """
    try:
        element = ElementTree.fromstringlist(sequence)
    except ElementTree.ParseError as e:
        # Chain the original parse error so tracebacks keep the root cause.
        raise VaspParseError(e) from e
    return from_element(element)
def retrieve_xml_post_information(path="./sampledata_1/Posts.xml"):
    """Read a StackExchange Posts.xml dump and index bodies by question id.

    Args:
        path: XML file to read; the default keeps the original hard-coded
            location, so existing callers are unaffected.

    Returns:
        (post_dict, title_dict, answers_dict): maps from question id to
        lists of question bodies, question titles, and answer bodies.
    """
    post_dict = {}
    title_dict = {}
    answers_dict = {}
    # Context manager: the original leaked the open file handle.
    with open(path, "r") as posts:
        root_node = ET.fromstringlist(posts)
    for row in root_node:
        post_type_id = row.get('PostTypeId')
        if post_type_id == '1':
            # Question: keyed by its own Id.
            post_id = row.get('Id')
            post_dict.setdefault(post_id, []).append(row.get('Body'))
            title_dict.setdefault(post_id, []).append(row.get('Title'))
        elif post_type_id == '2':
            # Answer: keyed by the question it answers.
            parent_id = row.get('ParentId')
            answers_dict.setdefault(parent_id, []).append(row.get('Body'))
    return post_dict, title_dict, answers_dict
def get_xml_file(xml_file: str) -> ElementTree.Element:
    """
    Gets an XML File from the data.tar.gz file
    :param xml_file: The name of the XML File to parse
    :return: The Element Tree of the XML File
    """
    raw_content = get_file(xml_file)
    return ElementTree.fromstringlist(raw_content)
def get_scheme_letters(inputfile):
    """Extract per-line rhyme-scheme letters from a generated analysis XML.

    Returns:
        (scheme_letters, root): mapping of line id -> rhyme letter and the
        parsed root element, or (None, None) when the file cannot be parsed.
    """
    # Mark '&' correctly because it is sometimes used incorrectly in generated
    # xmls.  The negative lookahead leaves existing '&amp;' entities alone.
    # BUG FIX: the replacement used to be '&' (a no-op) instead of '&amp;',
    # so bad files still failed to parse.
    fixed_input = []
    with open(inputfile) as input:
        for line in input:
            fixed_input.append(re.sub('&(?!amp;)', '&amp;', line))
    try:
        root = ET.fromstringlist(fixed_input)
    except Exception as error:
        print('Failed analyzing', inputfile)
        print(error)
        return None, None
    # Parse rhyme scheme.
    # Change Prolog format into JSON to be parsed as a dictionary.
    scheme = root[2][0].attrib['Stanza-based_Rhyme_Schemes']
    scheme = scheme.replace('-', '\":\"' '').replace('[', '{\"').replace(']', '\"}').replace(
        ',', '\",\"')
    scheme = '{\"scheme\":' + scheme.replace('\"{', '{').replace('}"', '}') + '}'
    # Get rid of empty values with regex.
    scheme = re.sub('\".?\":\{\"\"\},', '', scheme)
    scheme = json.loads(scheme)
    # Flatten all stanzas into one line-id -> letter mapping.
    scheme_letters = {}
    for stanza in scheme['scheme'].values():
        scheme_letters.update(stanza)
    return scheme_letters, root
def standardize_file_target(file_target):
    """For file targets that are not source files, return the target that
    generated them. This is needed because rdeps of generated files do not
    include targets that reference their generating rules.
    https://github.com/bazelbuild/bazel/issues/4949
    """
    query_result = bazel_query(['--output=xml', file_target])
    if not query_result:
        sys.exit(
            "Empty query response for {}. It is probably not handled by bazel".
            format(file_target))
    target_xml = ElementTree.fromstringlist(query_result.split('\n'))
    # Source files are already canonical targets.
    if target_xml.find('source-file') is not None:
        return file_target
    # Generated files map back to the rule that produces them.
    generated = target_xml.find('generated-file')
    if generated is not None:
        return generated.get('generating-rule')
    sys.exit("Error parsing query xml for " + file_target + ":\n" + query_result)
def main():
    """Import an XML class dump into IDA (always running the XML fixer)."""
    data = None
    with open(ida_kernwin.ask_file(0, "*.xml", "Select a file to import")) as f:
        data = f.readlines()
    if data is None:
        # NOTE(review): readlines() returns a list, so data is never None
        # after a successful open — this guard looks vestigial; confirm.
        return
    ida_kernwin.show_wait_box("Importing file")
    # Repair the (possibly malformed) XML in place before parsing.
    fix_xml(data)
    make_basic_structs()
    tree = et.fromstringlist(data)
    if (tree is None):
        # NOTE(review): fromstringlist raises on failure rather than
        # returning None, so this branch looks unreachable — confirm.
        ida_kernwin.hide_wait_box()
        ida_kernwin.warning("Something bad happened :(")
        return
    global IMPORT_VTABLE
    # parse_class consults this global to decide whether to import vtables.
    IMPORT_VTABLE = ida_kernwin.ask_yn(
        1, "Import virtual tables for classes? (Longer)")
    for i in tree:
        parse_class(i)
    ida_kernwin.hide_wait_box()
def main(): # ignore SIGCHLD, prevent the zombie apocalypse signal.signal(signal.SIGCHLD, signal.SIG_IGN) utils.drop_privileges() bad_regex = re.compile("[,()]+") # avoid forbidden by TSD symbols while True: try: if vstats == "all": stats = subprocess.Popen( ["varnishstat", "-1", "-x"], stdout=subprocess.PIPE, ) else: fields = ",".join(vstats) stats = subprocess.Popen( ["varnishstat", "-1", "-f" + fields, "-x"], stdout=subprocess.PIPE, ) except OSError, e: # Die and signal to tcollector not to run this script. sys.stderr.write("Error: %s\n" % e) sys.exit(13) metrics = "" for line in stats.stdout.readlines(): metrics += line metrics = ET.fromstringlist(metrics) timestamp = "" if use_varnishstat_timestamp: pattern = "%Y-%m-%dT%H:%M:%S" timestamp = int( time.mktime(time.strptime(metrics['timestamp'], pattern))) else: timestamp = time.time() for stat in metrics.findall('stat'): tags = "" k = stat.findtext('name') if None == bad_regex.search(k): stattype = stat.findtext('type') if stattype == None: metric_name = metric_prefix + "." + k elif stattype == "LCK": metric_name = metric_prefix + ".locks." + k ident = stat.findtext('ident') tags = "ident=" + ident elif stattype == "SMA": metric_name = metric_prefix + ".storage." + k ident = stat.findtext('ident') tags = "ident=" + ident else: continue print "%s %d %s %s" % \ (metric_name, timestamp, stat.findtext('value'), tags) sys.stdout.flush() time.sleep(interval)
def get_model(serial_number):
    """A helper function to get the friendly model.

    Args:
        serial_number: Devices' Serial Number.

    Returns:
        stdout: friendly model name or "".
    """
    # The Apple lookup code is the serial's last 4 (12-char serials) or
    # last 3 (11-char serials) characters.
    if len(serial_number) == 12:
        lookup_code = serial_number[-4:]
    elif len(serial_number) == 11:
        lookup_code = serial_number[-3:]
    else:
        print("Unexpected serial number length: {}".format(serial_number))
        return ""
    lookup_url = "https://support-sp.apple.com/sp/product?cc={lookup_code}".format(
        lookup_code=lookup_code)
    xml = urllib.urlopen(lookup_url).read()
    try:
        tree = ElementTree.fromstringlist(xml)
        model_friendly = tree.find('.//configCode').text
        return model_friendly
    except ElementTree.ParseError as err:
        # BUG FIX: ParseError has no .strerror attribute (that is an OSError
        # field), so the old handler raised AttributeError instead of
        # reporting; format the exception itself.
        print("Failed to retrieve model name: {}".format(err))
        return ""
def read_xml(path):
    """Read the <string> entries of an Android-style XML file via ElementTree.

    Most string contents can be read this way, but when a <string></string>
    element nests child tags, the nested content is lost — only the text
    before the first child tag is returned.

    :param path: path of the XML file.
    :return: (keys, values) lists, or None when *path* is empty.
    """
    if path is None or len(path) == 0:
        Log().error('file path is None')
        return
    keys = []
    values = []
    file = open(path, encoding='utf-8')
    root = ElementTree.fromstringlist(file.read())
    for item in root.findall('string'):
        keys.append(item.attrib['name'])
        values.append(item.text)
    file.close()
    return keys, values
def parse(self, filename):
    """Parse *filename* after wrapping its content in a synthetic <root>.

    Resets last_filter, runs process() over the tree, and returns the root.
    """
    self.last_filter = []
    with open(filename) as ref:
        wrapped = ['<root>', ref.read(), '</root>']
        root = ET.fromstringlist(wrapped)
    self.process(root)
    return root
def parse(self, ofx):
    """Parse an OFX document: consume the "KEY:VALUE" header lines, then
    convert the SGML-style body (optional closing tags) into valid XML."""
    try:
        # Headers run until the first blank line.
        for line in ofx.splitlines():
            if line.strip() == "":
                break
            header, value = line.split(":")
            self.headers[header] = value
    except ValueError:
        # A line without exactly one ':' silently ends header parsing.
        pass
    finally:
        # Defaults for headers the file did not provide.
        if "OFXHEADER" not in self.headers:
            self.headers["OFXHEADER"] = "100"
        if "VERSION" not in self.headers:
            self.headers["VERSION"] = "102"
        if "SECURITY" not in self.headers:
            self.headers["SECURITY"] = "NONE"
        if "OLDFILEUID" not in self.headers:
            self.headers["OLDFILEUID"] = "NONE"
        if "NEWFILEUID" not in self.headers:
            self.headers["NEWFILEUID"] = "NONE"
    try:
        # Split the body on '<': each item is one tag plus any trailing
        # text.  Missing closing tags are synthesized while walking.
        tags = ofx.split("<")
        if len(tags) > 1:
            tags = ["<" + t.strip() for t in tags[1:]]
        heirarchy = []
        can_open = True
        for i, tag in enumerate(tags):
            gt = tag.index(">")
            if tag[1] != "/":
                # Is an opening tag
                if not can_open:
                    # Previous tag carried text: it was a leaf, close it now.
                    tags[i - 1] = tags[i - 1] + "</" + \
                        heirarchy.pop() + ">"
                    can_open = True
                tag_name = tag[1:gt].split()[0]
                heirarchy.append(tag_name)
                if len(tag) > gt + 1:
                    # Trailing text after '>' marks a value-carrying leaf.
                    can_open = False
            else:
                # Is a closing tag
                tag_name = tag[2:gt].split()[0]
                if tag_name not in heirarchy:
                    # Close tag with no matching open, so delete it
                    tags[i] = tag[gt + 1:]
                else:
                    # Close tag with matching open, but other open
                    # tags that need to be closed first
                    while (tag_name != heirarchy[-1]):
                        tags[i - 1] = tags[i - 1] + "</" + \
                            heirarchy.pop() + ">"
                        can_open = True
                    heirarchy.pop()
        self.xml = ET.fromstringlist(tags)
        # NOTE(review): load_from_xml receives self explicitly on top of the
        # bound receiver — confirm its signature expects that.
        self.load_from_xml(self, self.xml)
    except Exception:
        raise InvalidOFXStructureException
def split_file(filename):
    """
    Split the input file into separate files, each containing a single patent.
    As a hint - each patent declaration starts with the same line that was
    causing the error found in the previous exercises.
    The new files should be saved with filename in the following format:
    "{}-{}".format(filename, n) where n is a counter, starting from 0.
    """
    data = []
    results = []
    n = 0
    with open(filename, "rb") as f:
        flines = f.readlines()
    for i in range(len(flines)):
        line = flines[i]
        # BUG FIX: the file is opened in binary mode, so the declaration
        # prefix must be bytes — startswith("<?xml") with a str prefix
        # raises TypeError on Python 3.
        if line.startswith(b"<?xml") and len(data) > 0:
            results.append(data)
            data = []
        else:
            data.append(line)
        if (i == len(flines) - 1):
            # Flush the final document.
            results.append(data)
    for res in results:
        tre = ET.ElementTree(ET.fromstringlist(res))
        newfile = "{}-{}".format(filename, n)
        n += 1
        # xml_declaration=True restores the declaration dropped on split.
        tre.write(newfile, xml_declaration=True, method="xml", encoding="UTF-8")
def main(): # ignore SIGCHLD, prevent the zombie apocalypse signal.signal(signal.SIGCHLD, signal.SIG_IGN) utils.drop_privileges() bad_regex = re.compile("[,()]+") # avoid forbidden by TSD symbols while True: try: if vstats == "all": stats = subprocess.Popen( ["varnishstat", "-1", "-x"], stdout=subprocess.PIPE, ) else: fields = ",".join(vstats) stats = subprocess.Popen( ["varnishstat", "-1", "-f" + fields, "-x"], stdout=subprocess.PIPE, ) except OSError, e: # Die and signal to tcollector not to run this script. sys.stderr.write("Error: %s\n" % e) sys.exit(13) metrics = "" for line in stats.stdout.readlines(): metrics += line metrics = ET.fromstringlist(metrics) timestamp = "" if use_varnishstat_timestamp: pattern = "%Y-%m-%dT%H:%M:%S" timestamp = int(time.mktime(time.strptime(metrics['timestamp'], pattern))) else: timestamp = time.time() for stat in metrics.findall('stat'): tags = "" k = stat.findtext('name') if None == bad_regex.search(k): stattype = stat.findtext('type') if stattype == None: metric_name = metric_prefix + "." + k elif stattype == "LCK": metric_name = metric_prefix + ".locks." + k ident = stat.findtext('ident') tags = "ident=" + ident elif stattype == "SMA": metric_name = metric_prefix + ".storage." + k ident = stat.findtext('ident') tags = "ident=" + ident else: continue print "%s %d %s %s" % \ (metric_name, timestamp, stat.findtext('value'), tags) sys.stdout.flush() time.sleep(interval)
def parse_XML(self, output, returncode, isTimeout):
    """Map CBMC's XML output to a benchexec result status string.

    Args:
        output: the tool's output lines.
        returncode: the tool's exit code.
        isTimeout: whether the run hit its time limit.

    Returns:
        a status string (result.RESULT_* constant or an ERROR/diagnostic).
    """
    # an empty tag cannot be parsed into a tree
    def sanitizeXML(s):
        return s.replace("<>", "<emptyTag>").replace("</>", "</emptyTag>")

    try:
        tree = ElementTree.fromstringlist(map(sanitizeXML, output))
        status = tree.findtext("cprover-status")

        if status is None:
            def isErrorMessage(msg):
                return msg.get("type", None) == "ERROR"

            # Element.iter() replaces getiterator(), which was deprecated
            # and removed in Python 3.9.
            messages = list(filter(isErrorMessage, tree.iter("message")))
            if messages:
                # for now, use only the first error message if there are several
                msg = messages[0].findtext("text")
                if msg == "Out of memory":
                    status = "OUT OF MEMORY"
                elif msg == "SAT checker ran out of memory":
                    status = "OUT OF MEMORY"
                elif msg:
                    status = "ERROR ({0})".format(msg)
                else:
                    status = "ERROR"
            else:
                status = "INVALID OUTPUT"

        elif status == "FAILURE":
            assert returncode == 10
            reason = tree.find("goto_trace").find("failure").findtext("reason")
            if not reason:
                reason = tree.find("goto_trace").find("failure").get("reason")
            if "unwinding assertion" in reason:
                status = result.RESULT_UNKNOWN
            else:
                status = result.RESULT_FALSE_REACH

        elif status == "SUCCESS":
            assert returncode == 0
            if "--unwinding-assertions" in self.options:
                # Success is only a proof when unwinding assertions are on.
                status = result.RESULT_TRUE_PROP
            else:
                status = result.RESULT_UNKNOWN

    except Exception:
        if isTimeout:
            # in this case an exception is expected as the XML is invalid
            status = "TIMEOUT"
        elif "Minisat::OutOfMemoryException" in output:
            status = "OUT OF MEMORY"
        else:
            status = "INVALID OUTPUT"
            logging.exception(
                "Error parsing CBMC output for returncode %d", returncode
            )
    return status
def export_as_TEI(self, request, queryset):
    """Admin action: serialize the selected entries as one TEI <listBibl> XML
    document and return it as a text/xml HTTP response."""
    fragments = ['<listBibl>']
    fragments.extend(queryset.values_list('tei_entry', flat=True))
    fragments.append('</listBibl>')
    # fromstringlist stitches the wrapper and the per-entry fragments together
    document = ElementTree.fromstringlist(fragments)
    payload = ElementTree.tostring(document, encoding='utf-8')
    return HttpResponse(payload, mimetype='text/xml')
def get_totals(boundingbox): search = flickr.photos_search(min_upload_date='2013-01-01', bbox=boundingbox, accuracy=16, extras='geo,date_taken,tags') root = ET.fromstringlist(search, parser=None) for child in root: print child.tag, child.attrib
def __parseLines(self, lines):
    """Parse XML fragment lines and map each top-level child's tag to its text.

    :param lines: sequence of XML string fragments
    :return: dict of {tag: text} for the root's direct children
    """
    root = ET.fromstringlist(lines)
    return {child.tag: child.text for child in root}
def tagTimeContent(self, tagname, time):
    """Fetch the time-based XML response and return the stripped text of the
    first element whose tag equals str(tagname); returns None when no
    element matches.
    """
    # NOTE(review): makeTimeRequest is invoked with an explicit extra `self`
    # on top of the bound one -- confirm its signature really expects
    # (self, instance, time); this looks like an accidental double-self.
    self.tree = ET.fromstringlist(self.makeTimeRequest(self, time))
    for elt in self.tree.iter():
        if elt.tag == str(tagname):
            # first match wins; the rest of the tree is not inspected
            self.tagvalue = elt.text.strip()
            return self.tagvalue
def _run_info_parser(run_info):
    """Extract read and flowcell-layout attribute dicts from a RunInfo XML
    payload stored under run_info['result']; {} when the payload is empty."""
    payload = run_info['result']
    if len(payload) == 0:
        return {}
    root = ET.fromstringlist(payload)
    return {
        'reads': [elem.attrib for elem in root.iter('Read')],
        'fc_layout': [elem.attrib for elem in root.iter('FlowcellLayout')],
    }
def parse_XML(self, output, returncode, isTimeout):
    """Derive a benchexec result status from CBMC's XML output.

    :param output: iterable of output lines from the CBMC run
    :param returncode: exit code of the CBMC process
    :param isTimeout: True when the run was killed for exceeding its time limit
    :return: a status string (a result.RESULT_* constant or a descriptive
        string such as 'OUT OF MEMORY' / 'INVALID OUTPUT' / 'TIMEOUT')
    """
    #an empty tag cannot be parsed into a tree
    def sanitizeXML(s):
        return s.replace("<>", "<emptyTag>") \
                .replace("</>", "</emptyTag>")

    try:
        tree = ET.fromstringlist(map(sanitizeXML, output))
        status = tree.findtext('cprover-status')

        if status is None:
            # No <cprover-status>: fall back to ERROR-typed <message> elements.
            def isErrorMessage(msg):
                return msg.get('type', None) == 'ERROR'

            messages = list(filter(isErrorMessage, tree.getiterator('message')))
            if messages:
                # for now, use only the first error message if there are several
                msg = messages[0].findtext('text')
                if msg == 'Out of memory':
                    status = 'OUT OF MEMORY'
                elif msg:
                    # BUG FIX: the original used 'ERROR (%s)'.format(msg);
                    # str.format substitutes {}-style fields only, so the
                    # message text was silently dropped and the literal '%s'
                    # appeared in the status.
                    status = 'ERROR ({0})'.format(msg)
                else:
                    status = 'ERROR'
            else:
                status = 'INVALID OUTPUT'

        elif status == "FAILURE":
            assert returncode == 10
            # The reason may appear as element text or as a 'reason' attribute.
            reason = tree.find('goto_trace').find('failure').findtext('reason')
            if not reason:
                reason = tree.find('goto_trace').find('failure').get('reason')
            if 'unwinding assertion' in reason:
                # A failed unwinding assertion means the bound was too small,
                # not that the property is actually violated.
                status = result.RESULT_UNKNOWN
            else:
                status = result.RESULT_FALSE_REACH

        elif status == "SUCCESS":
            assert returncode == 0
            # SUCCESS is not a proof when unwinding assertions were disabled.
            if "--no-unwinding-assertions" in self.options:
                status = result.RESULT_UNKNOWN
            else:
                status = result.RESULT_TRUE_PROP

    except Exception:
        if isTimeout:
            # in this case an exception is expected as the XML is invalid
            status = 'TIMEOUT'
        elif 'Minisat::OutOfMemoryException' in output:
            status = 'OUT OF MEMORY'
        else:
            status = 'INVALID OUTPUT'
            logging.exception("Error parsing CBMC output for returncode %d" % (returncode))

    return status
def get_word_count(html):
    """Count the words in an html fragment, falling back to a plain-text
    count when the markup cannot be parsed.

    :param html: html string to count
    """
    try:
        # wrap in a synthetic root so a bare fragment parses as a document
        document = etree.fromstringlist('<doc>{0}</doc>'.format(html))
        plain = etree.tostring(document, encoding='unicode', method='text')
        return get_text_word_count(plain)
    except ParseError:
        return get_text_word_count(html)
def __iter__(self):
    """Yield TransactionData objects parsed from the HTML statement table.

    The input reader is re-encoded to UTF-8 chunk-by-chunk so ElementTree
    can consume it via fromstringlist.
    """
    root = ElementTree.fromstringlist(codecs.iterencode(self.inputreader, 'utf'))
    # assumes the transaction rows live in the 6th <table> of the page
    # -- TODO confirm against the source markup
    table = root.findall('.//table')[5]
    # skip the first two (header) rows and the trailing (summary) row
    for row in table[2:-1]:
        # NOTE(review): './/td[3].nobr' mixes a positional predicate with a
        # dotted tag -- verify it selects the intended <nobr> in the 3rd cell.
        date_str = row.find('.//td[3].nobr').text
        tdate = datetime.strptime(date_str, '%d.%m.%Y')
        amount_str = row.find('.//td[5].nobr').text
        # strip '.' before normalization -- presumably thousands separators;
        # confirm the locale convention
        amount_str = amount_str.replace('.', '')
        tamount = float(normalize_num(amount_str))
        desc = plain_content(row.find('.//td[4]'))
        tmessage = normalize_field(desc)
        yield TransactionData(tdate, tamount, message=tmessage)
def test1(): import os import xml.etree.ElementTree as et from xml.etree.ElementTree import XMLParser as fuckyouall from xml.etree.ElementTree import ParseError as fuckit curdir = os.path.abspath(os.path.curdir) file_ = ' AniDB.net Person - Hanazawa Kana .html' file_path = os.path.join(curdir, 'lists', file_) with open(file_path) as f: tree = et.fromstringlist(f.readlines(), fuckyouall(html=True)) root = tree.getroot() for table in root.findall('table'): print 'table'
def convert(idir, odir, subpath):
    """Read one XML file from idir and write the tweaked version under odir."""
    source_path = fp.join(idir, subpath)
    stem = fp.splitext(subpath)[0]

    with codecs.open(source_path, 'r', 'utf-8') as handle:
        raw = handle.read().encode('utf-8')

    # Re-encode to utf-8 bytes and force the parser encoding to match.
    # Is there a cleaner way to do this?
    xml_parser = ET.XMLParser(encoding='utf-8')
    tree = ET.fromstringlist([raw], parser=xml_parser)
    _remove_boring_parts(tree)

    out_dir = fp.join(odir, stem)
    if not fp.exists(out_dir):
        os.makedirs(out_dir)
    _write_items(tree, fp.join(out_dir, stem))
def xml_to_df(self, xml_str):
    '''
    Transforms the xml in a string into a data frame

    :param xml_str: string with the xml to be transformed in dataframe
    :returns: DataFrame with one record per <row> element (attributes become
        columns), or None when the input cannot be parsed
    '''
    try:
        tree = ElementTree.fromstringlist(xml_str)
    except Exception:
        # BUG FIX: was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt; still best-effort (None on any parse failure).
        return None
    # each <row> element's attribute mapping becomes one record
    attr_list = [dict(node.attrib) for node in tree.iter('row')]
    return DataFrame(attr_list)
def __parse_product(prod):
    """Extract a (name, version, arch) tuple from a product XML description."""
    root = ET.fromstringlist(prod)
    name = root.find('./name').text
    arch = root.find('./arch').text
    try:
        # Prefer base version plus a service-pack suffix when present.
        version = root.find('./baseversion').text
        patchlevel = root.find('./patchlevel').text
        sp = patchlevel if patchlevel != '0' else ""
        if sp:
            version += "-SP{}".format(sp)
    except AttributeError:
        # Missing <baseversion>/<patchlevel>: use the plain <version> tag.
        version = root.find('./version').text
        logger.debug("simpleversion")
    # CAASP uses ALL for update repos and there is only one supported version at time
    # can change in tommorow
    if name == "CAASP":
        version = "ALL"
    return (name, version, arch)
def parseXML(file_chosen):
    """Read an XML file object, reject entity declarations, validate, and
    return the parsed tree.

    Raises BadXMLException when entities are present or parsing fails, and
    ValdiationFailedException when XSD validation fails.
    """
    lines = file_chosen.readlines()
    # Disallow entities for now
    # because we're using XSD
    if any("!ENTITY" in line for line in lines):
        raise BadXMLException("Bad XML File")
    if not validateXML(lines):
        raise ValdiationFailedException()
    try:
        return ET.fromstringlist(lines)
    except Exception:
        raise BadXMLException("Parse Failed")
def set_cloud_env(env):
    """Point the RPDS config file at the given cloud environment.

    Extracts the cloud-env section (between the header and tail marker
    lines) from the config file, rewrites its Server*URL <Var> attributes
    for `env` ('int', 'int2' or 'prod'), and writes the section back.
    """
    # get lines need to be replaced: everything from the header marker
    # through the tail marker, inclusive
    removed_lines = ""
    with file(rpds_config_file) as f:  # NOTE(review): py2 `file()` builtin
        lines = f.readlines()
    got_header = False
    for i in range(0, len(lines)):
        if config_cloud_env_header in lines[i]:
            got_header = True
        if got_header:
            removed_lines = removed_lines + lines[i]
        if config_cloud_env_tail in lines[i]:
            got_header = False
    # prepare url according to env
    media_url = ""
    users_url = ""
    status_url = ""
    app_url = ""
    if env == 'int' or env == 'int2':
        # integration URLs embed the env name via the {0} placeholder
        media_url = server_media_url.format(env)
        users_url = server_users_url.format(env)
        status_url = server_status_url.format(env)
        app_url = server_app_url.format(env)
    elif env == 'prod':
        # production URLs simply drop the "{0}." env-name component
        media_url = server_media_url.replace("{0}.", "")
        users_url = server_users_url.replace("{0}.", "")
        status_url = server_status_url.replace("{0}.", "")
        app_url = server_app_url.replace("{0}.", "")
    # set env: parse the extracted section and patch each matching attribute
    root = ET.fromstringlist(removed_lines)
    for el in root.findall("Var"):
        if "ServerMediaURL" in el.attrib:
            el.attrib["ServerMediaURL"] = media_url
        if "ServerUsersURL" in el.attrib:
            el.attrib["ServerUsersURL"] = users_url
        if "ServerStatusURL" in el.attrib:
            el.attrib["ServerStatusURL"] = status_url
        if "ServerAppURL" in el.attrib:
            el.attrib["ServerAppURL"] = app_url
    # change config file: swap the old section text for the re-serialized XML
    replace_lines = ET.tostring(root) + "\n"
    replace_string_in_file(rpds_config_file, removed_lines, replace_lines)
def main():
    """Read an XML corpus from stdin, run lexical selection on every NODE
    that has a SYN child, and print the updated corpus."""
    stripped = [line.strip() for line in sys.stdin if line.strip()]
    corpus = ET.fromstringlist(stripped)

    dprint("!" * 80)
    lexsel_util.get_tuples(corpus)
    dprint(lexsel_util.prettify(corpus))
    dprint("!" * 80)

    ## find all the NODE elements in the tree that have a SYN underneath them
    for node in corpus.findall(".//NODE/SYN/.."):
        make_decision(node)

    print(ET.tostring(corpus, encoding="unicode"))
def getWeather(city,country):
    """Query the webservicex GlobalWeather SOAP service and format the reply.

    Builds `resultmsg` with one "tag<padding>text" line per XML element of
    the weather report, or "Error" on a SOAP fault.

    NOTE(review): resultmsg is built but never returned in the visible code
    -- confirm whether the original function continues past this point.
    """
    import suds
    from xml.etree import ElementTree
    import time,socket
    #time.sleep(n)
    host = socket.gethostname()
    resultmsg = ''
    result = ''
    client = suds.client.Client("http://www.webservicex.net/globalweather.asmx?WSDL")
    try:
        result = client.service.GetWeather(city.strip(),country.strip())
        # the first line of the payload is dropped before parsing --
        # presumably the XML declaration; confirm against the service output
        temp = result.split("\n")
        temp.remove(temp[0])
        eparse = ElementTree.fromstringlist(temp)
        for it in eparse:
            resultmsg += it.tag.ljust(20)+it.text.strip()+"\n"
    except suds.WebFault, e:
        resultmsg = "Error\n"
def convert(ifile):
    """Return a list of (date, string) tuples for each row in the table.

    The data is iso-8859-1 but contains entities defined in an external DTD
    we do not have, so the entities are translated to unicode chars by hand,
    everything is re-encoded as utf-8, and the parser is forced to utf-8.

    Is there a cleaner way to do this?
    """
    with codecs.open(ifile, 'r', 'iso-8859-1') as handle:
        payload = unescape(handle.read()).encode('utf-8')
    xml_parser = ET.XMLParser(encoding='utf-8')
    root = ET.fromstringlist([payload], parser=xml_parser)
    sections = root.findall('section')
    return concat_l(_convert_section(section) for section in sections)
def get_metadata(acc):
    """Fetch and parse ENA (European Nucleotide Archive) metadata for
    accession `acc` into a flat dict.

    Every key is pre-populated with '' so callers always receive the full
    field set even when the entry is missing or incomplete.
    """
    meta = {
        'Molecule Type': '', 'Topology': '', 'Organism': '',
        'Taxonomy': '', 'Taxon ID': '', 'Strain': '',
        'Description': '', 'Keywords': '', 'Reference Location': '',
        'PubMed ID': '', 'Notes': '', 'URL': '',
        'Sequence': '', 'First Release': '', 'Last Updated': ''
    }
    url = "http://www.ebi.ac.uk/ena/data/view/%s" % acc
    meta['URL'] = url
    url += "&display=xml"
    xml = ''
    try:
        xml = urllib2.urlopen(url)
    except urllib2.URLError:
        # NOTE(review): on failure xml stays '' and the readlines() call
        # below raises AttributeError -- confirm this is intended.
        print "Error accessing %s" % url
    # parse
    root = ET.fromstringlist(xml.readlines())
    # check if valid
    if "entry is not found" in root.text:
        return meta
    print acc
    # get interesting elements
    entry = root.find('entry')
    if entry is None:
        return meta
    # helper functions fill groups of related fields in bulk
    [meta['Molecule Type'], meta['Topology'], meta['Keywords'],
     meta['First Release'], meta['Last Updated']] = _entry_metadata(entry)
    description = entry.find('description')
    meta['Description'] = description.text if description is not None else ''
    source = entry.find("feature[@name='source']")
    [meta['Taxon ID'], meta['Taxonomy'], meta['Organism'],
     meta['Strain']] = _source_metadata(source)
    reference = entry.find("reference[@type='article']")
    [meta['Reference Location'], meta['PubMed ID']] = _reference_metadata(reference)
    return meta
def loadTraceXml(self, stream):
    """Load a trace file whose '#'-prefixed lines form an XML header and
    whose remaining lines are whitespace-separated float columns.

    Stores one numpy array per column under self[colname] and fills
    self.description from the header's Variables section.
    """
    xmlstringlist = []   # '#'-stripped header lines (XML fragments)
    data = []            # numeric data rows
    for line in stream:
        if line[0]=="#":
            xmlstringlist.append(line.lstrip("# "))
        else:
            data.append( list(map(float,line.split())) )
    root = ElementTree.fromstringlist(xmlstringlist)
    columnspec = ColumnSpec.fromXmlElement(root.find("./Variables/ColumnSpec"))
    # transpose rows into per-column tuples and pair each with its name
    for colname, d in zip(columnspec, zip(*data)):
        # a trailing NaN is dropped -- presumably marking an incomplete
        # final sample; TODO confirm the file-format convention
        if math.isnan(d[-1]):
            a = numpy.array(d[0:-1])
        else:
            a = numpy.array(d)
        self[colname] = a
    tpelement = root.find("./Variables/TracePlottingList")
    self.description["tracePlottingList"] = TracePlottingList.fromXmlElement(tpelement) if tpelement is not None else None
    for element in root.findall("./Variables/Element"):
        self.varFromXmlElement(element, self.description)
def status(self):
    """ Query HCP for the status of the request log download.

    :returns: a *collection.OrderedDict*:

        ::

            {
                readyForStreaming: bool,
                streamingInProgress: bool,
                started: bool,
                error: bool,
                content: list  # one or more of: L_ACCESS, L_SYSTEM,
                               # L_SERVICE, L_APPLICATION)
            }

    :raises: re-raises whatever is raised below
    """
    self.logger.debug('status query issued')

    try:
        self.con.GET('/mapi/logs')
    except Exception as e:
        self.logger.error(e)
        raise
    else:
        self.logger.debug('response headers: {}'.format(self.con.getheaders()))
        xml = self.con.read().decode()
        # NOTE(review): the purpose of this fixed delay is not evident from
        # the code -- confirm it is needed before removing.
        time.sleep(.5)

        if self.con.response_status != 200:
            # non-OK response: signal failure with None
            return None
        else:
            stat = OrderedDict()
            # each child element is either a boolean flag ('true'/'false')
            # or a comma-separated list (the log content types)
            for child in Et.fromstringlist(xml):
                if child.text == 'true':
                    stat[child.tag] = True
                elif child.text == 'false':
                    stat[child.tag] = False
                else:
                    stat[child.tag] = child.text.split(',')

            return stat
def _run_parameters_parser(run_parameters):
    """Parse a runParameters XML payload into a nested dict.

    :param run_parameters: mapping whose 'result' key holds the XML text
    :returns: dict with 'run_info', 'reads' and 'reagents' sections, or {}
        when the payload is empty

    Refactor: the original evaluated every `list(root.iter(tag))` twice
    (once for the length test, once for the value); the lookups are now
    done once through small helpers with identical fallback semantics.
    """
    def first_text(root, tag):
        # Text of the first <tag> element, or '' when absent.
        elems = list(root.iter(tag))
        return elems[0].text if elems else ''

    def first_kit_id(root, tag):
        # <ID> text of the first <tag> element, or '' when <tag> is absent.
        elems = list(root.iter(tag))
        return elems[0].find('ID').text if elems else ''

    def last_reagent_kit_id(root, tag):
        # <ReagentKit>/<ID> text of the LAST <tag> element carrying a
        # <ReagentKit> child (matches the original list(...).pop()), or ''.
        ids = [e.find('ReagentKit').find('ID').text
               for e in root.iter(tag) if e.find('ReagentKit') is not None]
        return ids.pop() if ids else ''

    result = dict()
    if len(run_parameters['result']) > 0:
        root = ET.fromstringlist(run_parameters['result'])
        result = dict(
            run_info=dict(
                run_id=first_text(root, 'RunID'),
                fc_id=first_text(root, 'Barcode'),
                date=first_text(root, 'RunStartDate'),
                scanner_id=first_text(root, 'ScannerID'),
                scanner_number=first_text(root, 'ScannerNumber'),
            ),
            reads=[r.attrib for r in root.iter('Read')],
            reagents=dict(
                sbs=dict(
                    kit=first_text(root, 'Sbs'),
                    id=first_kit_id(root, 'SbsReagentKit'),
                ),
                index=dict(
                    kit=first_text(root, 'Index'),
                    id=last_reagent_kit_id(root, 'Index'),
                ),
                pe=dict(
                    kit=first_text(root, 'Pe'),
                    id=last_reagent_kit_id(root, 'Pe'),
                ),
            ),
        )
    return result