def _xml_to_dataframe(xml_list): ''' Takes a list of XML strings and turns it into a dataframe ''' # Get the header xml_str = xml_list[0] root = etree.fromstring(xml_str) header = _extract_position_names(root) scenario_data = {} for position_name in header: scenario_data[position_name] = [] for xml_str in xml_list: scenario = etree.fromstring(xml_str) positions = scenario[4] for position in positions: position_size = position.text.replace(',', '') scenario_data[position.get('id')].append(float(position_size)) # Get dates scenario_dates = [] for element in xml_list: scenario = etree.fromstring(element) date_string = scenario[1].text date = datetime.datetime.strptime(date_string, '%Y%m%d') scenario_dates.append(date) df = pandas.DataFrame(scenario_data, index=scenario_dates) return df
def validate(self):
    """Validate ``self.Xml`` against the schema stored on ``self.xsd``.

    The schema is built from ``XSD`` the first time it is needed and kept on
    the instance as ``self.xsd`` for later calls.

    Returns:
        Whatever ``self.xsd.validate`` returns for the parsed document.
    """
    schema_missing = not hasattr(self, 'xsd')
    if schema_missing:
        schema_document = etree.parse(XSD)
        self.xsd = etree.XMLSchema(schema_document)

    document = etree.fromstring(self.Xml)
    return self.xsd.validate(document)
def WritePolygonFile(self, polygon, logger):
    """Parse ``polygon`` as XML and write it to ``self.polygon_file``.

    The polygon is parsed before the output file is opened, so a polygon
    that is not well-formed XML raises without truncating an existing file.

    Args:
        polygon: object whose ``str()`` is the polygon XML text.
        logger: passed to ``utils.PrintAndLog`` with the final message.
    """
    # Check XML validity and standardize representation
    utils.PrintAndLog("Checking polygon")
    xml = etree.ElementTree(etree.fromstring(str(polygon)))

    utils.PrintAndLog("Writing polygon")
    # Serialising with an explicit encoding produces bytes, so the target
    # must be opened in binary mode; a text-mode file rejects bytes on
    # Python 3.
    with open(self.polygon_file, "wb") as fp:
        xml.write(fp, xml_declaration=True, encoding='UTF-8')

    utils.PrintAndLog("SUCCESS", logger, None)
def get_fb2_meta(abs_file_path, file_path, file_name, file_extension):
    """Read title, authors and description from a FictionBook (FB2) file.

    Args:
        abs_file_path: path of the file to open and parse.
        file_path: stored unchanged in the returned metadata.
        file_name: stored in the metadata; also used as the title when the
            document has no ``book-title``.
        file_extension: stored unchanged in the returned metadata.

    Returns:
        BookMeta with ``title``, ``author`` (all ``title-info`` authors joined
        with ``", "``), ``description`` (the ``publish-info/book-name`` text or
        an empty string), the file fields and the file size. The remaining
        fields are empty strings.
    """
    ns = {
        'fb': 'http://www.gribuser.ru/xml/fictionbook/2.0',
        'l': 'http://www.w3.org/1999/xlink',
    }

    # Read raw bytes and close the handle promptly. Passing bytes lets the
    # parser honour the encoding declared in the document; lxml refuses
    # already-decoded text that still carries an encoding declaration.
    with open(abs_file_path, 'rb') as fb2_file:
        tree = etree.fromstring(fb2_file.read())

    def first_text(element, path):
        """Return the first XPath match under ``element`` or ''."""
        matches = element.xpath(path, namespaces=ns)
        return matches[0] if matches else ''

    def get_author(element):
        """Format one author element as 'first middle last'."""
        last_name = first_text(element, 'fb:last-name/text()')
        middle_name = first_text(element, 'fb:middle-name/text()')
        first_name = first_text(element, 'fb:first-name/text()')
        return first_name + ' ' + middle_name + ' ' + last_name

    authors = tree.xpath(
        '/fb:FictionBook/fb:description/fb:title-info/fb:author',
        namespaces=ns)
    author = str(", ".join(map(get_author, authors)))

    title = tree.xpath(
        '/fb:FictionBook/fb:description/fb:title-info/fb:book-title/text()',
        namespaces=ns)
    # Fall back to the file name when the book carries no title.
    title = str(title[0]) if title else file_name

    description = tree.xpath(
        '/fb:FictionBook/fb:description/fb:publish-info/fb:book-name/text()',
        namespaces=ns)
    description = str(description[0]) if description else ''

    return BookMeta(
        title=title,
        author=author,
        cover_path="",
        description=description,
        tags="",
        series="",
        series_id="",
        languages="",
        file_path=file_path,
        file_name=file_name,
        file_extension=file_extension,
        file_size=os.path.getsize(abs_file_path))
def get_height_and_width(self, idx):
    """Return ``(height, width)`` from the annotation XML at index ``idx``.

    The file ``self.xml_list[idx]`` is parsed and converted with
    ``self.parse_xml_to_dict``; the values under
    ``["annotation"]["size"]`` are returned as integers.
    """
    with open(self.xml_list[idx]) as annotation_file:
        annotation_root = etree.fromstring(annotation_file.read())

    size = self.parse_xml_to_dict(annotation_root)["annotation"]["size"]
    return int(size["height"]), int(size["width"])
def attributes_to_dict(line):
    """Parses xml row into python dict

    Args:
        line: XML text of a single element.

    Returns:
        dict mapping each attribute name of the parsed element to its value.
        If the line is not well-formed XML, the error is reported on stdout
        and an empty dict is returned.
    """
    # Bound before the try so the final return is valid even when parsing
    # fails (previously that path raised UnboundLocalError).
    ret = {}
    try:
        parsed = etree.fromstring(line)
    except etree.XMLSyntaxError:
        print('Error encountered while trying to parse: ', line)
    else:
        ret = {key: parsed.get(key) for key in parsed.keys()}
    return ret
def _parsed_msg(self): """parse self._body xml string and set it to self.__dict__ """ modified_key = self._modify_key("_parsed_msg") if modified_key in self.__dict__: return self.__dict__[modified_key] if self._msg is None: raise ValueError(u"self._msg should not be None") self.__dict__[modified_key] = etree.fromstring(self._msg) return self.__dict__[modified_key]
def defaultConfig():
    """Build the default configuration element tree.

    Creates a ``MetamergeConfig`` root with one ``Folder`` child whose
    ``name`` attribute is ``Config``, then adds every direct child of the
    document parsed from ``defcfgxml`` to that folder.

    Returns:
        The ``MetamergeConfig`` root element.
    """
    cfg = Element("MetamergeConfig")
    folder = SubElement(cfg, "Folder")
    folder.set('name', "Config")

    defaults_root = ET.fromstring(defcfgxml)
    folder.extend(defaults_root.findall('./*'))
    return cfg
def alpino_parse(sent, host='zardoz.service.rug.nl', port=42424):
    """Send a sentence to a parse server and return the reply as an XML tree.

    The sentence, followed by a blank line, is sent UTF-8 encoded over a TCP
    connection to ``host``:``port``. The reply is read until the server
    closes the connection and is then parsed as XML.

    Args:
        sent: sentence text to parse.
        host: server host name.
        port: server TCP port.

    Returns:
        The root element parsed from the server's reply.
    """
    request = (sent + "\n\n").encode('utf-8')
    chunks = []

    # The context manager closes the socket on every exit path, including
    # connection or transfer errors.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.connect((host, port))
        s.sendall(request)
        while True:
            chunk = s.recv(8192)
            if not chunk:
                # An empty read means the peer closed the connection.
                break
            chunks.append(chunk)

    # Join once instead of growing a bytes object chunk by chunk.
    return etree.fromstring(b''.join(chunks))
def alpino_parse(sent, host='zardoz.service.rug.nl', port=42424):
    """Send a sentence to a parse server and return the reply as an XML tree.

    The sentence, followed by a blank line, is sent UTF-8 encoded over a TCP
    connection to ``host``:``port``. The reply is read until the server
    closes the connection and is then parsed as XML.

    Args:
        sent: sentence text to parse.
        host: server host name.
        port: server TCP port.

    Returns:
        The root element parsed from the server's reply.
    """
    request = (sent + "\n\n").encode('utf-8')
    chunks = []

    # The context manager closes the socket on every exit path, including
    # connection or transfer errors.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.connect((host, port))
        s.sendall(request)
        while True:
            chunk = s.recv(8192)
            if not chunk:
                # An empty read means the peer closed the connection.
                break
            chunks.append(chunk)

    # Join once instead of growing a bytes object chunk by chunk.
    return etree.fromstring(b''.join(chunks))
def coco_index(self, idx):
    """
    Return image size and target data without opening the image.

    This method exists for collecting label statistics with pycocotools.
    It applies no processing to images or labels, and because the image
    file is never read it greatly reduces the time statistics take.

    Args:
        idx: index of the sample to fetch

    Returns:
        ((height, width), target) where target holds the tensors
        "boxes" ([xmin, ymin, xmax, ymax] rows), "labels", "image_id",
        "area" and "iscrowd".
    """
    # read xml
    with open(self.xml_list[idx]) as annotation_file:
        annotation_root = etree.fromstring(annotation_file.read())
    data = self.parse_xml_to_dict(annotation_root)["annotation"]

    size = data["size"]
    data_height = int(size["height"])
    data_width = int(size["width"])

    boxes, labels, iscrowd = [], [], []
    for obj in data["object"]:
        bndbox = obj["bndbox"]
        xmin, xmax, ymin, ymax = (
            float(bndbox[key]) for key in ("xmin", "xmax", "ymin", "ymax"))
        boxes.append([xmin, ymin, xmax, ymax])
        labels.append(self.class_dict[obj["name"]])
        iscrowd.append(int(obj["difficult"]))

    # convert everything into a torch.Tensor
    boxes = torch.as_tensor(boxes, dtype=torch.float32)
    labels = torch.as_tensor(labels, dtype=torch.int64)
    iscrowd = torch.as_tensor(iscrowd, dtype=torch.int64)
    image_id = torch.tensor([idx])

    box_heights = boxes[:, 3] - boxes[:, 1]
    box_widths = boxes[:, 2] - boxes[:, 0]
    area = box_heights * box_widths

    target = {
        "boxes": boxes,
        "labels": labels,
        "image_id": image_id,
        "area": area,
        "iscrowd": iscrowd,
    }
    return (data_height, data_width), target
def prettyPrintXml(self,):
    """Pretty-print ``self.xmlData`` into ``self.xmlText`` and ``self.xmlFile``.

    ``self.xmlText`` is first set to the decoded ``self.xmlData``. If that
    text is empty nothing else happens. Otherwise the document is parsed,
    ``self.xmlText`` is replaced by the pretty-printed serialisation, and
    the tree is written to ``self.xmlFile`` when that is not None; when it
    is None a notice is printed instead.
    """
    # Generate xml text
    self.xmlText = self.xmlData.decode()
    if not self.xmlText:
        # Nothing to parse, so there is nothing to pretty-print or write.
        return

    # Parse the original bytes rather than the decoded text: lxml refuses
    # str input that still carries an XML encoding declaration.
    rootElement = etree.fromstring(bytes(self.xmlData))

    prettyPrintText = etree.tostring(rootElement, pretty_print=True).decode()
    if prettyPrintText:
        self.xmlText = prettyPrintText

    if self.xmlFile is not None:
        elementTree = etree.ElementTree(rootElement)
        elementTree.write(self.xmlFile, pretty_print=True, encoding='utf-8')
    else:
        print('Not writing xml text to file, because None was provided for xmlFile parameter')
def __getitem__(self, idx):
    """Load the image and detection target for sample ``idx``.

    The annotation XML at ``self.xml_list[idx]`` is parsed with
    ``self.parse_xml_to_dict``; the image named by its ``filename`` entry is
    opened from ``self.img_root``. ``self.transforms``, when set, is applied
    to the (image, target) pair before returning.

    Returns:
        (image, target) where target holds the tensors "boxes"
        ([xmin, ymin, xmax, ymax] rows), "labels", "image_id", "area" and
        "iscrowd".

    Raises:
        ValueError: if the opened image's format is not "JPEG".
    """
    # read xml
    with open(self.xml_list[idx]) as annotation_file:
        annotation_root = etree.fromstring(annotation_file.read())
    data = self.parse_xml_to_dict(annotation_root)["annotation"]

    image = Image.open(os.path.join(self.img_root, data["filename"]))
    if image.format != "JPEG":
        raise ValueError("Image format not JPEG")

    boxes, labels, iscrowd = [], [], []
    for obj in data["object"]:
        bndbox = obj["bndbox"]
        xmin, xmax, ymin, ymax = (
            float(bndbox[key]) for key in ("xmin", "xmax", "ymin", "ymax"))
        boxes.append([xmin, ymin, xmax, ymax])
        labels.append(self.class_dict[obj["name"]])
        iscrowd.append(int(obj["difficult"]))

    # convert everything into a torch.Tensor
    boxes = torch.as_tensor(boxes, dtype=torch.float32)
    labels = torch.as_tensor(labels, dtype=torch.int64)
    iscrowd = torch.as_tensor(iscrowd, dtype=torch.int64)
    image_id = torch.tensor([idx])

    box_heights = boxes[:, 3] - boxes[:, 1]
    box_widths = boxes[:, 2] - boxes[:, 0]
    area = box_heights * box_widths

    target = {
        "boxes": boxes,
        "labels": labels,
        "image_id": image_id,
        "area": area,
        "iscrowd": iscrowd,
    }

    if self.transforms is not None:
        image, target = self.transforms(image, target)
    return image, target
def send_cmd_int(cls, cmd, msg_type):
    '''Construct NX-API message. Send commands through NX-API. Only single
    command for show commands. Internal usage

    Args:
        cmd: command text to send.
        msg_type: message type; for "cli_show" and "cli_show_ascii" the
            command must not contain " ;".

    Returns:
        [output, status, msg]: output is the serialised first <body>
        element for "cli_show", otherwise that element's text ("" when
        absent or empty); status is 0 when the first <code> is "200",
        otherwise that code as an int; msg is the text of the first <msg>.

    Raises:
        cmd_exec_error: if a show command contains " ;".
    '''
    is_show = msg_type in ("cli_show", "cli_show_ascii")
    if is_show and " ;" in cmd:
        raise cmd_exec_error("Only single show command supported in internal api")

    req_msg_str = cls.req_obj.get_req_msg_str(
        msg_type=msg_type,
        input_cmd=cmd,
        out_format=cls.out_format,
        do_chunk=cls.do_chunk,
        sid=cls.sid)
    resp_headers, resp_str = cls.req_fetcher.get_resp(
        req_msg_str, cls.cookie, cls.timeout)

    # Remember the session cookie for later requests.
    if 'Set-Cookie' in resp_headers:
        cls.cookie = resp_headers['Set-Cookie']
    # The value is not used further, but the lookup is kept so a response
    # without this header still fails here as before.
    content_type = resp_headers['Content-Type']

    root = etree.fromstring(resp_str)
    bodies = root.findall('.//body')
    codes = root.findall('.//code')
    msgs = root.findall('.//msg')

    output = ""
    if bodies:
        first_body = bodies[0]
        if msg_type == 'cli_show':
            output = etree.tostring(first_body)
        else:
            output = first_body.text
    if output is None:
        output = ""

    code_text = codes[0].text
    status = 0 if code_text == "200" else int(code_text)
    return [output, status, msgs[0].text]
def test_tree_generation(self):
    """The document tree built from companies.xml matches the stored JSON.

    The XML fixture is passed through ``remove_nbsp``, parsed with
    ``self.parser`` and fed to ``create_document_tree``; the JSON dump of
    the result must equal the stripped contents of the expected file.
    """
    with open('tests/instruments/companies.xml') as source:
        cleaned_xml = remove_nbsp(source.read())
    tree = etree.fromstring(cleaned_xml, parser=self.parser)
    document_tree = create_document_tree(tree)

    with open('tests/companiesact_doc_tree.json') as expected_file:
        expected_json = expected_file.read().strip()

    self.assertEqual(json.dumps(document_tree), expected_json)
# Input and output paths (not referenced by the statements visible here).
file_in = "osim-rl/osim/models/gait9dof18musc.osim"
file_out = "mujoco_gait9dof18musc.xml"

# Import the ElementTree submodule explicitly: importing only the package
# ``xml.etree`` does not load it, so ``xml.etree.ElementTree`` below could
# fail with AttributeError.
import xml.etree.ElementTree
from lxml import etree

# recover=True asks lxml to keep parsing through malformed XML.
parser = etree.XMLParser(recover=True)
# NOTE(review): ``xmlstring`` is not defined in the visible code; it must be
# provided earlier in the file.
e = etree.fromstring(xmlstring, parser=parser)
# Rebinds ``e`` to the root of 'thefile.xml', parsed with the standard
# library; the lxml result above is discarded.
e = xml.etree.ElementTree.parse('thefile.xml').getroot()
def parse_with_lxml():
    """Parse ``CONTENT`` and print the text of every ``log`` element.

    Elements are selected with the XPath ``//log``, i.e. at any depth of
    the parsed document.
    """
    root = etree.fromstring(CONTENT)
    for log in root.xpath("//log"):
        # Call form with a single argument behaves the same as the Python 2
        # print statement and is valid syntax on Python 3.
        print(log.text)