示例#1
0
def _xml_to_dataframe(xml_list):
    ''' Takes a list of XML strings and turns it into a dataframe

    Column names come from ``_extract_position_names`` applied to the first
    document.  Every document contributes one row: each child of its fifth
    element is converted to a float (commas are stripped first) and stored
    under that child's ``id`` attribute, and the row is indexed by the
    ``%Y%m%d`` date found in the text of its second element.

    Raises IndexError when ``xml_list`` is empty and KeyError when a
    position ``id`` is not among the names taken from the first document.
    '''
    # Parse each document exactly once; the same trees serve the header,
    # the position values and the dates.
    scenarios = [etree.fromstring(xml_str) for xml_str in xml_list]

    # Get the header
    header = _extract_position_names(scenarios[0])
    scenario_data = {position_name: [] for position_name in header}
    scenario_dates = []

    for scenario in scenarios:
        # Positions live under the fifth child of the scenario root.
        for position in scenario[4]:
            position_size = position.text.replace(',', '')
            scenario_data[position.get('id')].append(float(position_size))

        # The scenario date is the text of the second child.
        date = datetime.datetime.strptime(scenario[1].text, '%Y%m%d')
        scenario_dates.append(date)

    return pandas.DataFrame(scenario_data, index=scenario_dates)
示例#2
0
    def validate(self):
        """Validate ``self.Xml`` against the XSD schema and return the result.

        The schema is compiled from ``XSD`` the first time it is needed and
        then kept on the instance as ``self.xsd`` for later calls.
        """
        schema_missing = not hasattr(self, 'xsd')
        if schema_missing:
            schema_doc = etree.parse(XSD)
            self.xsd = etree.XMLSchema(schema_doc)

        document = etree.fromstring(self.Xml)
        return self.xsd.validate(document)
示例#3
0
 def WritePolygonFile(self, polygon, logger):
     """Validate the polygon XML and write it to ``self.polygon_file``.

     Args:
       polygon: object whose ``str()`` form is the polygon XML.
       logger: passed to the final "SUCCESS" log call only.

     The XML is parsed before the output file is opened, so a malformed
     polygon raises without truncating an existing polygon file.
     """
     # Check XML validity and standardize representation
     utils.PrintAndLog("Checking polygon")
     xml = etree.ElementTree(etree.fromstring(str(polygon)))
     utils.PrintAndLog("Writing polygon")
     # Binary mode: with an explicit encoding the serializer emits bytes.
     with open(self.polygon_file, "wb") as fp:
         xml.write(fp, xml_declaration=True, encoding='UTF-8')
     utils.PrintAndLog("SUCCESS", logger, None)
示例#4
0
文件: fb2.py 项目: incidunt/51-read
def get_fb2_meta(abs_file_path, file_path, file_name, file_extension):
    """Build a ``BookMeta`` from the description section of an FB2 file.

    Args:
        abs_file_path: path used to open the file and to measure its size.
        file_path: stored unchanged in the result.
        file_name: stored unchanged; also the title fallback when the
            document has no ``book-title``.
        file_extension: stored unchanged in the result.

    Returns:
        A ``BookMeta`` whose author is the comma-joined list of
        "first middle last" names from ``title-info``, whose description is
        the ``publish-info/book-name`` text (or ``''``), and whose cover,
        tags, series and language fields are empty strings.
    """
    ns = {
        'fb': 'http://www.gribuser.ru/xml/fictionbook/2.0',
        'l': 'http://www.w3.org/1999/xlink',
    }

    # Read raw bytes and close the handle promptly.  Handing bytes to the
    # parser lets it honour the document's own encoding declaration; a
    # decoded string that still carries such a declaration is rejected.
    with open(abs_file_path, 'rb') as fb2_file:
        tree = etree.fromstring(fb2_file.read())

    def first_text(element, path):
        """Return the first XPath match of ``path`` under ``element``, or ''."""
        matches = element.xpath(path, namespaces=ns)
        return matches[0] if len(matches) else ''

    def get_author(element):
        """Format one ``author`` element as 'first middle last'."""
        last_name = first_text(element, 'fb:last-name/text()')
        middle_name = first_text(element, 'fb:middle-name/text()')
        first_name = first_text(element, 'fb:first-name/text()')
        # Missing parts stay as empty strings, so the separators remain.
        return first_name + ' ' + middle_name + ' ' + last_name

    authors = tree.xpath(
        '/fb:FictionBook/fb:description/fb:title-info/fb:author',
        namespaces=ns)
    author = str(", ".join(map(get_author, authors)))

    title = tree.xpath(
        '/fb:FictionBook/fb:description/fb:title-info/fb:book-title/text()',
        namespaces=ns)
    title = str(title[0]) if len(title) else file_name

    description = tree.xpath(
        '/fb:FictionBook/fb:description/fb:publish-info/fb:book-name/text()',
        namespaces=ns)
    description = str(description[0]) if len(description) else ''

    return BookMeta(title=title,
                    author=author,
                    cover_path="",
                    description=description,
                    tags="",
                    series="",
                    series_id="",
                    languages="",
                    file_path=file_path,
                    file_name=file_name,
                    file_extension=file_extension,
                    file_size=os.path.getsize(abs_file_path))
示例#5
0
 def get_height_and_width(self, idx):
     """Return ``(height, width)`` from the annotation XML at index ``idx``.

     The path is taken from ``self.xml_list``; the parsed tree is turned
     into a dict by ``self.parse_xml_to_dict`` and the integers are read
     from its ``annotation -> size`` entry.
     """
     # read xml
     with open(self.xml_list[idx]) as xml_file:
         xml_text = xml_file.read()
     root = etree.fromstring(xml_text)
     size = self.parse_xml_to_dict(root)["annotation"]["size"]
     return int(size["height"]), int(size["width"])
示例#6
0
def attributes_to_dict(line):
    """Parses xml row into python dict

    Returns a dict mapping each attribute name of the row's root element to
    its value.  When ``line`` is not well-formed XML the problem is printed
    and an empty dict is returned.
    """
    try:
        parsed = etree.fromstring(line)
    except etree.XMLSyntaxError:
        print('Error encountered while trying to parse: ', line)
        # Return an empty mapping instead of falling through to a `return`
        # of a name that was never bound (which raised UnboundLocalError).
        return {}
    return dict(parsed.items())
示例#7
0
    def _parsed_msg(self):
        """Return the XML tree parsed from self._msg, caching it on the instance.

        The tree is stored in self.__dict__ under the key produced by
        self._modify_key("_parsed_msg"); later calls return the stored tree
        without parsing again.

        Raises ValueError if self._msg is None and nothing is cached yet.
        """
        cache = self.__dict__
        cache_key = self._modify_key("_parsed_msg")
        if cache_key not in cache:
            if self._msg is None:
                raise ValueError(u"self._msg should not be None")
            cache[cache_key] = etree.fromstring(self._msg)
        return cache[cache_key]
示例#8
0
def defaultConfig():
    """Build the default ``MetamergeConfig`` element.

    The root gets a single ``Folder`` child whose ``name`` attribute is
    ``Config``; every direct child element parsed from ``defcfgxml`` is
    appended to that folder.  Returns the root element.
    """
    cfg = Element("MetamergeConfig")
    folder = SubElement(cfg, "Folder")
    folder.set('name', "Config")

    template_root = ET.fromstring(defcfgxml)
    folder.extend(template_root.findall('./*'))
    return cfg
示例#9
0
def alpino_parse(sent, host='zardoz.service.rug.nl', port=42424):
    """Send ``sent`` to the parse server at ``host:port`` and return its XML reply.

    The sentence is sent UTF-8 encoded and terminated by a blank line; the
    reply is read until the server closes the connection and is then
    parsed with ``etree.fromstring``.

    Args:
        sent: sentence text (str).
        host: server host name.
        port: server TCP port.

    Returns:
        The root element of the parsed reply.
    """
    # Encode before connecting so bad input fails without opening a socket.
    request = (sent + "\n\n").encode('utf-8')
    chunks = []
    # The context manager closes the socket even when connect/send/recv
    # raises; previously the socket was never closed.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.connect((host, port))
        s.sendall(request)
        while True:
            chunk = s.recv(8192)
            if not chunk:
                # An empty read means the peer closed the connection.
                break
            chunks.append(chunk)
    return etree.fromstring(b''.join(chunks))
示例#10
0
def alpino_parse(sent, host='zardoz.service.rug.nl', port=42424):
    """Send ``sent`` to the parse server at ``host:port`` and return its XML reply.

    The sentence is sent UTF-8 encoded and terminated by a blank line; the
    reply is read until the server closes the connection and is then
    parsed with ``etree.fromstring``.

    Args:
        sent: sentence text (str).
        host: server host name.
        port: server TCP port.

    Returns:
        The root element of the parsed reply.
    """
    # Encode before connecting so bad input fails without opening a socket.
    request = (sent + "\n\n").encode('utf-8')
    received = []
    # The context manager closes the socket even when connect/send/recv
    # raises; previously the socket was never closed.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.connect((host, port))
        s.sendall(request)
        while True:
            data = s.recv(8192)
            if not data:
                # An empty read means the peer closed the connection.
                break
            received.append(data)
    return etree.fromstring(b''.join(received))
示例#11
0
    def coco_index(self, idx):
        """
        Collect label information for pycocotools statistics without
        processing the image or the labels in any way.

        Because the image file is never read, gathering the statistics is
        much faster.

        Args:
            idx: index of the sample whose annotation should be loaded

        Returns:
            ``((height, width), target)`` where ``target`` holds the
            ``boxes``, ``labels``, ``image_id``, ``area`` and ``iscrowd``
            tensors built from the annotation.
        """
        # read xml
        with open(self.xml_list[idx]) as xml_file:
            xml_text = xml_file.read()
        annotation = self.parse_xml_to_dict(etree.fromstring(xml_text))["annotation"]
        size = annotation["size"]
        data_height = int(size["height"])
        data_width = int(size["width"])

        boxes, labels, iscrowd = [], [], []
        for obj in annotation["object"]:
            bndbox = obj["bndbox"]
            xmin, xmax, ymin, ymax = (
                float(bndbox[key]) for key in ("xmin", "xmax", "ymin", "ymax"))
            boxes.append([xmin, ymin, xmax, ymax])
            labels.append(self.class_dict[obj["name"]])
            iscrowd.append(int(obj["difficult"]))

        # convert everything into a torch.Tensor
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        labels = torch.as_tensor(labels, dtype=torch.int64)
        iscrowd = torch.as_tensor(iscrowd, dtype=torch.int64)
        image_id = torch.tensor([idx])

        # box area = (ymax - ymin) * (xmax - xmin)
        heights = boxes[:, 3] - boxes[:, 1]
        widths = boxes[:, 2] - boxes[:, 0]
        area = heights * widths

        target = {
            "boxes": boxes,
            "labels": labels,
            "image_id": image_id,
            "area": area,
            "iscrowd": iscrowd,
        }
        return (data_height, data_width), target
示例#12
0
    def prettyPrintXml(self,):
        """Decode ``self.xmlData`` into ``self.xmlText`` and pretty-print it.

        When the decoded text is non-empty it is parsed, ``self.xmlText`` is
        replaced by the pretty-printed form (if that is non-empty), and the
        tree is written to ``self.xmlFile`` unless that is None, in which
        case a notice is printed instead.
        """
        # Generate xml text
        self.xmlText = self.xmlData.decode()
        if self.xmlText is None or len(self.xmlText) == 0:
            return

        root = etree.fromstring(self.xmlText)
        pretty = etree.tostring(root, pretty_print=True).decode()
        if pretty is not None and len(pretty) > 0:
            self.xmlText = pretty

        if self.xmlFile is None:
            print('Not writing xml text to file, because None was provided for xmlFile parameter')
            return

        etree.ElementTree(root).write(self.xmlFile, pretty_print=True, encoding='utf-8')
示例#13
0
    def __getitem__(self, idx):
        """Load the image and detection target for sample ``idx``.

        The annotation XML listed at ``self.xml_list[idx]`` supplies the
        image file name and the objects; the image is opened from
        ``self.img_root``.  ``self.transforms`` is applied to the pair when
        it is not None.

        Returns:
            ``(image, target)`` where ``target`` holds the ``boxes``,
            ``labels``, ``image_id``, ``area`` and ``iscrowd`` tensors.

        Raises:
            ValueError: if the opened image's format is not "JPEG".
        """
        # read xml
        with open(self.xml_list[idx]) as xml_file:
            xml_text = xml_file.read()
        annotation = self.parse_xml_to_dict(etree.fromstring(xml_text))["annotation"]

        image = Image.open(os.path.join(self.img_root, annotation["filename"]))
        if image.format != "JPEG":
            raise ValueError("Image format not JPEG")

        boxes, labels, iscrowd = [], [], []
        for obj in annotation["object"]:
            bndbox = obj["bndbox"]
            xmin, xmax, ymin, ymax = (
                float(bndbox[key]) for key in ("xmin", "xmax", "ymin", "ymax"))
            boxes.append([xmin, ymin, xmax, ymax])
            labels.append(self.class_dict[obj["name"]])
            iscrowd.append(int(obj["difficult"]))

        # convert everything into a torch.Tensor
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        labels = torch.as_tensor(labels, dtype=torch.int64)
        iscrowd = torch.as_tensor(iscrowd, dtype=torch.int64)
        image_id = torch.tensor([idx])

        # box area = (ymax - ymin) * (xmax - xmin)
        heights = boxes[:, 3] - boxes[:, 1]
        widths = boxes[:, 2] - boxes[:, 0]
        area = heights * widths

        target = {
            "boxes": boxes,
            "labels": labels,
            "image_id": image_id,
            "area": area,
            "iscrowd": iscrowd,
        }

        if self.transforms is None:
            return image, target

        image, target = self.transforms(image, target)
        return image, target
示例#14
0
    def send_cmd_int(cls, cmd, msg_type):
        '''Build an NX-API request for ``cmd``, send it and unpack the reply.

           Show commands ("cli_show" / "cli_show_ascii") must be a single
           command; a cmd_exec_error is raised when ``cmd`` contains " ;".
           Internal usage.

           Returns a list ``[output, status, msg_text]``: ``output`` is the
           serialized first <body> element for "cli_show", otherwise that
           element's text ("" when absent); ``status`` is 0 when the first
           <code> element reads "200", else its integer value.'''
        is_show = msg_type in ("cli_show", "cli_show_ascii")
        if is_show and " ;" in cmd:
            raise cmd_exec_error("Only single show command supported in internal api")

        request = cls.req_obj.get_req_msg_str(
            msg_type=msg_type,
            input_cmd=cmd,
            out_format=cls.out_format,
            do_chunk=cls.do_chunk,
            sid=cls.sid)
        resp_headers, resp_str = cls.req_fetcher.get_resp(
            request, cls.cookie, cls.timeout)

        # Remember the session cookie for subsequent requests.
        if 'Set-Cookie' in resp_headers:
            cls.cookie = resp_headers['Set-Cookie']
        # Not used below; the lookup is kept because a failing lookup
        # still propagates to the caller.
        content_type = resp_headers['Content-Type']

        root = etree.fromstring(resp_str)
        body = root.findall('.//body')
        code = root.findall('.//code')
        msg = root.findall('.//msg')

        output = ""
        if body:
            first_body = body[0]
            if msg_type == 'cli_show':
                output = etree.tostring(first_body)
            else:
                output = first_body.text
        if output is None:
            output = ""

        code_text = code[0].text
        status = 0 if code_text == "200" else int(code_text)
        return [output, status, msg[0].text]
示例#15
0
 def test_tree_generation(self):
     """The document tree built from companies.xml matches the stored JSON."""
     with open('tests/instruments/companies.xml') as source:
         cleaned = remove_nbsp(source.read())
         tree = etree.fromstring(cleaned, parser=self.parser)
     document_tree = create_document_tree(tree)
     with open('tests/companiesact_doc_tree.json') as fixture:
         actual = json.dumps(document_tree)
         expected = fixture.read().strip()
     self.assertEqual(actual, expected)
示例#16
0
 def test_tree_generation(self):
     """The document tree built from companies.xml matches the stored JSON."""
     with open('tests/instruments/companies.xml') as xml_file:
         xml_text = remove_nbsp(xml_file.read())
         tree = etree.fromstring(xml_text, parser=self.parser)
     document_tree = create_document_tree(tree)
     with open('tests/companiesact_doc_tree.json') as json_file:
         serialized = json.dumps(document_tree)
         stored = json_file.read().strip()
     self.assertEqual(serialized, stored)
示例#17
0
# Input and output file names; neither is read or written in the lines
# visible here.
file_in = "osim-rl/osim/models/gait9dof18musc.osim"
file_out = "mujoco_gait9dof18musc.xml"

import xml.etree
from lxml import etree
# recover=True asks the lxml parser to keep going on malformed XML.
parser = etree.XMLParser(recover=True)
# NOTE(review): `xmlstring` is not defined anywhere in this view - confirm
# where it is supposed to come from.
e = etree.fromstring(xmlstring, parser=parser)

# Rebinds `e`, discarding the tree parsed above.
# NOTE(review): only `xml.etree` is imported here; confirm the
# `ElementTree` submodule is loaded before this attribute access.
e = xml.etree.ElementTree.parse('thefile.xml').getroot()
def parse_with_lxml():
    """Print the text of every ``log`` element found in ``CONTENT``."""
    root = etree.fromstring(CONTENT)
    for log in root.xpath("//log"):
        # Call form of print: the statement form is a syntax error on
        # Python 3, while this prints the same text on Python 2 and 3.
        print(log.text)