def uniform_yes_no(tt: etree) -> None:
    """Normalise yes/no attribute values of ``tt`` in place.

    Attribute values equal to ``y``/``yes`` (compared case-insensitively)
    are rewritten to ``'Y'`` and ``n``/``no`` to ``'N'``. Every other value
    is left as it is.

    Args:
        tt: element whose ``attrib`` mapping is updated.
    """
    canonical = {'y': 'Y', 'yes': 'Y', 'n': 'N', 'no': 'N'}
    for name in tt.attrib:
        replacement = canonical.get(tt.attrib[name].lower())
        if replacement is not None:
            tt.attrib[name] = replacement
Пример #2
0
def add_ids_aux(element: etree,
                ids: defaultdict,
                parent_id: str = "") -> defaultdict:
    """ Add ids to xml element

    Elements that already carry an ``id`` attribute keep it; others get
    ``parent_id + <prefix> + <counter>``. The prefix comes from
    ``TAG_TO_ID`` when the tag is listed there, is ``"y"`` / ``"m"`` for
    ``<seg>`` elements typed as syllables / morphemes, and is the tag name
    itself otherwise.

    Args:
        element (etree): Element to add ids to
        ids (defaultdict): counters for ids assigned so far by tag type
        parent_id (str): Optional; id of parent element, by default ''

    Returns:
        defaultdict: ids, with new counts added by tag type

    Raises:
        RuntimeError: if a ``<w>`` element is marked do-not-align, or is
            nested inside an element marked do-not-align.
    """
    if element.tag is etree.Comment:
        return ids
    tag = etree.QName(element.tag).localname
    if tag in TAGS_TO_IGNORE:
        return ids
    if is_do_not_align(element):
        if tag == "w":
            raise RuntimeError(
                'Found <w> element with do-not-align="true" attribute. '
                "This is not allowed, please verify you XML input.")
        if element.xpath(".//w"):
            raise RuntimeError(
                'Found <w> nested inside a do-not-align="true" element. '
                "This is not allowed, please verify you XML input.")
        return ids
    if "id" not in element.attrib:
        morph_types = ("morph", "morpheme", "base", "root", "prefix",
                       "suffix")
        seg_type = element.attrib.get("type") if tag == "seg" else None
        if tag in TAG_TO_ID:
            id_prefix = TAG_TO_ID[tag]
        elif seg_type == "syll":
            id_prefix = "y"
        elif seg_type in morph_types:
            id_prefix = "m"
        else:
            # Also reached for a <seg> whose type is not recognised; that
            # case used to leave the prefix unassigned and crash with
            # UnboundLocalError.
            id_prefix = tag
        if id_prefix not in ids:
            ids[id_prefix] = 0
        element.attrib["id"] = parent_id + id_prefix + str(ids[id_prefix])
        ids[id_prefix] += 1
    full_id = element.attrib["id"]
    # This deep copy of ids means that the ids counters are shared recursively
    # between siblings, but not between grand-children. Thus, if processing a p
    # element, the next p element will see its counter incremented, but the s
    # elements of the next p elements will start again at 0. ids always has the
    # counters of all ancestors and their siblings, by tag, but not the
    # descendents of siblings of ancestors.
    new_ids = deepcopy(ids)
    for child in element:
        new_ids = add_ids_aux(child, new_ids, full_id)
    # Deliberately return the caller's counters, not the children's copy.
    return ids
Пример #3
0
 def _error_check(self, command_response: etree) -> None:
     """commmand_response will be an XML Etree object."""
     error_list = command_response.find("./clierror")
     command_obj = command_response.find("./input")
     if error_list is not None:
         command = command_obj.text if command_obj is not None else "Unknown command"
         msg = etree.tostring(error_list).decode()
         raise NXAPICommandError(command, msg)
Пример #4
0
def create_game(root: Element):
    """Build a ``game_models.Game`` from the root element of a definition.

    The game name and player limits are read from the root's ``name``,
    ``max_players`` and ``min_players`` attributes (stored as found, without
    conversion). The ``actions``, ``collections``, ``turns`` and ``pieces``
    children are then handed, in that order, to their loader functions.

    Returns:
        The populated game object.
    """
    attrs = root.attrib
    game = game_models.Game(attrs['name'])
    game.max_players = attrs['max_players']
    game.min_players = attrs['min_players']

    section_loaders = (
        ("actions", add_actions),
        ("collections", add_collections),
        ("turns", add_turns),
        ("pieces", add_pieces),
    )
    for section_name, load_section in section_loaders:
        load_section(root.find(section_name), game)
    return game
def trim_and_sort_attrs(tt: etree) -> None:
    """Strip whitespace from every attribute value and reorder by name.

    The attributes of ``tt`` are rewritten in place so that they appear in
    sorted key order with leading/trailing whitespace removed from each
    value.

    Args:
        tt: element whose ``attrib`` mapping is rewritten.
    """
    # Take a snapshot before touching the mapping: removing entries while
    # iterating over it raises RuntimeError on dict-backed attribute maps.
    trimmed = {name: value.strip() for name, value in tt.attrib.items()}
    tt.attrib.clear()
    for name in sorted(trimmed):
        tt.attrib[name] = trimmed[name]
Пример #6
0
def get_charges(stree: etree) -> List[Charge]:
    """
    Find a list of the charges in a parsed docket.

    Works in three passes over ``stree``:

    1. Build one ``Charge`` per ``charge`` element under the
       ``section_charges`` section, with disposition ``"Unknown"``, no
       disposition date and no sentences.
    2. For every ``case_event`` in the ``section_disposition_sentencing``
       section flagged as a final disposition, copy its date onto each
       charge whose sequence number it lists.
    3. For every charge, take the LAST matching ``sequence`` element in the
       disposition section and read the disposition and sentences from it.

    Charges with no matching sequence, and dockets with no disposition
    section at all, keep the defaults from pass 1.

    Args:
        stree: xml tree of a docket, parsed into sections.

    Returns:
        The charges, in the order they appear in the charges section.
    """
    # find the charges in the Charges section
    charges = stree.xpath("//section[@name='section_charges']//charge")
    # charges is temporarily a list of tuples of [(sequence_num, Charge)]
    charges = [(
        xpath_or_blank(charge, "./seq_num"),
        Charge(
            offense=xpath_or_blank(charge, "./statute_description"),
            grade=xpath_or_blank(charge, "./grade"),
            statute=xpath_or_blank(charge, "./statute"),
            disposition="Unknown",
            disposition_date=None,
            sentences=[],
        ),
    ) for charge in charges]
    # figure out the disposition dates by looking for a final disposition date that matches a charge.
    final_disposition_events = stree.xpath(
        "//section[@name='section_disposition_sentencing']//case_event[case_event_desc_and_date/is_final[contains(text(),'Final Disposition')]]"
    )
    for final_disp_event in final_disposition_events:
        final_disp_date = xpath_date_or_blank(final_disp_event,
                                              ".//case_event_date")
        applies_to_sequences = xpath_or_empty_list(final_disp_event,
                                                   ".//sequence_number")
        for seq_num in applies_to_sequences:
            # set the final_disp date for the charge with sequence number seq_num
            # NOTE(review): this compares the value from xpath_or_blank with
            # whatever xpath_or_empty_list yields - presumably both are
            # stripped strings; confirm against that helper.
            for sn, charge in charges:
                if sn == seq_num:
                    charge.disposition_date = final_disp_date

    # Figure out the disposition of each charge from the disposition section.
    #   Do this by finding the last sequence in the disposition section for
    #   the sequence with seq_num. The disposition of the charge is that
    #   sequence's disposition. Sentence is in that xml element too.
    try:
        # [0] raises IndexError when the docket has no disposition section;
        # the outer handler then leaves every charge at its defaults.
        disposition_section = stree.xpath(
            "//section[@name='section_disposition_sentencing']")[0]
        for seq_num, charge in charges:
            try:
                # seq is the last sequence for the charge seq_num.
                # The XPath literal pads seq_num with one space on each
                # side, so it only matches text stored in that form.
                seq = disposition_section.xpath(
                    f"./disposition_section/disposition_subsection/disposition_details/case_event/sequences/sequence[sequence_number/text()=' {seq_num} ']"
                )[-1]
                charge.disposition = xpath_or_blank(seq,
                                                    "./offense_disposition")
                charge.sentences = get_sentences(seq)
            except IndexError:
                # No sequence for this charge (or an IndexError raised by
                # one of the helpers above): skip to the next charge.
                continue
    except IndexError:
        pass
    # Drop the temporary sequence numbers.
    return [c for i, c in charges]
Пример #7
0
	def save_file(self, obj):
		"""Ask the user for a destination and write ``obj.xmlroot`` there as XML.

		Opens a "Save File" dialog filtered to ``*.xml``. Saving is best
		effort: if the dialog is dismissed or anything fails while writing,
		"No file saved" is printed and the method returns normally.

		Args:
			obj: object whose ``xmlroot`` attribute is the tree root to save.
		"""
		qfd = QtWidgets.QFileDialog()
		name, _file_type = QtWidgets.QFileDialog.getSaveFileName(qfd, 'Save File', "","XML files (*.xml)")
		if not name:
			# An empty name means no destination was chosen; report it
			# directly instead of relying on open('') to fail.
			print("No file saved")
			return
		try:
			# Build the tree before opening the target so that a failure
			# here cannot truncate an existing file.
			tree = ET(obj.xmlroot)
			# encoding='unicode' writes str without an XML declaration, so
			# readers will assume UTF-8: open the file as UTF-8 instead of
			# the platform default. The with-block closes the handle even
			# if the write fails.
			with open(name, 'w', encoding='utf-8') as xmlfile:
				tree.write(xmlfile, encoding='unicode')
		except Exception:
			print("No file saved")
Пример #8
0
    def _parse_html(self, html: etree) -> None:
        """Populate the account fields from the server's html and notify.

        Sets ``name``, ``iban``, ``currency``, ``balance``, ``interest_sum``
        and ``interest_rate`` (in that order) from the first match of each
        query, then calls ``_notify_listeners``. A query with no match
        raises ``IndexError``.
        """
        def first_text(scope, query):
            # Text of the first element matched by query.
            return scope.xpath(query)[0].text

        self.name = first_text(html, '//div[@class="mytitle h4"]')
        self.iban = first_text(html, '//div[@class="mysubtitle h4"]')

        panel = html.xpath('//div[@class="myPanelData"]')[0]
        # NOTE(review): a query starting with '//' is evaluated against the
        # whole document even when run from `panel` - confirm whether
        # './/span' was intended here.
        self.currency = first_text(panel, '//span[@class="mycurr"]')

        # The three amounts use AMOUNT_SEARCH_PATH with positions 1, 2, 3.
        amount_fields = ("balance", "interest_sum", "interest_rate")
        for position, field_name in enumerate(amount_fields, start=1):
            parse = self._parse_float
            raw_amount = first_text(panel, AMOUNT_SEARCH_PATH.format(position))
            setattr(self, field_name, parse(raw_amount))
        self._notify_listeners()
Пример #9
0
def get_abstract(parsed_document: etree, alt_text: str = 'n/a') -> str:
    """Return the abstract text of a parsed article, or ``alt_text``.

    A plain abstract is read from ``front/article-meta/abstract/p``. When
    that paragraph is missing or has no text, the abstract is treated as
    sectioned and the text of every ``front/article-meta/abstract/sec/p``
    is concatenated.

    Args:
        parsed_document: root element of the parsed article.
        alt_text: value returned when no abstract text can be found.

    Returns:
        The abstract text, or ``alt_text`` if nothing usable is present.
    """
    try:
        paragraph = parsed_document.find('front/article-meta/abstract/p')
        if paragraph is not None and paragraph.text is not None:
            return paragraph.text
        # Sectioned abstracts have no direct <p>, so a missing paragraph
        # must fall through to here rather than straight to alt_text.
        sections = parsed_document.findall(
            'front/article-meta/abstract/sec/p')
        # Skip empty paragraphs instead of appending the string "None".
        abstract = ''.join(
            section.text for section in sections if section.text is not None)
    except AttributeError:
        # e.g. parsed_document is None or not an element.
        return alt_text
    return abstract or alt_text
Пример #10
0
def convert_metars(root: etree) -> List[Metar]:
    """
    Convert metar data for the database.

    Every ``METAR`` element under the root's ``data`` child is loaded into a
    fresh ``MetarXML`` and turned into a mapping with ``create_mapping``.

    :param root: XML etree root.
    :return: List of SQLAlchemy Base classes for Metars.
    """
    def populate(children: List[Element], holder: MetarXML) -> MetarXML:
        """
        Load the child elements of one METAR into ``holder``.

        Children that carry attributes are converted with
        ``process_attrib_metar`` and added as child objects; the others are
        set as ``tag=text`` fields.

        :param children: child branches of the etree.
        :param holder: Empty XML class object.
        :return: The same object, now loaded with data.
        """
        for child in children:
            if not child.attrib:
                holder.set(**{child.tag: child.text})
            else:
                holder.add_child(process_attrib_metar(child))
        return holder

    metar_elements = root.find("data").findall("METAR")
    return [
        populate(list(metar), MetarXML()).create_mapping()
        for metar in metar_elements
    ]
Пример #11
0
    def _node_to_dictionary(node: etree, ignore_attributes: bool = False):
        """
        Convert an lxml.etree node tree recursively into a nested dictionary.
        The node's attributes and child items will be added to it's dictionary.

        Args:
            node (etree): The etree node
            ignore_attributes (bool): Optional parameter; whether or not to
                                      skip the node's attributes. Default is False.
        """

        result = {} if ignore_attributes else dict(node.attrib)

        for child_node in node.iterchildren():

            key = child_node.tag.split("}")[1]

            if child_node.text and child_node.text.strip():
                value = child_node.text
            else:
                value = DIMRParser._node_to_dictionary(child_node)

            if key in result:

                if type(result[key]) is list:
                    result[key].append(value)
                else:
                    first_value = result[key].copy()
                    result[key] = [first_value, value]
            else:
                result[key] = value

        return result
Пример #12
0
def get_all_contents(dom: etree,
                     xpaths: list,
                     alt_to_p: bool = False) -> Tuple[str, str]:
    """Concatenate the serialised content of every node matched by ``xpaths``.

    Args:
        dom (etree): dom where to get the content
        xpaths (list): xpath expressions evaluated against ``dom``, in order
        alt_to_p (bool, optional): forwarded to ``_get_alts`` for each match. Defaults to False.

    Returns:
        Tuple[str, str]: the accumulated content, and the value returned by
        ``_get_alts`` for the last match ("" when nothing matched).
    """
    content: str = ""
    alts: str = ""
    for expression in xpaths:
        for match in dom.xpath(expression):
            # Non-<p> matches get wrapped in a paragraph.
            # NOTE(review): "%s%s" needs two values - this only works if
            # to_string returns a 2-tuple; confirm against that helper.
            template: str = "%s%s" if match.tag == "p" else "<p>%s</p>"

            # NOTE(review): assigned, not accumulated, so only the last
            # match's alts are returned - confirm that is intended.
            alts = _get_alts(alt_to_p, match)

            content += template % to_string(match)

    return content, alts
Пример #13
0
    def _find_ebay_products_info(tree: etree) -> (list, None):
        """ Find necessary eBay products info in html elements

        Collects the distinct 12-digit item ids found in the ``href`` of
        each listing's ``s-item__link`` anchor, in page order. Listings
        without such an anchor, without an ``href``, or whose id does not
        have ``constants.ebay_id_length`` characters are skipped.

        Returns:
            The list of ids, or ``None`` when the page has no listings or
            none of them yields an id.
        """

        products = tree.xpath('//li[@class="s-item   "]')

        if not products:
            logger.warning('Empty eBay products list before finding info')
            return None

        ebay_ids = []

        for product in products:
            links = product.xpath('.//a[@class="s-item__link"]')

            if not links:
                # A listing without a link used to abort the whole scan
                # with IndexError; skip it like any other unusable entry.
                continue

            href = links[0].get('href')

            if href is None:
                continue

            match = search(r'/\d{12}\?', href)

            if match is None:
                continue

            # Drop the leading '/' and trailing '?' captured by the pattern.
            ebay_id = match.group()[1:-1]

            if len(ebay_id) != constants.ebay_id_length or ebay_id in ebay_ids:
                continue

            ebay_ids.append(ebay_id)

        return ebay_ids or None
Пример #14
0
    def _find_products_info(self, tree: etree) -> None:
        """ Find necessary products info in html elements

        For every ``div`` carrying a ``data-asin`` attribute, stores a
        normalised title under that ASIN in ``self._products``. The title is
        taken from the ``alt`` of the first ``img`` inside the div,
        lower-cased, stripped of everything but digits, a-z and spaces,
        cleared of ``constants.stopwords`` and shortened when it has more
        than ``constants.title_max_words`` words.

        Products with an ASIN of the wrong length, without an image, or
        with an empty title are skipped.
        """

        products = tree.xpath('//div[@data-asin]')

        if not products:
            logger.warning('Empty products list before finding info')
            return

        for product in products:
            asin = product.get('data-asin')

            # Validate the ASIN first so invalid entries never reach the
            # image lookup.
            if asin is None or len(asin) != constants.asin_length:
                continue

            images = product.xpath('.//img')

            if not images:
                # No image used to abort the whole scan with IndexError.
                continue

            title = images[0].get('alt')

            if not title:
                continue

            title = sub(r'[^0-9a-z ]', '', title.lower())
            # Pad with spaces so stopwords at either end are matched too.
            title = sub(r' {2,}', ' ', ' ' + title + ' ')
            title = sub(r' ({0}) '.format('|'.join(constants.stopwords)), ' ',
                        title)
            title = sub(r'^ | $', '', title)
            words = title.split()

            if len(words) > constants.title_max_words:
                words = words[:constants.title_n_words]

            self._products[asin] = {'title': ' '.join(words)}
Пример #15
0
def get_person(stree: etree) -> Person:
    """
    Extract a Person from the xml of a docket, parsed into sections.

    The name is read from the first ``docket/header/caption/defendant_line``
    element and split with ``split_first_name``. If that element is missing,
    has no text, or the split raises ``IndexError``, both names are left
    empty. Aliases and date of birth are read from ``//alias`` and
    ``//birth_date``.

    Args:
        stree: xml tree of a docket, parsed into a header and some number of sections

    Returns:
        a Person object
    """
    first_name = ""
    last_name = ""
    name_nodes = stree.xpath("docket/header/caption/defendant_line")
    # An element with no text has text=None; treat it like a missing name
    # rather than failing on None.strip().
    if name_nodes and name_nodes[0].text is not None:
        try:
            first_name, last_name = split_first_name(
                name_nodes[0].text.strip())
        except IndexError:
            # Same fallback as before: keep both names empty.
            pass

    aliases = xpath_or_empty_list(stree, "//alias")
    date_of_birth = xpath_date_or_blank(stree, "//birth_date")
    return Person(first_name=first_name,
                  last_name=last_name,
                  date_of_birth=date_of_birth,
                  aliases=aliases)
Пример #16
0
 def _parse_list(self, tree: etree) -> str:
     nomore: list = tree.xpath('//div[@class="not-found"]')
     if nomore:
         return 'nomore'
     try:
         products: list = tree.xpath(
             '//div[@id="shop-all-list"]//div[@class="svr-info"]//a[@data-click-name="shop_info_gooddeal_click"]/@title'
         )
         product_link: list = tree.xpath(
             '//div[@id="shop-all-list"]//div[@class="svr-info"]//a[@data-click-name="shop_info_gooddeal_click"]/@href'
         )
         for product, link in zip(products, product_link):
             if '伊婉' in product:
                 self._ids.add(link)
         return 'hasmore'
     except Exception:
         pass
Пример #17
0
 def get_post_url(etroot: lxml.etree) -> str:
     """
     Build the post url from the ``post_url`` child of ``etroot``.

     The text of that child is prefixed with ``http:``.

     :param etroot: lxml.etree object
     :return: url
     """
     url_node = etroot.find('post_url')
     return "http:" + url_node.text
Пример #18
0
def xpath_date_or_blank(tree: etree, xpath: str) -> Optional[date]:
    """ Given an etree and an xpath expression, return the value of the expression
    as a date, or None.

    The text of the first match is stripped and parsed as ``MM/DD/YYYY``.
    ``None`` is returned when nothing matches, when the matched element has
    no text, or when the text is not a date in that format."""
    matches = tree.xpath(xpath)
    if not matches:
        return None
    text = matches[0].text
    if text is None:
        # Empty element: previously None.strip() raised AttributeError.
        return None
    try:
        return datetime.strptime(text.strip(), r"%m/%d/%Y").date()
    except ValueError:
        return None
Пример #19
0
    def get_img_url(self, node: etree) -> str:
        """Return the image url advertised by an rss item, if any.

        The ``url`` attribute of the first ``enclosure`` descendant wins;
        otherwise the first descendant whose local name is ``content`` and
        which has a ``url`` attribute (media:content) is used.

        Arguments:
            node {etree} -- item node of rss feed

        Returns:
            str -- the url found, or "" when neither tag is present
        """
        enclosures = node.xpath(".//enclosure")
        # media:content tag
        medias = node.xpath(".//*[local-name()='content'][@url]")
        for candidates in (enclosures, medias):
            if len(candidates) > 0:
                return candidates[0].get('url')
        return ""
def insert_element(xml_tree: et, element: et.Element) -> None:
    """Insert ``element`` right after the TEI deposition div.

    The first ``div`` with ``type="deposition"`` in the TEI namespace is
    located in ``xml_tree`` and ``element`` becomes its next sibling.
    """
    tei_ns = {'default': "http://www.tei-c.org/ns/1.0"}

    anchor = xml_tree.find('.//default:div[@type="deposition"]',
                           namespaces=tei_ns)
    container = anchor.getparent()

    # One past the anchor, i.e. immediately following it.
    container.insert(container.index(anchor) + 1, element)
Пример #21
0
    def remove(cls, xml_node: etree):
        """Detach ``xml_node`` from its parent.

        Does nothing when ``xml_node`` is ``None`` or has no parent.
        """
        owner = None if xml_node is None else xml_node.getparent()
        if owner is not None:
            owner.remove(xml_node)
Пример #22
0
def xpath_or_blank(stree: etree, xpath: str) -> str:
    """ given an etree and an xpath expression, return the value of the expression, or
    an empty string.

    The value is the stripped text of the first match. An empty string is
    returned when nothing matches or when the matched element has no text.

    A helper method"""
    matches = stree.xpath(xpath)
    if not matches:
        return ""
    text = matches[0].text
    # An element without text has text=None; previously None.strip() raised
    # AttributeError instead of giving the documented blank.
    return "" if text is None else text.strip()
Пример #23
0
def get_first_node(dom: etree, xpaths: list):
    """Return the first node matched by the first expression that matches.

    The expressions in ``xpaths`` are tried in order; ``None`` is returned
    when none of them matches anything.
    """
    for expression in xpaths:
        matches = dom.xpath(expression)
        if len(matches) > 0:
            return matches[0]
    return None
Пример #24
0
        def _get_params(self, html: etree) -> str:
            """ Определяем параметры товара """

            params = ""
            span_params = html.xpath(self.locators['txbParams'])
            for span_param in span_params:
                params += "|" + span_param.text.strip()

            return params
Пример #25
0
 def _replace_urls_process_links(dom: etree, attribute: str):
     """Make protocol-relative and root-relative urls in ``attribute`` absolute.

     For every element of ``dom`` carrying ``attribute``:
     a value starting with ``//`` gets ``http:`` or ``https:`` prepended
     (``https:`` when the original website string contains "https");
     a value starting with a single ``/`` has that slash dropped and the
     original website prepended.

     NOTE(review): ``self`` is not a parameter here, so it must come from an
     enclosing scope - confirm against the surrounding code.
     """
     for node in dom.xpath("//*[@%s]" % attribute):
         value = node.attrib[attribute]
         if value.startswith("//"):
             uses_https = self.handler.get_original_website().find("https") > -1
             scheme: str = "https:" if uses_https else "http:"
             node.attrib[attribute] = scheme + value
         elif value.startswith("/"):
             node.attrib[attribute] = (
                 self.handler.get_original_website() + value[1:])
Пример #26
0
def create_action(element: Element):
    """Build a ``game_models.Action`` from an action element.

    The action is named after the element's ``id`` attribute. Without one,
    the name is the element's source line number followed by the ``id`` of
    the nearest ancestor that has one (if any). Each child element is parsed
    into a step with ``parse_step``.
    """
    attrs = element.attrib
    if "id" in attrs:
        name = attrs["id"]
    else:
        name = str(element.sourceline)
        ancestor_ids = (ancestor.attrib["id"]
                        for ancestor in element.iterancestors()
                        if "id" in ancestor.attrib)
        # Only the closest ancestor with an id contributes.
        name += next(ancestor_ids, "")
    steps = [parse_step(child) for child in element]
    return game_models.Action(steps, name)
Пример #27
0
 def _parse_item(self, tree: etree, link: str):
     try:
         item = {}
         item['product'] = tree.xpath(
             '//p[@class="product-name bold"]/text()')[0]
         item['link'] = link
         item['price'] = tree.xpath('//div[@class="price"]//text()')[-1]
         item['hospital'] = tree.xpath(
             '//div[@class="shop-item"]/p[@class="shop-name"]/text()')[0]
         item['address'] = tree.xpath(
             '//div[@class="shop-item"]/p[@class="shop-addr"]/text()'
         )[0].replace('地址:', '')
         item['phone'] = tree.xpath(
             '//div[@class="shop-item"]/p[@class="shop-phone"]/text()'
         )[0].replace('电话:', '')
         print(item)
         self.content.append(item)
         return 'success'
     except Exception:
         pass
Пример #28
0
def return_word_from_id(xml: etree, el_id: str) -> str:
    """Given an XML document, return the innertext at id

    Only the first text node of the element is returned. ``IndexError`` is
    raised when no element has that id or the element has no text.

    Args:
        xml (etree): XML document
        el_id (str): ID

    Returns:
        str: Innertext of element with el_id in xml
    """
    query = '//*[@id="%s"]/text()' % el_id
    text_nodes = xml.xpath(query)
    return text_nodes[0]
Пример #29
0
    def _check_location(tree: etree):
        """ Check current session location on Amazon """

        try:
            span = tree.xpath('//span[@id="glow-ingress-line2"]')[0]

        except IndexError:
            return False

        else:
            return span.text != CURRENT_AMAZON_LOCATION
Пример #30
0
 def get_priority_params_from_html(etroot: lxml.etree) -> dict:
     """Collect ``name -> value`` pairs from the ``input`` elements of a page.

     Only inputs carrying BOTH a ``name`` and a ``value`` attribute
     contribute. When a name occurs more than once, the last value wins.

     :param etroot: parsed html tree
     :return: dict mapping input names to their values
     """
     params = {}
     for child in etroot.iter('input'):
         name = child.attrib.get('name')
         value = child.attrib.get('value')
         # Pair each name with its own value. Collecting names and values
         # in two separate lists shifted later pairs whenever an input had
         # a name but no value.
         if name is None or value is None:
             continue
         params[name] = value
     return params
Пример #31
0
def parse_select(element: Element):
    """Build a ``step_models.Select`` step from a select element.

    The ``from`` attribute (required) is parsed with
    ``selectorparser.parse`` using ``selectorparser.item``; the optional
    ``label`` attribute and the element's source line are passed along.
    """
    source = selectorparser.parse(element.attrib["from"], selectorparser.item)
    label = element.get("label")
    return step_models.Select(source, label, element.sourceline)