def parse_flat(self, html: Element) -> None:  # noqa: CCR001
    """Parse a single flat listing.

    Extract the listing id, location, price, price per square metre,
    and derived area from the given HTML element, then persist a new
    Flat row if one does not already exist.

    :param html: Card element of one listing.
    """
    try:
        # The listing id is embedded in the card's first link ("flat/<id>").
        flat_url = html.find("a", first=True).attrs.get("href")
        flat_id = int(re.search(r"flat/(\d+)", flat_url).group(1))
        location = html.xpath(".//a[@data-name='GeoLabel']/text()")
        # The "ekb" domain prepends an extra geo label; drop it.
        if self.domain == "ekb":
            location = location[1:]
        city, district, *location = location
        location = " ".join(location)
        price = html.xpath(".//span[@data-mark='MainPrice']/text()", first=True)
        price = int(price.replace("₽", "").strip().replace(" ", ""))
        ppm = html.xpath(".//p[@data-mark='PriceInfo']/text()", first=True)
        ppm = int(ppm.replace("₽/м²", "").strip().replace(" ", ""))
        # Area is not listed directly; derive it from price / price-per-metre.
        square = round(price / ppm, 2)
        if not Flat.exists(id=flat_id):
            Flat(
                id=flat_id,
                city=city,
                district=district,
                location=location,
                price=price,
                ppm=ppm,
                square=square,
            )
            commit()
    except Exception as exc:
        print(exc)
        rollback()
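
# --- Context sketch (assumptions, not from the original source) ---
# parse_flat() presumes a Pony ORM entity roughly like the one below and a
# caller that runs inside a db_session; the field types here are inferred
# from the values built above and may differ from the real project.
from pony.orm import Database, Optional, PrimaryKey, Required

db = Database()


class Flat(db.Entity):
    id = PrimaryKey(int)
    city = Required(str)
    district = Required(str)
    location = Optional(str)
    price = Required(int)      # total price, roubles
    ppm = Required(int)        # price per square metre, roubles
    square = Required(float)   # derived area, square metres


db.bind(provider="sqlite", filename=":memory:")
db.generate_mapping(create_tables=True)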
def yield_pron(
    request_html: requests_html.Element,
    ipa_xpath_selector: str,
    config: "Config",
) -> "Iterator[Pron]":
    for ipa_element in request_html.xpath(ipa_xpath_selector):
        m = re.search(config.ipa_regex, ipa_element.text)
        if not m:
            continue
        pron = m.group(1)
        # Removes parens around various segments.
        pron = pron.replace("(", "").replace(")", "")
        if _skip_pron(pron, config.skip_spaces_pron):
            continue
        try:
            # All pronunciation processing is done in NFD-space.
            pron = unicodedata.normalize("NFD", pron)
            pron = config.process_pron(pron)
        except IndexError:
            logging.info(
                "IndexError encountered processing %s during scrape of %s",
                pron,
                config.language,
            )
            continue
        if pron:
            # The segments package inserts a # in-between spaces.
            if not config.skip_spaces_pron:
                pron = pron.replace(" #", "")
            yield pron
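
# Why NFD? Composed characters hide combining marks from per-codepoint
# processing; decomposing first makes each diacritic individually
# addressable. A pure-stdlib illustration:
import unicodedata

composed = "é"  # U+00E9, a single precomposed codepoint
decomposed = unicodedata.normalize("NFD", composed)
print(len(composed), len(decomposed))     # 1 2
print([hex(ord(c)) for c in decomposed])  # ['0x65', '0x301'] -> 'e' + combining acute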
def movements(process: requests_html.Element) -> List[Dict]:
    rows = process.xpath('//tr')
    result = []
    for row in rows:
        data = []
        # requests_html re-parses each Element in isolation, so '//td'
        # only matches cells belonging to this row.
        for col in row.xpath('//td'):
            data.append(col.text)
        # First cell is the date ('data'); the remaining cells form the
        # docket entry text ('movimento').
        result.append({'data': data[0], 'movimento': ''.join(data[1:])})
    return result
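
# --- Usage sketch (invented markup; assumes only requests_html) ---
from requests_html import HTML

doc = HTML(html="""
<table>
  <tr><td>01/02/2023</td><td>Juntada de petição</td></tr>
  <tr><td>15/01/2023</td><td>Distribuído por sorteio</td></tr>
</table>
""")
table = doc.xpath('//table', first=True)
print(movements(table))
# [{'data': '01/02/2023', 'movimento': 'Juntada de petição'},
#  {'data': '15/01/2023', 'movimento': 'Distribuído por sorteio'}]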
def parts(process_parts: requests_html.Element) -> List[List[Dict]]:
    rows = process_parts.xpath('//tr')
    result = []
    for row in rows:
        data = []
        # Drop non-breaking spaces, glue labels that end in ':\n' back onto
        # their values, then split the row into one 'Label:value' per line.
        values = row.text.replace('\xa0', '').replace(':\n', ':').split('\n')
        for value in values:
            value = value.split(':')
            data.append({value[0]: value[1].strip()})
        result.append(data)
    return result
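
# A pure-Python walk-through of the splitting logic above, on an invented
# row text in the 'Label: value' shape the court site is assumed to emit:
text = 'Autor:\xa0Fulano de Tal\nAdvogado:\nMaria Souza'
values = text.replace('\xa0', '').replace(':\n', ':').split('\n')
print(values)  # ['Autor:Fulano de Tal', 'Advogado:Maria Souza']
print([{v.split(':')[0]: v.split(':')[1].strip()} for v in values])
# [{'Autor': 'Fulano de Tal'}, {'Advogado': 'Maria Souza'}]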
def general_data(process_general_data: requests_html.Element) -> Dict:
    result = {}
    names = [
        'Classe', 'Área', 'Assunto', 'Distribuição', 'Juiz', 'Relator',
        'Valor da ação'
    ]
    for name in names:
        # First table row whose full string content mentions the label.
        field = process_general_data.xpath(
            f"//tr[contains(string(), '{name}')]", first=True)
        if field:
            field = field.text
            # Normalise 'Label: value' to 'Label:\nvalue' so a single
            # split yields the (label, value) pair.
            field = field.replace(': ', ':\n')
            field = field.split(':\n')
            result[field[0]] = field[1]
    return result
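
# --- Usage sketch (invented markup; assumes only requests_html) ---
from requests_html import HTML

doc = HTML(html="""
<table>
  <tr><td>Classe: Procedimento Comum</td></tr>
  <tr><td>Juiz: João da Silva</td></tr>
</table>
""")
table = doc.xpath('//table', first=True)
print(general_data(table))
# {'Classe': 'Procedimento Comum', 'Juiz': 'João da Silva'}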
def yield_pron(
    request_html: requests_html.Element,
    ipa_xpath_selector: str,
    config: "Config",
) -> "Iterator[Pron]":
    for ipa_element in request_html.xpath(ipa_xpath_selector):
        m = re.search(config.ipa_regex, ipa_element.text)
        if not m:
            continue
        pron = m.group(1)
        # Removes parens around various segments.
        pron = pron.replace("(", "").replace(")", "")
        if _skip_pron(pron):
            continue
        pron = config.process_pron(pron)
        if pron:
            yield pron
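
# The regex step above keeps only capture group 1. A pure-stdlib sketch,
# assuming config.ipa_regex is a slash-delimited pattern like this one
# (the real pattern lives in the project's Config):
import re

ipa_regex = r"/(.+?)/"
m = re.search(ipa_regex, "enPR: hĕ-lō′, IPA(key): /həˈloʊ/")
print(m.group(1))  # həˈloʊ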