Python XPath.strip примеры использования

Язык программирования: Python

Пространство имен/Пакет: spatula

Класс/Тип: XPath

Метод/Функция: strip

Примеров на hotexamples.com: 2

Python XPath.strip — найдено 2 примера. Это лучшие примеры кода на Python для spatula.XPath.strip, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

XPath(30)

itersiblings(2)

split(2)

startswith(2)

strip(2)

text_content(2)

endswith(1)

getnext(1)

Пример #1

Показать файл

Файл: committees.py Проект: jessemortenson/openstates

    def process_page(self):
        """Populate the committee in ``self.input`` from its detail page.

        Adds the page URL as a source and as a "homepage" link, stores the
        room and meeting schedule in ``extras``, and registers each linked
        member together with their role.

        Returns:
            The committee object that was supplied as ``self.input``.

        Raises:
            ValueError: if a member's bold label is not one of the known
                positions.
        """
        com = self.input
        com.add_source(self.source.url)
        com.add_link(self.source.url, note="homepage")

        # The selector is expected to yield exactly two text nodes;
        # the tuple unpacking fails otherwise.
        room, time = XPath(
            "//div[@class='col-sm-12 pb-2']//p[2]/text()").match(self.root)
        if re.search("On Call", time):
            # Keep only the part of the schedule before the " -" separator.
            time = time.split(" -")[0]
        com.extras["room"] = room.strip()
        com.extras["meeting schedule"] = time.strip()

        known_positions = ("chair", "vice chair", "ranking minority member")
        member_links = XPath(
            '//div[contains(@class, "media-body")]//a[contains(@href, "member_bio")]'
        )
        role_labels = XPath("..//preceding-sibling::b/text()")

        for link in member_links.match(self.root):
            name = link.text_content().split(",")[0]
            if not name:
                # A link without text carries no member; previously it was
                # still added, with a stale (or undefined) role.
                continue

            # Default role, so a value from a previous iteration can never
            # leak into this member.
            position_str = "member"
            try:
                roles = role_labels.match(link)
            except SelectorError:
                # No bold label next to the link: keep the default role.
                roles = ()

            for role in roles:
                # With several labels the last one wins, as before.
                position_str = role.lower()
                if position_str not in known_positions:
                    raise ValueError("unknown position")

            com.add_member(name, position_str)

        return com

Пример #2

Показать файл

Файл: people.py Проект: csnardi/openstates

    def process_page(self):
        """Build a ``ScrapePerson`` for one representative from the bio page.

        Reads party, photo, title, phone numbers, fax, TTY and address from
        ``self.root`` and combines them with the name, district and source
        carried by ``self.input``.

        Returns:
            The populated ``ScrapePerson``.
        """
        party_map = {
            "PNP": "Partido Nuevo Progresista",
            "PPD": u"Partido Popular Democr\xe1tico",
            "PIP": u"Partido Independentista Puertorrique\u00F1o",
        }

        try:
            party = CSS("span.partyBio").match_one(
                self.root).text_content().strip()
            party = party_map[party]
        except SelectorError:
            # HON. LISIE J. BURGOS MUÑIZ, HON. JOSÉ B. MÁRQUEZ REYES, HON. MARIANA NOGALES MOLINELLI
            # do not have their parties listed
            party = "Independent"

        p = ScrapePerson(
            name=self.input.name,
            state="pr",
            chamber="lower",
            district=self.input.district,
            party=party,
        )

        p.add_source(self.input.source)
        p.add_source(self.source.url)
        p.add_link(self.source.url, note="homepage")

        p.image = CSS("div.container-biography img").match(
            self.root)[0].get("src")

        title = CSS("span.name br").match_one(self.root).tail.strip()
        if title != "":
            p.extras["title"] = title

        # The same selector yields both contact blocks: index 0 holds the
        # "Tel." lines, index 1 the "Fax."/"TTY." lines. Evaluate it once.
        contact_blocks = CSS("h6 span span span").match(self.root)

        phones = contact_blocks[0].text_content().strip().split("\n")
        phone1 = re.search(r"Tel\.\s(.+)", phones[0]).groups()[0].strip()
        # The second number is optional, so its capture group may be None.
        phone2 = (re.search(r"Tel\.\s?(.+)?", phones[1]).groups()[0]
                  or "").strip()
        # http://www.tucamarapr.org/dnncamara/ComposiciondelaCamara/biografia.aspx?rep=251 has an incomplete phone
        if phone1 != "" and phone1 != "(787":
            p.district_office.voice = phone1
        if phone2:
            p.extras["phone 2"] = phone2

        fax = contact_blocks[1].text_content().strip().split("\n")
        fax1 = re.search(r"Fax\.\s(.+)", fax[0]).groups()[0].strip()
        if fax1 != "":
            p.district_office.fax = fax1
        # The TTY number is optional as well; store it stripped, like the
        # phone and fax values.
        tty = (re.search(r"TTY\.\s?(.+)?", fax[1]).groups()[0] or "").strip()
        if tty:
            p.extras["TTY"] = tty

        # these addresses do not look complete but capturing them anyway
        addr = XPath(
            "//*[@id='dnn_ctr1108_ViewWebRepresentatives_WebRepresentatives1_pnlRepresentative']/h6/text()[1]"
        ).match_one(self.root).strip()
        # Strip before testing so a whitespace-only text node is not stored
        # as an empty address.
        if addr != "":
            p.district_office.address = addr

        return p