Example #1
    def process_item(self, item):
        """Build a ScrapePerson from one member card and queue its detail page."""
        raw_name = CSS("h4 span").match_one(item).text_content().strip()
        # Vacant seats carry no member data; skip the card entirely.
        if re.search(r"Vacant", raw_name):
            self.skip()

        # Cards list names as "Last, First" -- reorder to "First Last".
        name_parts = raw_name.split(", ")
        name = f"{name_parts[1]} {name_parts[0]}"

        # District and party text live in the element right after each icon.
        district = (
            CSS("i.fa.fa-map").match_one(item).getnext().text_content().strip()
        )
        party = (
            CSS("i.fa.fa-users").match_one(item).getnext().text_content().strip()
        )
        party = "Democratic" if party == "Democrat" else party

        links = CSS("a").match(item)
        email = links[2].text_content().strip()
        detail_link = links[1].get("href")
        img = CSS("img").match_one(item).get("src")

        p = ScrapePerson(
            name=name,
            state="la",
            party=party,
            district=district,
            chamber=self.chamber,
            email=email,
            image=img,
        )

        p.add_source(self.source.url)
        p.add_source(detail_link)
        p.add_link(detail_link, note="homepage")

        return LegislatorDetail(p, source=detail_link)
Example #2
    def process_item(self, item):
        """Parse one house-member tile into a HousePartial and follow its page."""
        name = CSS(".mediaCaptionTitle").match_one(item).text
        subtitle = CSS(".mediaCaptionSubtitle").match_one(item).text

        # The portrait URL is embedded in the tile's inline background-image style.
        style_attr = CSS(".photo").match_one(item).get("style")
        image = background_image_re.findall(style_attr)[0]

        # Subtitle looks like "District 25 | D".
        district_text, party_code = subtitle.split(" | ")
        district = district_text.split()[1]
        party = {"D": "Democratic", "R": "Republican"}[party_code]

        partial = HousePartial(
            name=name,
            district=district,
            party=party,
            url=item.get("href"),
            image=image,
        )
        return HouseDetail(partial)
Example #3
    def process_page(self):
        """Fill in email, address, phones, and staff contacts for the senator
        from their profile page.

        The contact paragraphs vary per member: some pages merge the
        legislative assistant and media contact into one paragraph, some put
        the media contact in the third or fourth paragraph, and two members
        have bespoke layouts handled via hard-coded URLs.
        """
        p = self.input

        # The first contact link is the "mailto:" anchor.
        email = CSS("div.sen-contact a").match(self.root)[0].get("href")
        p.email = re.search(r"mailto:(.+)", email).groups()[0]

        # This exact query was previously repeated seven times; run it once.
        contact_ps = CSS("div.sen-contact p").match(self.root)

        addr = contact_ps[0].text_content()
        # This member's page lists no phone numbers at all.
        if self.source.url == "https://www.indianasenaterepublicans.com/young":
            phone1 = None
            phone2 = None
        else:
            addr, phone1, phone2 = re.search(
                r"(.+)Phone:\s(\d{3}-\d{3}-\d{4})\s?or\s(\d{3}-\d{3}-\d{4})", addr
            ).groups()

        p.capitol_office.address = addr
        if phone1:
            p.capitol_office.voice = phone1
        if phone2:
            p.extras["second phone"] = phone2

        # The legislative assistant blurb is the second paragraph, unless the
        # page collapses everything into a single paragraph.
        if len(contact_ps) == 1:
            leg_assist = contact_ps[0].text_content()
        else:
            leg_assist = contact_ps[1].text_content()

        assist_re = r"Legislative\sAssistant:?(.+)Phone:\s(.+)Email:\s(.+)"
        if len(contact_ps) < 3:
            # Assistant and media contact share one paragraph; split them apart.
            pieces = leg_assist.split("Media Contact:")
            leg_assist = pieces[0]
            media_contact = pieces[1]
            leg_assist_name, leg_assist_phone, leg_assist_email = re.search(
                assist_re, leg_assist
            ).groups()
            media_contact_name, media_contact_phone, media_contact_email = re.search(
                r"(.+)Phone:\s(.+)Email:\s(.+)", media_contact
            ).groups()
        else:
            leg_assist_name, leg_assist_phone, leg_assist_email = re.search(
                assist_re, leg_assist
            ).groups()
            # Media contact is in the third paragraph normally; Bray's page
            # (and any 3-paragraph page) uses index 2, others index 3.
            media_idx = (
                2
                if len(contact_ps) == 3
                or self.source.url == "https://www.indianasenaterepublicans.com/bray"
                else 3
            )
            media_contact = contact_ps[media_idx].text_content()
            media_contact_name, media_contact_phone, media_contact_email = re.search(
                r"Media\sContact:(.+)Phone:\s(.+)Email:\s(.+)", media_contact
            ).groups()

        p.extras["legislative assistant name"] = leg_assist_name
        p.extras["legislative assistant phone"] = leg_assist_phone
        p.extras["legislative assistant email"] = leg_assist_email
        p.extras["media contact name"] = media_contact_name
        p.extras["media contact phone"] = media_contact_phone
        p.extras["media contact email"] = media_contact_email

        # TODO: education is still unscraped -- the multi-line block after the
        # "Education" heading needs handling before it can be captured.

        return p
Example #4
    def process_page(self):
        """Add photo, title, counties, contact info, and socials from the
        legislator's profile page."""
        p = self.input

        p.image = CSS("img.leg-img").match_one(self.root).get("src")

        title = (
            CSS("div .row.profile-top h3").match_one(self.root).text_content().strip()
        )
        if title != "":
            p.extras["title"] = title

        # County list; strip a trailing "(Part)" marker when present.
        counties = CSS("div .center ul li").match_one(self.root).text_content()
        if re.search(r"\(Part\)", counties):
            counties = re.search(r"(.+)\s\(Part\)", counties).groups()[0]
        p.extras["counties represented"] = counties.split(", ")

        p.email = (
            XPath("//div[2]/p[contains(text(), 'Email')]")
            .match_one(self.root)
            .getnext()
            .text_content()
        )

        # Each <address> block is labeled by the element immediately before it.
        for addr_el in CSS("address").match(self.root):
            label = addr_el.getprevious().text_content()
            full_addr = " ".join(XPath("text()").match(addr_el))
            if label == "Mailing Address":
                p.extras["mailing address"] = full_addr
            elif label == "Legislative Address":
                p.district_office.address = full_addr
            elif label == "Capitol Address":
                p.capitol_office.address = full_addr

        phone_block = (
            XPath("//div[2]/p[contains(text(), 'Phone Number(s)')]")
            .match_one(self.root)
            .getnext()
        )
        # Entries look like "LRC: 555-123-4567" or "Home: 555-765-4321 (fax)".
        offices = {"LRC": p.capitol_office, "Home": p.district_office}
        for entry in XPath("text()").match(phone_block):
            kind, number = entry.split(": ")
            is_fax = number.endswith(" (fax)")
            if is_fax:
                number = number.replace(" (fax)", "")
            if kind in offices:
                if is_fax:
                    offices[kind].fax = number
                else:
                    offices[kind].voice = number
            elif kind == "Work":
                if is_fax:
                    p.extras["fax"] = number
                else:
                    p.extras["voice"] = number

        # Twitter and home city are optional sections.
        try:
            handle = (
                XPath("//div[2]/p[contains(text(), 'Twitter')]")
                .match_one(self.root)
                .getnext()
                .text_content()
            )
            p.ids.twitter = handle.lstrip("@")
        except SelectorError:
            pass

        try:
            p.extras["home city"] = (
                XPath("//div[2]/p[contains(text(), 'Home City')]")
                .match_one(self.root)
                .getnext()
                .text_content()
            )
        except SelectorError:
            pass

        return p
Example #5
    def process_page(self):
        """Construct a ScrapePerson from the member detail page, including
        capitol contact info and any website/social links."""
        name = CSS("h1").match(self.root)[0].text_content().strip()

        district = CSS("p.h4").match_one(self.root).text_content().strip()
        # Some pages render the bullet entity literally before the district.
        if re.search(r"&bullet;", district):
            district = re.search(r"&bullet;(.+)", district).groups()[0].strip()
        if district == "chairman":
            district = "Chairman"

        party = CSS("ul li p").match(self.root)[1].text_content().strip()
        if re.search(r"Party", party):
            party = re.search(r"(.+)\sParty", party).groups()[0]

        p = ScrapePerson(
            name=name,
            party=party,
            district=district,
            state=self.input.state,
            chamber=self.input.chamber,
            image=self.input.image,
        )
        p.add_source(self.input.source1)
        p.add_source(self.input.source2)
        p.add_link(self.input.link, note="homepage")

        p.capitol_office.address = (
            CSS("ul li p").match(self.root)[3].text_content().strip()
        )
        p.email = CSS("p.byline a").match(self.root)[0].text_content().strip()

        phone = CSS("p.byline a").match(self.root)[1].text_content().strip()
        if re.search(r"tel:", phone):
            phone = re.search(r"tel:(.+)", phone).groups()[0]
        p.capitol_office.voice = phone

        # The fax number only appears in the combined byline text.
        byline_text = CSS("p.byline").match_one(self.root).text_content().strip()
        p.capitol_office.fax = byline_text.split("Fax: ")[1]

        # The first two aside links are fixed; what follows is a website
        # and/or socials, distinguished purely by how many links there are.
        aside_links = CSS("section.aside-section a").match(self.root)
        n_links = len(aside_links)
        if n_links == 2:
            # No extra info at all.
            return p
        if n_links == 3:
            # Just a website.
            p.extras["website"] = aside_links[2].get("href")
        elif n_links == 4:
            # Just Facebook and Twitter.
            p.ids.facebook = aside_links[2].get("href").split("/")[-2]
            p.ids.twitter = aside_links[3].get("href").split("/")[-1]
        else:
            # Website, Facebook, and Twitter.
            p.extras["website"] = aside_links[2].get("href")
            fb_parts = aside_links[3].get("href").split("/")
            p.ids.facebook = fb_parts[-2] if fb_parts[-1] == "" else fb_parts[-1]
            p.ids.twitter = aside_links[4].get("href").split("/")[-1]

        return p