Python SimilarLink示例

编程语言: Python

命名空间/包名称: spatula

类/类型: SimilarLink

hotexamples.com的示例: 6

Python SimilarLink - 已找到6个示例。这些是从开源项目中提取的最受好评的spatula.SimilarLink现实Python示例。您可以评价示例，以帮助我们提高示例质量。

常用方法

显示隐藏

SimilarLink(6)

split(1)

示例#1

显示文件

    def process_page(self):
        # annapolis_info = (
        #     XPath("//dt[text()='Annapolis Info']/following-sibling::dd[1]")
        #     .match_one(self.root)
        #     .text_content()
        # )
        # interim_info = (
        #     XPath("//dt[text()='Interim Info']/following-sibling::dd[1]")
        #     .match_one(self.root)
        #     .text_content()
        # )

        # email is formatted mailto:<addr>?body...
        email = SimilarLink("mailto:").match_one(self.root).get("href")
        email = email.split(":", 1)[1].split("?")[0]

        p = Person(
            name=CSS("h2").match_one(self.root).text.split(" ", 1)[1],
            state="md",
            image=self.image_sel.match_one(self.root).get("src"),
            party=self.extract_dd("Party"),
            district=self.extract_dd("District"),
            chamber=None,
            email=email,
        )
        p.add_link(self.source.url)
        p.add_source(self.source.url)
        return p

示例#2

显示文件

class SenateDetail(HtmlPage):
    name_css = CSS(".field--name-title")
    image_css = CSS(".bSenBio__media-btn")
    district_css = CSS(".bDistrict h2")
    address_css = CSS(".bSenBio__address p")
    phone_css = CSS(".bSenBio__tel a")
    contact_link_sel = SimilarLink(
        r"https://oksenate.gov/contact-senator\?sid=")

    def process_page(self):
        for bio in CSS(".bSenBio__infoIt").match(self.root):
            if "Party:" in bio.text_content():
                party = bio.text_content().split(":")[1].strip()
        p = ScrapePerson(
            name=self.name_css.match_one(self.root).text,
            state="ok",
            chamber="upper",
            party=party,
            image=self.image_css.match_one(self.root).get("href"),
            district=self.district_css.match_one(
                self.root).text.strip().split()[1],
        )
        p.capitol_office.address = self.address_css.match_one(self.root).text
        p.capitol_office.phone = self.phone_css.match_one(self.root).text
        p.add_link(
            self.contact_link_sel.match_one(self.root).get("href"),
            "Contact Form")

        return p

示例#3

显示文件

class HouseDetail(HtmlPage):
    image_selector = SimilarLink(
        "https://www.okhouse.gov/Members/Pictures/HiRes/")
    prefix = "#ctl00_ContentPlaceHolder1_lbl"
    name_css = CSS(prefix + "Name")
    district_css = CSS(prefix + "District")
    party_css = CSS(prefix + "Party")

    def process_page(self):
        name = self.name_css.match_one(self.root).text.split(maxsplit=1)[1]
        p = ScrapePerson(
            name=name,
            state="ok",
            chamber="upper",
            party=self.party_css.match_one(self.root).text,
            district=self.district_css.match_one(self.root).text.split()[1],
        )
        p.image = self.image_selector.match_one(self.root).get("href")

        contact_url = self.source.url.replace("District.aspx", "Contact.aspx")
        assert contact_url.startswith(
            "https://www.okhouse.gov/Members/Contact.aspx?District=")
        p.add_link(contact_url, note="Contact Form")

        # capitol address
        check_capitol_address = (CSS(".districtheadleft").match(
            self.root)[0].text_content().strip())
        if check_capitol_address == "Capitol Address:":
            capitol_address_div = (CSS(".districtheadleft + div").match(
                self.root)[0].text_content().strip().splitlines())
            p.capitol_office.address = "; ".join(
                [ln.strip() for ln in capitol_address_div[:-1]])
            p.capitol_office.phone = capitol_address_div[-1].strip()
        return p

示例#4

显示文件

class House(HtmlListPage):
    selector = SimilarLink(
        r"https://www.okhouse.gov/Members/District.aspx\?District=", num_items=101
    )
    source = "https://www.okhouse.gov/Members/Default.aspx"

    def process_item(self, item):
        return HouseDetail({"name": item.text.strip()}, source=item.get("href"))

示例#5

显示文件

class Senate(HtmlListPage):
    selector = SimilarLink("https://oksenate.gov/senators/", min_items=45, max_items=48)
    source = "https://oksenate.gov/senators"

    def process_item(self, item):
        party, _, district, name = item.text_content().split(maxsplit=3)
        return SenateDetail(
            {"party": party, "district": district, "name": name.strip()},
            source=item.get("href"),
        )

示例#6

显示文件

文件： test_selectors.py 项目： jamesturk/spatula

def test_similar_link_selector():
    root = lxml.etree.fromstring(dummy_html)
    assert len(SimilarLink("https").match(root)) == 2