Example #1
0
    def process_item(self, item):
        """Scrape one CA Assembly roster row into a ScrapePerson.

        item: an lxml element for one member row of the Assembly table.
        Skips the row for a vacant seat; otherwise returns a ScrapePerson
        with capitol and district office contact details attached.
        """
        # The third <a> holds "Contact Assembly Member <name>".
        name = CSS("a").match(item)[2].text_content()
        name = re.sub(r"Contact Assembly Member", "", name).strip()

        party = CSS("td").match(item)[2].text_content().strip()
        # Normalize the site's "Democrat" to the conventional "Democratic".
        if party == "Democrat":
            party = "Democratic"

        # Strip leading zeros so e.g. "08" becomes "8".
        district = CSS("td").match(item)[1].text_content().strip().lstrip("0")

        # District 18 has a vacant spot; its row shows an "edit" link as the name.
        if name == "edit":
            self.skip("skipping Vacant seat in District {}".format(district))

        # Not every row carries a photo. Fall back to an empty string instead
        # of passing the empty match list through as the image value.
        images = CSS("img").match(item, min_items=0)
        photo_url = images[0].get("src") if images else ""

        p = ScrapePerson(
            name=name,
            state="ca",
            chamber="lower",
            district=district,
            party=party,
            image=photo_url,
        )

        # Capitol office: the <h3> holds the address header, and the table
        # cell's text node holds "room/address; phone" in one string.
        capitol_office_header = CSS("h3").match(item)[0].text_content()
        capitol_office_text = (
            XPath(
                "//*[@id='block-views-view-members-block-1']/div/div/div/table/tbody/tr[1]/td[4]/text()"
            )
            .match(item)[1]
            .strip()
        )
        capitol_office_text, capitol_office_phone = capitol_office_text.split("; ")
        capitol_office_address = capitol_office_header + capitol_office_text

        p.capitol_office.address = capitol_office_address
        p.capitol_office.voice = capitol_office_phone

        # Each district office is one text node formatted "address; phone".
        district_offices = XPath(".//td/p[1]/text()").match(item)

        for office in district_offices:
            district_address, district_phone = office.split("; ")
            p.add_office(
                classification="district",
                address=district_address.strip(),
                voice=district_phone.strip(),
            )

        url = CSS("a").match(item)[0].get("href")
        p.add_link(url)
        p.add_source(self.source.url)

        return p
Example #2
0
    def process_item(self, item):
        """Turn one NH legislator roster record into a ScrapePerson.

        item: a dict-like record from the NH General Court member feed.
        Returns a SenDetail page scraper (carrying the partial person) for
        senators, or the finished ScrapePerson for representatives.
        Raises ValueError when LegislativeBody is neither "H" nor "S".
        """
        lastname = item["LastName"]
        firstname = item["FirstName"]
        middlename = item["MiddleName"]
        # Join only non-empty parts so a missing middle name does not leave
        # a double space inside the full name.
        name = " ".join(part for part in (firstname, middlename, lastname) if part)

        legislativebody = item["LegislativeBody"]
        if legislativebody == "H":
            chamber = "lower"
        elif legislativebody == "S":
            chamber = "upper"
        else:
            # Fail loudly here rather than with a NameError further down.
            raise ValueError(f"unexpected LegislativeBody {legislativebody!r}")

        district_county = item["County"]
        district_num = item["District"]
        district = f"{district_county} {district_num.lstrip('0')}"

        # Party codes arrive in either case; anything else passes through as-is.
        party = item["party"]
        if party == "D" or party == "d":
            party = "Democratic"
        elif party == "R" or party == "r":
            party = "Republican"

        p = ScrapePerson(
            name=name, state="nh", chamber=chamber, district=district, party=party
        )

        p.add_source(self.source.url)

        p.family_name = lastname
        p.given_name = firstname

        county = item["County"]
        if county != "":
            p.extras["county"] = county

        electedStatus = item["electedStatus"].strip()
        if electedStatus != "":
            p.extras["elected status"] = electedStatus

        # Assemble "addr [addr2 ]city, NH zip"; address2 is optional.
        addr = item["Address"].strip()
        if addr != "":
            addr += " "
            if item["address2"].strip() != "":
                addr += item["address2"]
                addr += " "
            addr += item["city"]
            addr += ", NH "
            addr += item["Zipcode"]
            if item["Phone"].strip() != "":
                p.add_office(
                    classification="primary", address=addr, voice=item["Phone"]
                )
            else:
                p.add_office(classification="primary", address=addr)
            # is this primary office? or district office?

        if item["WorkEmail"].strip() != "":
            p.email = item["WorkEmail"].strip()

        if item["GenderCode"].strip() != "":
            p.extras["gender code"] = item["GenderCode"].strip()

        if chamber == "upper":
            # Senators get a detail page with additional contact info; hand
            # the partially-built person off to that page scraper.
            detail_link = f"http://www.gencourt.state.nh.us/Senate/members/webpages/district{district_num}.aspx"
            p.add_source(detail_link)
            p.add_link(detail_link, note="homepage")
            return SenDetail(p, source=detail_link)

        # seat_number = seat_map[item["seatno"]]
        # detail_link = f"http://www.gencourt.state.nh.us/house/members/member.aspx?member={seat_number}"
        # return HouseDetail(p, source=detail_link)
        return p
Example #3
0
    def process_item(self, item):
        """Scrape one CA Senate roster card into a ScrapePerson.

        item: an lxml element for one senator's card on the roster page.
        Skips entries whose heading lacks a trailing "(R)"/"(D)" party tag
        (e.g. vacancies); otherwise returns a ScrapePerson with capitol and
        district office contact details attached.
        """
        name = XPath(".//h3/text()").match(item)[0]
        if name.endswith(" (R)"):
            party = "Republican"
        elif name.endswith(" (D)"):
            party = "Democratic"
        else:
            self.skip("skipping " + name)
        # Drop the trailing " (R)"/" (D)" tag from the display name.
        name = name.split(" (")[0]

        # Strip leading zeros so e.g. "08" becomes "8".
        district = (
            XPath('.//div[contains(@class, "senator-district")]/div/text()'
                  ).match(item)[0].strip().lstrip("0"))

        photo_url = XPath(".//img/@src").match_one(item)

        p = ScrapePerson(
            name=name,
            state="ca",
            chamber="upper",
            district=district,
            party=party,
            image=photo_url,
        )

        # Capitol office renders as "address; phone" in one <p>; \xa0 are
        # non-breaking spaces that must become plain spaces before splitting.
        capitol_office = XPath(
            ".//div[contains(@class, 'views-field-field-senator-capitol-office')]//p"
        ).match_one(item)
        capitol_address, capitol_phone = (
            capitol_office.text_content().replace(u"\xa0", " ").split("; "))
        p.capitol_office.address = capitol_address.strip()
        p.capitol_office.voice = capitol_phone.strip()

        # District offices are one per line, normally "address; phone".
        district_office = XPath(
            ".//div[contains(@class, 'views-field-field-senator-district-office')]"
        ).match_one(item)
        for line in district_office.text_content().strip().splitlines():
            try:
                # Skip the "District Office"/"District Offices" section header.
                if re.search(r"District Offices?", line):
                    continue
                addr, phone = line.strip().replace(u"\xa0", " ").split("; ")
                p.add_office(
                    classification="district",
                    address=addr.strip(),
                    voice=phone.strip(),
                )
            except ValueError:
                # Steven Bradford address/phone separated by period instead of semi-colon
                # (lines matching neither format are silently skipped — best effort).
                if re.search(r"\w+\.\s\(\d{3}\)", line):
                    # Split on ". (" and restore the "(" consumed by the split.
                    addr, phone = line.strip().replace(u"\xa0",
                                                       " ").split(". (")
                    phone = "(" + phone
                    p.add_office(
                        classification="district",
                        address=addr.strip(),
                        voice=phone.strip(),
                    )

        url = XPath(".//a/@href").match(item)[0]
        p.add_link(url)
        p.add_source(self.source.url)

        return p