Пример #1
0
    def get_member(self, session, chamber, kpid):
        """Scrape a single Kansas legislator and yield it as a ``Person``.

        The member record is read from the KS API (``ksapi.url``); the
        member's biography page on kslegislature.org is then fetched to pick
        up a profile photo.

        Args:
            session: Session identifier such as ``"2019-2020"``. It must be
                one of the keys of the slug table below.
            chamber: Passed through unchanged as the person's ``primary_org``.
            kpid: Member id used in both the API URL and the bio page URL.

        Yields:
            One ``Person`` carrying the Capitol office contact details, the
            API URL as a source and, when the bio page could be fetched, a
            link to that page.

        Raises:
            KeyError: If ``session`` has no known URL slug.
        """
        url = "%smembers/%s" % (ksapi.url, kpid)
        content = json.loads(self.get(url).text)["content"]

        party = content["PARTY"]
        if party == "Democrat":
            party = "Democratic"

        # Maps a session name to the biennium path segment used by the site.
        slug = {
            "2013-2014": "b2013_14",
            "2015-2016": "b2015_16",
            "2017-2018": "b2017_18",
            "2019-2020": "b2019_20",
        }[session]
        leg_url = "http://www.kslegislature.org/li/%s/members/%s/" % (slug,
                                                                      kpid)

        photo_url = ""
        try:
            legislator_page = self.lxmlize(leg_url)
        except scrapelib.HTTPError:
            self.warning("{}'s legislator bio page not found".format(
                content["FULLNAME"]))
            leg_url = ""
        else:
            # Take the first match instead of unpacking exactly one element:
            # a page with no (or several) profile pictures must not abort
            # the scrape with a ValueError.
            photo_urls = legislator_page.xpath(
                '//img[@class="profile-picture"]/@src')
            if photo_urls:
                photo_url = photo_urls[0]

        person = Person(
            name=content["FULLNAME"],
            district=str(content["DISTRICT"]),
            primary_org=chamber,
            party=party,
            image=photo_url,
        )
        person.extras = {"occupation": content["OCCUPATION"]}

        address = "\n".join([
            "Room {}".format(content["OFFICENUM"]),
            "Kansas State Capitol Building",
            "300 SW 10th St.",
            "Topeka, KS 66612",
        ])

        note = "Capitol Office"
        person.add_contact_detail(type="address", value=address, note=note)
        # Email and phone are optional in the API payload; skip empty values
        # rather than attaching blank contact details.
        if content["EMAIL"]:
            person.add_contact_detail(type="email",
                                      value=content["EMAIL"],
                                      note=note)
        if content["OFFPH"]:
            person.add_contact_detail(type="voice",
                                      value=content["OFFPH"],
                                      note=note)

        person.add_source(url)
        # leg_url is blanked when the bio page could not be fetched; do not
        # record an empty link in that case.
        if leg_url:
            person.add_link(leg_url)

        yield person
Пример #2
0
    def _scrape_lower_chamber(self):
        """Scrape the House roster and yield a ``Person`` per seated member.

        The roster table at ``self._reps_url`` is read row by row. Columns
        used (0-based ``td`` index): 1 last name, 2 first name, 3 district,
        4 party code, 6 phone, 7 room.

        Rows whose last name is ``"Vacant"`` are handed to
        ``self._save_vacant_legislator`` and are not yielded. For every other
        row the member's details page (``self._rep_details_url`` formatted
        with the district) is fetched for an email address and a photo.

        Yields:
            ``Person`` objects for seated members of the lower chamber.

        Raises:
            IndexError: If the roster table is not found on the page.
        """
        self.info("Scraping lower chamber for legislators.")

        chamber = "lower"
        office_note = "Capitol Office"
        party_names = {"D": "Democratic", "R": "Republican"}
        # Members whose roster row carries no party, keyed by the full name
        # exactly as assembled below (note the leading space: these keys
        # match rows where the first-name cell is empty).
        party_override = {
            " Green": "Democratic",
            " Sisco": "Republican"
        }

        roster_url = self._reps_url
        page = lxml.html.fromstring(self.get(roster_url).text)
        # This is the ASP.net table container
        table = page.xpath("//table[@id='theTable']")[0]
        # The first three rows are skipped -- presumably header/filter rows
        # of the grid; confirm against the live page.
        for tr in table.xpath("tr")[3:]:
            # If a given term hasn't occurred yet, then ignore it
            # Eg, in 2017, the 2018 term page will have a blank table
            if tr.attrib.get("class") == "dxgvEmptyDataRow":
                self.warning("No House members found")
                return

            tds = tr.xpath("td")
            last_name = tds[1].text_content().strip()
            first_name = tds[2].text_content().strip()
            full_name = "{} {}".format(first_name, last_name)
            district = str(int(tds[3].text_content().strip()))
            phone = tds[6].text_content().strip()
            room = tds[7].text_content().strip()
            is_vacant = last_name == "Vacant"

            party = tds[4].text_content().strip()
            party = party_names.get(party, party)
            if not party:
                # Consult the override table *before* falling back to
                # "Other"; doing it afterwards made the override unreachable.
                if not is_vacant and full_name in party_override:
                    party = party_override[full_name]
                else:
                    party = "Other"  # Workaround for now.

            person = Person(name=full_name,
                            primary_org=chamber,
                            district=district,
                            party=party)
            person.extras = {
                "first_name": first_name,
                "last_name": last_name
            }
            person.add_source(roster_url)
            person.add_contact_detail(
                type="address",
                value=self._assumed_address_fmt.format(room),
                note=office_note)
            if phone:
                person.add_contact_detail(type="voice",
                                          value=phone,
                                          note=office_note)

            if is_vacant:
                self._save_vacant_legislator(person)
                continue

            details_url = self._rep_details_url.format(district)
            details_page = lxml.html.fromstring(self.get(details_url).text)
            person.add_source(details_url)
            person.add_link(details_url)

            mailto_hrefs = details_page.xpath(
                '//*[@id="ContentPlaceHolder1_lblAddresses"] '
                '//a[starts-with(@href,"mailto:")]/@href')
            if mailto_hrefs:
                # Everything after the scheme is the address; a bare
                # "mailto:" yields an empty string and is skipped.
                email = mailto_hrefs[0].split(":", 1)[1].strip()
                if email:
                    person.add_contact_detail(type="email",
                                              value=email,
                                              note=office_note)

            picture = details_page.xpath(
                '//*[@id="ContentPlaceHolder1_imgPhoto"]/@src')
            if picture:
                person.image = picture[0]

            yield person