def process_item(self, item):
    """Build a ``ScrapePerson`` for one Hawaii legislator entry.

    The second ``<a>`` found in ``item`` supplies the last name (its text)
    and the profile URL (its ``href``).  Every other field is read from the
    element whose ``id`` ends with the corresponding value in
    ``self.LABELS``, with surrounding whitespace stripped.

    ``self.skip()`` is called when the link selector raises
    ``SelectorError``.

    Returns the populated ``ScrapePerson``.
    """
    try:
        anchors = CSS("a").match(item)
        link = anchors[1]
    except SelectorError:
        self.skip()

    data = {"last_name": link.text_content(), "url": link.get("href")}
    data.update(
        (key, CSS(f"[id$={label}]").match_one(item).text_content().strip())
        for key, label in self.LABELS.items()
    )

    # The page shows the party as "(D)" / "(R)"; any other value is a KeyError.
    party_names = {"(D)": "Democratic", "(R)": "Republican"}
    party = party_names[data["party"]]
    address = "Hawaii State Capitol, Room " + data["room"]
    # "S" marks the upper chamber; everything else is treated as lower.
    if data["chamber"] == "S":
        chamber = "upper"
    else:
        chamber = "lower"

    person = ScrapePerson(
        name=data["first_name"] + " " + data["last_name"],
        state="hi",
        chamber=chamber,
        district=data["district"],
        given_name=data["first_name"],
        family_name=data["last_name"],
        party=party,
        email=data["email"],
    )

    capitol = person.capitol_office
    capitol.address = address
    capitol.voice = data["voice"]
    capitol.fax = data["fax"]

    profile_url = data["url"]
    person.add_source(profile_url)
    person.add_link(profile_url)
    return person
def process_item(self, item):
    """Turn one committee list entry into a ``SenateCommitteeDetail`` request.

    The section heading is taken from the text of a preceding ``<h3>``
    sibling, looked up relative to ``item``'s parent and, if that selector
    raises ``SelectorError``, relative to a higher ancestor.

    Entries under "Standing Committees" become plain committees; entries
    under "Appropriations Subcommittees" become subcommittees whose parent
    is "Appropriations".  For any other heading ``self.skip()`` is called.
    """
    try:
        headings = XPath("..//preceding-sibling::h3/text()").match(item)
    except SelectorError:
        headings = XPath("../../..//preceding-sibling::h3/text()").match(item)

    for heading in headings:
        is_standing = heading == "Standing Committees"
        if not (is_standing or heading == "Appropriations Subcommittees"):
            self.skip()
            # Only reached if skip() returns rather than raising.
            continue

        link = CSS("a").match_one(item)
        name = link.text_content()
        source = link.get("href")

        if is_standing:
            com = ScrapeCommittee(name=name, chamber=self.chamber)
        else:
            com = ScrapeCommittee(
                name=name,
                classification="subcommittee",
                chamber=self.chamber,
                parent="Appropriations",
            )
        return SenateCommitteeDetail(com, source=source)
def process_item(self, item):
    """Build a ``ScrapePerson`` from one Illinois member table row.

    Rows with a single cell, or whose first cell has class ``header``, are
    passed to ``self.skip()``.  Otherwise the first link in the row gives
    the member's name and detail URL, the fourth cell the district and the
    fifth cell the one-letter party code.

    Returns a ``LegDetail`` page for the member's detail URL.
    """
    cells = CSS("td").match(item)

    # skip header rows
    is_header = len(cells) == 1 or cells[0].get("class") == "header"
    if is_header:
        self.skip()

    member_link = CSS("td a").match(item)[0]
    name = member_link.text_content()
    detail_link = member_link.get("href")

    district = cells[3].text_content()
    party_letter = cells[4].text_content()
    # An unrecognised party letter raises KeyError.
    party = {"D": "Democratic", "R": "Republican", "I": "Independent"}[
        party_letter
    ]

    person = ScrapePerson(
        name=name,
        state="il",
        party=party,
        chamber=self.chamber,
        district=district,
    )
    for url in (self.source.url, detail_link):
        person.add_source(url)
    person.add_link(detail_link, note="homepage")

    return LegDetail(person, source=detail_link)
def process_item(self, item):
    """Create a committee from the first link in ``item``.

    The link text becomes the committee name and its ``href`` is recorded
    both as a source and as the "homepage" link.

    Returns a ``CommitteeDetail`` page for that ``href``.
    """
    anchor = CSS("a").match(item)[0]
    committee = ScrapeCommittee(
        name=anchor.text_content(),
        classification="committee",
        chamber=self.chamber,
    )

    detail_link = anchor.get("href")
    committee.add_source(detail_link)
    committee.add_link(detail_link, note="homepage")

    return CommitteeDetail(committee, source=detail_link)
def process_page(self):
    """Add district, assistant and press-contact details to ``self.input``.

    All values are read from the ``div.hidden-xs.mem-info`` section of
    ``self.root``:

    * the ``<h3>`` heading ("<title> | District <n>") sets ``district`` and,
      when the title is not "Representative", ``extras["title"]``;
    * the first link and the second ``p.no-margin`` paragraph supply the
      assistant's name, e-mail and two phone numbers;
    * the ``div.small-block.last`` block supplies the press contact's name,
      phone and e-mail.

    Returns the same person object that was passed in as ``self.input``.
    """
    person = self.input

    heading = CSS("div.hidden-xs.mem-info h3").match_one(self.root).text_content()
    heading_match = re.search(r"(.+)\s\|\sDistrict\s(\d+)", heading)
    title, district = heading_match.groups()
    person.district = district
    if title != "Representative":
        person.extras["title"] = title

    # Assistant: name is the link text, e-mail is the target of its mailto: href.
    assistant_link = CSS("div.hidden-xs.mem-info a").match(self.root)[0]
    assistant_name = assistant_link.text_content()
    assistant_email = re.search(r"mailto:(.+)", assistant_link.get("href")).group(1)

    info_paragraphs = CSS("div.hidden-xs.mem-info p.no-margin").match(self.root)
    phone_line = info_paragraphs[1].text_content()
    phone1, phone2 = re.search(r"Phone:\s(.+)\s\|\s(.+)", phone_line).groups()

    person.extras["assistant name"] = assistant_name
    person.extras["assistant email"] = assistant_email
    person.extras["assistant phone1"] = phone1
    person.extras["assistant phone2"] = phone2

    # Press contact: first paragraph is the name, second holds "Phone: ...".
    press_paragraphs = CSS("div.hidden-xs.mem-info div.small-block.last p").match(
        self.root
    )
    press_name = press_paragraphs[0].text_content()
    press_phone_line = press_paragraphs[1].text_content()
    press_phone = re.search(r"Phone:\s(.+)", press_phone_line).group(1)
    press_link = CSS("div.hidden-xs.mem-info div.small-block.last a").match_one(
        self.root
    )
    press_email = press_link.text_content()

    person.extras["press contact name"] = press_name
    person.extras["press contact phone"] = press_phone
    person.extras["press contact email"] = press_email

    return person
def process_item(self, item):
    """Create a committee from the single link in ``item``.

    The link text becomes the committee name.  Both the list page URL and
    the link's ``href`` are recorded as sources, and the ``href`` is also
    added as the "homepage" link.

    Returns a ``DetailCommitteePage`` for the ``href``, except for the one
    known-broken detail URL, for which the bare committee is returned.
    """
    anchor = CSS("a").match_one(item)
    committee = ScrapeCommittee(
        name=anchor.text_content(),
        chamber=self.chamber,
    )

    detail_link = anchor.get("href")
    for url in (self.source.url, detail_link):
        committee.add_source(url)
    committee.add_link(detail_link, note="homepage")

    # this link has broken html (not able to grab member info)
    # just returning name, chamber, and link
    broken_page = "https://legislature.idaho.gov/sessioninfo/2021/joint/cec/"
    if detail_link != broken_page:
        return DetailCommitteePage(committee, source=detail_link)
    return committee
def process_page(self):
    """Scrape one Alabama legislator's detail page into a ``ScrapePerson``.

    The member heading is split on the chamber title ("SENATOR" for
    ``upper``, "REPRESENTATIVE" for ``lower``) and on ", ", and the name is
    rebuilt from the third and second pieces.  The remaining fields come
    from the two-cell rows of the legislator table, keyed by the label in
    the first cell.

    The "(D)" / "(R)" affiliation codes are normalised to "Democratic" /
    "Republican"; any other affiliation text is kept verbatim.

    When the street contains "11 South Union Street" the address, phone and
    fax are stored on the capitol office, otherwise on the district office.

    Returns:
        The populated ``ScrapePerson``.

    Raises:
        ValueError: if ``self.input.chamber`` is neither "upper" nor "lower".
    """
    heading = (
        CSS(".container-main #ContentPlaceHolder1_lblMember")
        .match_one(self.root)
        .text_content()
    )
    chamber = self.input.chamber
    if chamber == "upper":
        name_split = re.split("SENATOR|, ", heading)
    elif chamber == "lower":
        name_split = re.split("REPRESENTATIVE|, ", heading)
    else:
        # Fail with a clear message instead of an unbound-variable error.
        raise ValueError(f"unexpected chamber: {chamber!r}")
    full_name = name_split[2] + name_split[1]

    table = CSS("#ContentPlaceHolder1_TabSenator_TabLeg_gvLEG").match_one(self.root)

    party_names = {"(R)": "Republican", "(D)": "Democratic"}
    party = district = county = phone = fax = ""
    street = office = city = postal = email = ""

    for row in CSS("tr").match(table):
        # Each row is expected to hold exactly a label cell and a value cell.
        label_cell, value_cell = CSS("td").match(row)
        label = label_cell.text_content()
        value = value_cell.text_content()

        if label == "Affiliation:":
            party = party_names.get(value, value)
        elif label == "District:":
            # The district number is the third space-separated token.
            district = value.split(" ")[2]
        elif label == "County:":
            county = value
        elif label == "Phone Number:":
            phone = value
        elif label == "Fax Number:":
            # A blank fax row never overwrites an earlier value.
            if value != "":
                fax = value
        elif label == "Street:":
            street = value
        elif label == "Office:":
            office = value
        elif label == "City:":
            city = value
        elif label == "Postal Code:":
            postal = value
        elif label == "Email:":
            email = value

    address = f"{street}, {office}, {city} AL"

    image = (
        CSS("#ContentPlaceHolder1_TabSenator_TabLeg_imgLEG")
        .match_one(self.root)
        .get("src")
    )

    p = ScrapePerson(
        name=full_name.title(),
        state="al",
        chamber=chamber,
        party=party,
        district=district,
        email=email,
        image=image,
    )
    p.add_source(self.source.url)
    p.add_source(self.input.url)

    # This address is the capitol office
    if re.search("11 South Union Street", street):
        contact = p.capitol_office
    else:
        contact = p.district_office
    contact.address = address
    contact.voice = phone
    try:
        contact.fax = fax
    except ValueError:
        # Best effort: a fax value the office rejects is simply left unset.
        pass

    p.extras["postal code"] = postal
    p.extras["county"] = county
    return p