def process_page(self):
    """Build a ``Person`` from the legislator detail page in ``self.root``.

    Name, party and district come from the page's ``name_css``,
    ``party_css`` and ``district_css`` selectors; the image is the
    ``href`` of the ``image_selector`` match.  A "Contact Form" link is
    derived from the source URL, and the capitol office is filled in
    only when the first ``.districtheadleft`` element reads
    "Capitol Address:".

    Returns:
        The populated ``Person``.

    Raises:
        AssertionError: if the derived contact URL does not start with
            the expected okhouse.gov contact prefix.
        IndexError: if the name or district text has fewer than two
            whitespace-separated tokens, or an expected element is
            missing.
    """
    # Drop the first whitespace-separated token of the name element's
    # text; the remainder is used as the name.
    full_name = self.name_css.match_one(self.root).text.split(maxsplit=1)[1]
    party_name = self.party_css.match_one(self.root).text
    # The second token of the district element's text is the district.
    district_number = self.district_css.match_one(self.root).text.split()[1]

    # NOTE(review): chamber is "upper" although the contact URL asserted
    # below lives on okhouse.gov -- confirm this is intended.
    member = Person(
        name=full_name,
        state="ok",
        chamber="upper",
        party=party_name,
        district=district_number,
    )
    member.image = self.image_selector.match_one(self.root).get("href")

    # The contact form URL mirrors the district page URL.
    contact_form = self.source.url.replace("District.aspx", "Contact.aspx")
    expected_prefix = "https://www.okhouse.gov/Members/Contact.aspx?District="
    assert contact_form.startswith(expected_prefix)
    member.add_link(contact_form, note="Contact Form")

    # Capitol address: only trust the block when its heading matches.
    heading_text = CSS(".districtheadleft").match(self.root)[0].text_content()
    if heading_text.strip() != "Capitol Address:":
        return member

    address_block = CSS(".districtheadleft + div").match(self.root)[0]
    address_lines = address_block.text_content().strip().splitlines()
    # Every line but the last is address text; the last line is the phone.
    member.capitol_office.address = "; ".join(
        line.strip() for line in address_lines[:-1]
    )
    # NOTE(review): other scrapers in this file set ``.voice`` on offices;
    # confirm ``.phone`` is the intended attribute here.
    member.capitol_office.phone = address_lines[-1].strip()
    return member
def process_page(self):
    """Build a ``Person`` for the legislator described by ``self.input``.

    Party and district are parsed by ``get_party_district`` from the
    first ``//h3/font`` text node of the page.  Name and optional image
    come from ``self.input``.  The page URL is recorded as both a link
    and a source, then ``self.get_offices`` is called on the new person.

    Returns:
        The populated ``Person``.

    Raises:
        IndexError: if the page has no ``//h3/font`` text node.
    """
    header_nodes = self.root.xpath("//h3/font/text()")
    party, district = get_party_district(header_nodes[0])

    listing = self.input
    person = Person(
        name=listing.name,
        state="va",
        chamber=self.chamber,
        party=party,
        district=district,
    )

    # Only set the image when the listing supplied one.
    if listing.image:
        person.image = listing.image

    page_url = self.source.url
    person.add_link(page_url)
    person.add_source(page_url)

    self.get_offices(person)
    return person
def process_item(self, item):
    """Convert one member record (a nested dict) into a ``Person``.

    Args:
        item: Mapping with the keys read below (``district``, ``name``,
            ``fullName``, ``party``, ``districtAddress``, ``photos``,
            ``residence``, ``city``, ``id``, ``dateVacated``,
            ``sessionId``).

    Returns:
        The populated ``Person``.

    Raises:
        Exception: if ``item["photos"]`` holds more than one entry.
        KeyError: if an expected key is missing from ``item``.
    """
    district_info = item["district"]
    chamber_id = district_info["chamberType"]

    person = Person(
        state="ga",
        chamber=self.chamber_types[chamber_id],
        district=str(district_info["number"]),
        name=item["fullName"],
        family_name=item["name"]["familyName"],
        given_name=item["name"]["first"],
        suffix=item["name"]["suffix"] or "",
        party=self.party_ids[item["party"]],
    )

    # District office: contact details are always copied; the address is
    # assembled only when a first address line is present.
    office = item["districtAddress"]
    person.email = office["email"]
    person.district_office.voice = office["phone"]
    person.district_office.fax = office["fax"]
    if office["address1"]:
        address_parts = [office["address1"]]
        if office["address2"]:
            address_parts.append(office["address2"])
        address_parts.append("{city}, {state} {zip}".format(**office))
        person.district_office.address = "; ".join(address_parts).strip()

    # Photos: zero or one entry is understood; anything else is an error.
    photos = item["photos"]
    if photos:
        if len(photos) != 1:
            raise Exception("unknown photos configuration: " + str(photos))
        # Strip the query string (e.g. ?size=mpSm) to get the full-size image.
        person.image = photos[0]["url"].split("?")[0]

    # Extras carried through from the record unchanged.
    member_id = item["id"]
    person.extras["residence"] = item["residence"]
    person.extras["city"] = item["city"]
    person.extras["georgia_id"] = member_id

    if item["dateVacated"]:
        person.end_date = item["dateVacated"]

    chamber_slug = self.chamber_names[chamber_id]
    member_url = (
        f"https://www.legis.ga.gov/members/{chamber_slug}/"
        f"{member_id}?session={item['sessionId']}"
    )
    person.add_source(member_url)
    person.add_link(member_url)
    return person