def process_item(self, item):
    """Build a ``Person`` (state "ny", lower chamber) from one member element.

    Reads the district from the element's ``id`` attribute, the photo and
    profile link from the ``.mem-pic`` / ``.mem-name`` anchors, the two
    address dicts from ``self.process_addresses(item)``, and the party from
    ``self.party_mapping``.  Email, Twitter and Facebook are optional: a
    missing selector yields an empty value instead of an error.

    Returns the populated ``Person``.  ``SelectorError`` from the required
    selectors and ``KeyError`` from ``self.party_mapping`` are not handled
    here.
    """
    # Round-tripping through int drops any leading zeros ("007" -> "7").
    district = str(int(item.get("id")))
    image = CSS(".mem-pic a img").match_one(item).get("src")
    name = CSS(".mem-name a").match_one(item)
    district_addr, capitol_addr = self.process_addresses(item)

    # Email is only sometimes present; default to empty.
    email = ""
    try:
        email = CSS(".mem-email a").match_one(item).text.strip()
    except SelectorError:
        pass

    # Social icons are only sometimes present.  The handle is the last path
    # segment of the href on the icon's parent element.
    handles = {}
    for network, selector in (
        ("twitter", ".fa-twitter"),
        ("facebook", ".fa-facebook"),
    ):
        try:
            icon = CSS(selector).match_one(item)
            handle = icon.getparent().get("href").split("/")[-1]
        except SelectorError:
            handle = ""
        if handle:
            handles[network] = handle

    # NOTE(review): party_mapping is keyed by the normalized district string
    # and the party is taken from index 1 of the entry -- confirm the entry
    # layout where the mapping is built.
    party = self.party_mapping[district][1]
    full_name = name.text.strip()

    p = Person(
        state="ny",
        chamber="lower",
        image=image,
        party=party,
        district=district,
        name=full_name,
        email=email,
    )

    # The member's profile page serves as both link and source.
    profile_url = name.get("href")
    p.add_link(url=profile_url)
    p.add_source(url=profile_url)

    for network, handle in handles.items():
        p.ids[network] = handle

    for office_attr, details in (
        ("district_office", district_addr),
        ("capitol_office", capitol_addr),
    ):
        office = getattr(p, office_attr)
        office.address = details["address"]
        office.voice = details["phone"]
        office.fax = details["fax"]

    return p
def process_item(self, item):
    """Build a ``Person`` (state "hi") from one listing element.

    The second ``<a>`` inside ``item`` supplies the last name and the
    profile URL.  Every other field is read from the element whose ``id``
    ends with the label configured in ``self.LABELS`` (key -> id suffix).

    Returns the populated ``Person``.  A party marker other than "(D)" or
    "(R)" raises ``KeyError``; an item with exactly one anchor raises
    ``IndexError``.
    """
    try:
        anchors = CSS("a").match(item)
        link = anchors[1]
    except SelectorError:
        # NOTE(review): presumably self.skip() raises to abandon this item;
        # if it returned normally, ``link`` would be unbound below -- confirm.
        self.skip()

    data = {
        "last_name": link.text_content(),
        "url": link.get("href"),
        **{
            key: CSS(f"[id$={label}]").match_one(item).text_content().strip()
            for key, label in self.LABELS.items()
        },
    }

    party_names = {"(D)": "Democratic", "(R)": "Republican"}
    party = party_names[data["party"]]
    address = f"Hawaii State Capitol, Room {data['room']}"
    # "S" marks the upper chamber; anything else is treated as lower.
    if data["chamber"] == "S":
        chamber = "upper"
    else:
        chamber = "lower"

    first_name = data["first_name"]
    last_name = data["last_name"]

    p = Person(
        name=" ".join((first_name, last_name)),
        state="hi",
        chamber=chamber,
        district=data["district"],
        given_name=first_name,
        family_name=last_name,
        party=party,
        email=data["email"],
    )

    p.capitol_office.address = address
    p.capitol_office.voice = data["voice"]
    p.capitol_office.fax = data["fax"]

    # The profile URL doubles as source and link.
    profile_url = data["url"]
    p.add_source(profile_url)
    p.add_link(profile_url)
    return p