def process_item(self, item):
    """Turn one link element into a ``RaceResultDetail`` request.

    The element's ``href`` attribute is read, made absolute against
    ``https://ultrasignup.com`` when it does not already start with
    ``http``, and the text after the last ``=`` is taken as the race id.

    Args:
        item: Element selected by the page; it must carry exactly one
            ``href`` attribute (``match_one`` is used).

    Returns:
        A ``RaceResultDetail`` whose input is ``self.input`` extended with
        ``race_id`` and ``race_results_url``, sourced from the resolved URL.
    """
    results_url = XPath("@href").match_one(item)

    # Relative links are resolved against the site root.
    already_absolute = results_url.startswith("http")
    if not already_absolute:
        results_url = f"https://ultrasignup.com{results_url}"

    # The id is whatever follows the final "=" in the URL.
    race_id = results_url.rsplit("=", 1)[-1]

    # dict(...) with keyword expansion is kept on purpose: it requires
    # self.input to be a mapping with string keys.
    payload = dict(race_id=race_id, race_results_url=results_url, **self.input)
    return RaceResultDetail(payload, source=results_url)
def process_item(self, item):
    """Build a ``ScrapePerson`` for one senator listing element.

    Reads the name (with its party suffix), district, photo, capitol
    office, district offices and the first link from ``item``.

    Args:
        item: Element for a single senator entry.

    Returns:
        The populated ``ScrapePerson`` (state ``"ca"``, chamber ``"upper"``).

    Notes:
        Entries whose name does not end in `` (R)`` or `` (D)`` are handed
        to ``self.skip``. NOTE(review): this presumably aborts processing
        of the item by raising -- confirm against the base class.
    """
    heading = XPath(".//h3/text()").match(item)[0]

    # Party is encoded as a suffix on the heading text.
    for suffix, party_name in ((" (R)", "Republican"), (" (D)", "Democratic")):
        if heading.endswith(suffix):
            party = party_name
            break
    else:
        self.skip("skipping " + heading)

    # Drop everything from the first " (" onwards to get the bare name.
    full_name = heading.split(" (")[0]

    district_text = XPath(
        './/div[contains(@class, "senator-district")]/div/text()'
    ).match(item)[0]
    # Districts are zero-padded on the page; strip whitespace, then zeros.
    district = district_text.strip().lstrip("0")

    image_src = XPath(".//img/@src").match_one(item)

    person = ScrapePerson(
        name=full_name,
        state="ca",
        chamber="upper",
        district=district,
        party=party,
        image=image_src,
    )

    # Capitol office: a single "<address>; <phone>" paragraph.
    capitol_node = XPath(
        ".//div[contains(@class, 'views-field-field-senator-capitol-office')]//p"
    ).match_one(item)
    capitol_text = capitol_node.text_content().replace("\xa0", " ")
    capitol_address, capitol_phone = capitol_text.split("; ")
    person.capitol_office.address = capitol_address.strip()
    person.capitol_office.voice = capitol_phone.strip()

    # District offices: one "<address>; <phone>" entry per text line.
    district_node = XPath(
        ".//div[contains(@class, 'views-field-field-senator-district-office')]"
    ).match_one(item)
    for raw_line in district_node.text_content().strip().splitlines():
        # Skip the "District Office(s)" heading line.
        if re.search(r"District Offices?", raw_line):
            continue

        cleaned = raw_line.strip().replace("\xa0", " ")
        try:
            addr, phone = cleaned.split("; ")
            person.add_office(
                classification="district",
                address=addr.strip(),
                voice=phone.strip(),
            )
        except ValueError:
            # Steven Bradford address/phone separated by period instead of
            # semi-colon. Lines matching neither layout are ignored.
            if not re.search(r"\w+\.\s\(\d{3}\)", raw_line):
                continue
            addr, phone = cleaned.split(". (")
            # Restore the "(" consumed by the split.
            person.add_office(
                classification="district",
                address=addr.strip(),
                voice=("(" + phone).strip(),
            )

    profile_url = XPath(".//a/@href").match(item)[0]
    person.add_link(profile_url)
    person.add_source(self.source.url)
    return person