def process_item(self, item):
    """Build a committee record from one link element of the listing page.

    The link text is used as the committee name and its ``href`` as the
    committee URL. Names starting with "Joint" are recorded as committees
    of the "legislature" chamber, names starting with "Subcommittee" as
    subcommittees of the "upper" chamber (with a parent taken from the
    surrounding markup), and everything else as an "upper" chamber
    committee.

    Args:
        item: The link element being processed; must support XPath
            matching and ``getparent()``.

    Returns:
        A ``ChooseType`` wrapping the committee, sourced from the
        committee URL.
    """
    title = XPath("text()").match_one(item)

    # These links sit in the same list but are informational pages, not
    # committees. NOTE(review): self.skip() presumably aborts this item by
    # raising; confirm against the base page class.
    if title in ("Teleconference How-To Information", "Legislative Process"):
        self.skip()

    link = XPath("@href").match_one(item)

    if title.startswith("Joint"):
        details = dict(classification="committee", chamber="legislature")
    elif title.startswith("Subcommittee"):
        # The parent committee's name is the text of the first child of
        # the element four levels above this link.
        ancestor = item
        for _ in range(4):
            ancestor = ancestor.getparent()
        parent_name = ancestor.getchildren()[0].text_content()
        details = dict(
            classification="subcommittee",
            chamber="upper",
            parent=parent_name,
        )
    else:
        details = dict(classification="committee", chamber="upper")

    committee = ScrapeCommittee(name=title, **details)

    # Record both the listing page and the committee's own page as sources.
    for source_url in (self.source.url, link):
        committee.add_source(source_url)
    committee.add_link(link, note="homepage")

    return ChooseType(committee, source=URL(link))
def process_item(self, item):
    """Turn one result link into a ``RaceResultDetail`` request.

    The element's ``href`` is made absolute against
    ``https://ultrasignup.com`` unless it already starts with "http".
    The race id is whatever follows the last "=" in the resulting URL.

    Args:
        item: The link element being processed; must support XPath
            matching of its ``href`` attribute.

    Returns:
        A ``RaceResultDetail`` whose input is ``self.input`` extended with
        ``race_id`` and ``race_results_url``, sourced from the result URL.
    """
    link = XPath("@href").match_one(item)
    results_url = link if link.startswith("http") else f"https://ultrasignup.com{link}"

    # Text after the final "=" is taken as the race id; if the URL has no
    # "=", the whole URL is used.
    race_id = results_url.rsplit("=", 1)[-1]

    # dict(...) with keyword expansion is deliberate: a key in self.input
    # that collides with race_id / race_results_url raises TypeError rather
    # than being silently overwritten.
    payload = dict(
        race_id=race_id,
        race_results_url=results_url,
        **self.input,
    )
    return RaceResultDetail(payload, source=results_url)