Пример #1
0
    def process_page(self):
        """Build the committee described by the current page.

        The committee name is read from the second ``h2.committeeName``
        element on the page. Members are collected from the ``#members`` div:
        each ``dt`` supplies a role for the ``dd`` that follows it, while plain
        ``li`` entries are added without a role.

        Returns:
            The populated ``ScrapeCommittee``, or ``None`` when the page is an
            Appropriations Subcommittee (these are currently skipped).
        """
        name = self.root.xpath('//h2[@class="committeeName"]')[1].text

        if name.startswith("Appropriations Subcommittee"):
            # TODO: restore scraping of Appropriations Subcommittees.
            # The earlier approach stripped "Appropriations " from the name,
            # used {'name': 'Appropriations', 'classification': 'upper'} as the
            # parent, and left the chamber unset.
            return None

        # Strip only the leading "Committee on " prefix; a blanket
        # str.replace() would also delete the phrase from the middle of a name.
        prefix = "Committee on "
        if name.startswith(prefix):
            name = name[len(prefix):]

        comm = ScrapeCommittee(
            name=name,
            classification="committee",
            chamber="upper",
            parent=None,
        )

        # Members with a named role: <dt>Role: </dt><dd>Member</dd>
        for dt in self.root.xpath('//div[@id="members"]/dl/dt'):
            role = dt.text.replace(": ", "").strip().lower()
            member = dt.xpath("./following-sibling::dd")[0].text_content()
            comm.add_member(self.clean_name(member), role=role)

        # Remaining members are listed as plain <li> entries without a role.
        for li in self.root.xpath('//div[@id="members"]/ul/li'):
            comm.add_member(self.clean_name(li.text_content()))

        comm.add_source(self.source.url)

        return comm
Пример #2
0
    def process_item(self, item):
        """Convert one committee record into a ``ScrapeCommittee``.

        Args:
            item: Mapping for a single committee. The keys read here are
                ``CommitteeName``, ``LegislativeBody``, ``IsSubCommittee``,
                ``Members`` (each with ``FirstName``, ``LastName``,
                ``IsChair``, ``IsViceChair``), ``CommitteeId``,
                ``CommitteeShortName`` and ``TypeName``.

        Returns:
            The populated ``ScrapeCommittee`` with members, extras and source.
        """
        name = item["CommitteeName"]

        body = item["LegislativeBody"]
        if body == "H":
            chamber = "lower"
        elif body == "S":
            chamber = "upper"
        else:
            # A few Ad Hoc Committees don't have chambers, but are not
            # included in the Standing Committees scrape anyway.
            self.logger.warning("Committee not assigned to chamber")
            chamber = "lower"

        parent = None
        if item["IsSubCommittee"] is not False:
            # Names are expected to look like "<Parent> Subcommittee on <Name>".
            # partition() never raises, so a name that lacks the separator (or
            # repeats it) can no longer leave `parent` unbound.
            head, sep, tail = name.partition(" Subcommittee on ")
            if sep and head:
                parent, name = head, tail
            else:
                self.logger.warning(f"No parent listed for {name}")

        if parent:
            com = ScrapeCommittee(
                name=name,
                classification="subcommittee",
                chamber=chamber,
                parent=parent,
            )
        else:
            # Either a regular committee, or a subcommittee whose parent could
            # not be parsed from its name.
            # NOTE(review): the unparsable case falls back to a plain committee
            # so the record is still emitted -- confirm this is preferred over
            # skipping the item.
            com = ScrapeCommittee(name=name, chamber=chamber)

        # As of now, the API lists all members twice, so skip any
        # (name, position) pair that has already been added.
        seen = set()
        for member in item["Members"]:
            member_name = member["FirstName"] + " " + member["LastName"]
            if member["IsChair"]:
                position = "Chair"
            elif member["IsViceChair"]:
                position = "Vice Chair"
            else:
                position = "member"

            key = (member_name, position)
            if key in seen:
                continue
            seen.add(key)
            com.add_member(member_name, position)

        com.extras["Committee ID"] = item["CommitteeId"]
        com.extras["Committee Short Name"] = item["CommitteeShortName"]
        com.extras["Committee Type"] = item["TypeName"]

        com.add_source(self.source.url)

        return com