示例#1
0
def record_votes(root, session, chamber):
    """Yield a ``VoteEvent`` for every valid vote found under ``root``.

    Each ``div`` matched by the module-level ``vote_selectors`` is wrapped in
    ``MaybeVote``; wrappers whose ``is_valid`` is falsy are skipped.

    Args:
        root: element exposing ``xpath`` that is searched for vote divs.
        session: session identifier; only its first two characters are
            passed on as ``legislative_session``.
        chamber: chamber recorded on every yielded vote event.

    Yields:
        One populated ``VoteEvent`` per valid vote.
    """
    selector = "//div{}".format("".join(vote_selectors))
    for div in root.xpath(selector):
        parsed = MaybeVote(div)
        if not parsed.is_valid:
            continue

        # Motion text, result and classification all follow the outcome.
        if parsed.passed:
            motion, outcome, kind = "passage", "pass", "passage"
        else:
            motion, outcome, kind = "other", "fail", None

        event = VoteEvent(
            chamber=chamber,
            start_date=None,
            motion_text=motion,
            result=outcome,
            classification=kind,
            legislative_session=session[:2],
            bill=parsed.bill_id,
            bill_chamber=parsed.chamber,
        )

        # Missing tallies are stored as zero; "present" is reported as
        # "not voting".
        event.set_count("yes", parsed.yeas or 0)
        event.set_count("no", parsed.nays or 0)
        event.set_count("not voting", parsed.present or 0)

        # Roll key -> callable that records one cleaned name on the event.
        recorders = (
            ("yeas", event.yes),
            ("nays", event.no),
            ("present", lambda name: event.vote("not voting", name)),
            ("absent", lambda name: event.vote("absent", name)),
        )
        for roll_key, record in recorders:
            for raw_name in parsed.votes[roll_key]:
                record(clean_vote_name(raw_name))

        yield event
示例#2
0
    def _parse_senate_votes(self, vote_data, bill, url):
        """Build an upper-chamber ``VoteEvent`` from one vote record.

        Args:
            vote_data: mapping read for ``voteDate`` (``YYYY-MM-DD``),
                ``voteType`` (``"FLOOR"`` or ``"COMMITTEE"``), ``version``,
                ``memberVotes`` and, for committee votes, ``committee``.
            bill: bill the vote event is attached to.
            url: source URL recorded on the vote event.

        Returns:
            The ``VoteEvent`` with individual votes and yes/no/other counts;
            its result is ``"pass"`` only when yes votes outnumber no votes.

        Raises:
            ValueError: if ``voteType`` is neither ``"FLOOR"`` nor
                ``"COMMITTEE"``, or ``voteDate`` does not parse.
        """
        vote_datetime = datetime.datetime.strptime(vote_data["voteDate"],
                                                   "%Y-%m-%d")
        if vote_data["voteType"] == "FLOOR":
            motion = "Floor Vote"
        elif vote_data["voteType"] == "COMMITTEE":
            motion = "{} Vote".format(vote_data["committee"]["name"])
        else:
            raise ValueError("Unknown vote type encountered.")

        if vote_data["version"]:
            motion += " - Version: " + vote_data["version"]

        vote = VoteEvent(
            chamber="upper",
            start_date=vote_datetime.strftime("%Y-%m-%d"),
            motion_text=motion,
            classification="passage",
            # Placeholder; the real result is set once the votes are tallied.
            result="fail",
            bill=bill,
        )

        vote.add_source(url)

        vote_rolls = vote_data["memberVotes"]["items"]

        def members(roll_code):
            # A roll may be absent, null, or lack an "items" list; every
            # vote type is read through this one accessor so all of them
            # tolerate those shapes the same way.
            roll = vote_rolls.get(roll_code) or {}
            return roll.get("items") or []

        yes_count, no_count, other_count = 0, 0, 0

        # Count all yea votes ("AYEWR" entries are recorded as yes as well).
        for roll_code in ("AYE", "AYEWR"):
            for legislator in members(roll_code):
                vote.yes(legislator["fullName"])
                yes_count += 1

        # Count all nay votes.
        for legislator in members("NAY"):
            vote.no(legislator["fullName"])
            no_count += 1

        # Count all other types of votes.
        for roll_code in ("EXC", "ABS", "ABD"):
            for legislator in members(roll_code):
                vote.vote("other", legislator["fullName"])
                other_count += 1

        vote.result = "pass" if yes_count > no_count else "fail"
        vote.set_count("yes", yes_count)
        vote.set_count("no", no_count)
        vote.set_count("other", other_count)

        return vote
示例#3
0
    def scrape_votes_old(self, bill, billname, session):
        """Yield one passage ``VoteEvent`` per journal link on the archive page.

        Args:
            bill: bill each vote event is attached to.
            billname: bill name appended to the archive URL.
            session: session string placed before ``billname`` in the URL.

        Yields:
            ``VoteEvent`` objects carrying the individual yes and no votes.

        Raises:
            ScrapeError: if a row's chamber label is neither ``House`` nor
                ``Senate``.
        """
        vote_url = (
            "http://archives.legislature.state.oh.us/bills.cfm?ID="
            + session + "_" + billname
        )
        doc = lxml.html.fromstring(self.get(vote_url).text)

        chamber_by_label = {"House": "lower", "Senate": "upper"}

        def voter_names(row, div_expr):
            # Non-empty cell texts of the first div matching ``div_expr``.
            div = row.xpath(div_expr)[0]
            texts = (cell.xpath("string()") for cell in div.xpath("table/tr/td"))
            return [text for text in texts if text]

        for journal_link in doc.xpath("//a[contains(@href, 'JournalText')]"):
            parsed_day = datetime.datetime.strptime(journal_link.text, "%m/%d/%Y")
            vote_date = "{:%Y-%m-%d}".format(self._tz.localize(parsed_day).date())

            # Details text reads "<chamber> - <motion>..." in the second cell.
            details = journal_link.xpath("string(../../../td[2])")
            pieces = details.split(" - ")
            label = pieces[0]
            if label not in chamber_by_label:
                raise ScrapeError("Bad chamber: %s" % label)
            chamber = chamber_by_label[label]
            motion = pieces[1].split("\n")[0].strip()

            # The names live in the row following the link's own row.
            vote_row = journal_link.xpath("../../..")[0].getnext()
            yeas = voter_names(vote_row, "td/font/div[contains(@id, 'Yea')]")
            nays = voter_names(vote_row, "td/font/div[contains(@id, 'Nay')]")

            vote = VoteEvent(
                chamber=chamber,
                start_date=vote_date,
                motion_text=motion,
                result="pass" if len(yeas) > len(nays) else "fail",
                bill=bill,
                classification="passage",
            )

            for supporter in yeas:
                vote.yes(supporter)
            for opponent in nays:
                vote.no(opponent)

            vote.add_source(vote_url)

            yield vote
示例#4
0
def test_full_vote_event():
    """Import a fully specified vote event and check everything stored for it."""
    jurisdiction = create_jurisdiction()
    jurisdiction.legislative_sessions.create(name="1900", identifier="1900")
    john = ScrapePerson("John Smith", primary_org="lower")
    adam = ScrapePerson("Adam Smith", primary_org="lower")
    house = ScrapeOrganization(name="House", classification="lower")
    bill = ScrapeBill(
        "HB 1", "1900", "Axe & Tack Tax Act", from_organization=house._id
    )
    vote_event = ScrapeVoteEvent(
        legislative_session="1900",
        motion_text="passage",
        start_date="1900-04-01",
        classification="passage:bill",
        result="pass",
        bill_chamber="lower",
        bill="HB 1",
        organization=house._id,
    )
    vote_event.set_count("yes", 20)
    vote_event.yes("John Smith")
    vote_event.no("Adam Smith")

    # Import prerequisites in dependency order: organization, people,
    # memberships, bill, and finally the vote event itself.
    org_importer = OrganizationImporter("jid")
    org_importer.import_data([house.as_dict()])

    person_importer = PersonImporter("jid")
    person_importer.import_data([john.as_dict(), adam.as_dict()])

    membership_importer = MembershipImporter(
        "jid", person_importer, org_importer, DumbMockImporter()
    )
    membership_importer.import_data(
        [john._related[0].as_dict(), adam._related[0].as_dict()]
    )

    bill_importer = BillImporter("jid", org_importer, person_importer)
    bill_importer.import_data([bill.as_dict()])

    vote_importer = VoteEventImporter(
        "jid", person_importer, org_importer, bill_importer
    )
    vote_importer.import_data([vote_event.as_dict()])

    assert VoteEvent.objects.count() == 1
    stored = VoteEvent.objects.get()
    assert stored.legislative_session == LegislativeSession.objects.get()
    assert stored.motion_classification == ["passage:bill"]
    assert stored.bill == Bill.objects.get()

    tally = stored.counts.get()
    assert tally.option == "yes"
    assert tally.value == 20

    cast_votes = list(stored.votes.all())
    assert len(cast_votes) == 2
    for cast in stored.votes.all():
        # Any vote not cast by John Smith must be Adam Smith's "no".
        is_john = cast.voter_name == "John Smith"
        assert cast.option == ("yes" if is_john else "no")
        assert cast.voter == Person.objects.get(
            name="John Smith" if is_john else "Adam Smith"
        )
示例#5
0
    def parse_vote(self, bill, actor, date, motion, url, uniqid):
        """Fetch a roll-call page and yield the ``Vote`` parsed from its text.

        The page text is searched for ``YEAS``, ``NAYS`` and
        ``ABSENT [OR NOT VOTING]`` sections, each a count followed by names
        separated by runs of two or more whitespace characters.

        Args:
            bill: bill the vote belongs to; ``url`` is also added to it as a
                source.
            actor: used as the vote chamber only when it is ``"upper"`` or
                ``"lower"``; otherwise the chamber is the empty string.
            date: start date of the vote.
            motion: motion text.
            url: address of the roll-call page.
            uniqid: value stringified into the vote identifier.

        Yields:
            A single ``Vote`` with counts and individual votes.
        """
        text = self.get(url).text
        bill.add_source(url)

        tally_pattern = re.compile(
            r"YEAS -?\s?(\d+)(.*)NAYS -?\s?(\d+)"
            r"(.*)ABSENT( OR NOT VOTING)? -?\s?"
            r"(\d+)(.*)",
            re.MULTILINE | re.DOTALL,
        )
        found = tally_pattern.search(text)
        # Groups 1, 3 and 6 hold the yes, no and absent totals.
        yes_count, no_count, other_count = (
            int(found.group(index)) for index in (1, 3, 6)
        )

        passed = yes_count > no_count
        vote_chamber = actor if actor in ("upper", "lower") else ""

        vote = Vote(
            chamber=vote_chamber,
            start_date=date,
            motion_text=motion,
            result="pass" if passed else "fail",
            identifier=str(uniqid),
            classification="passage",
            bill=bill,
        )
        vote.add_source(url)
        vote.set_count("yes", yes_count)
        vote.set_count("no", no_count)
        vote.set_count("other", other_count)

        # Groups 2, 4 and 7 hold the name lists that follow each total.
        recorders = (
            (2, vote.yes),
            (4, vote.no),
            (7, lambda name: vote.vote("other", name)),
        )
        for group_index, record in recorders:
            for name in re.split(r"\s{2,}", found.group(group_index).strip()):
                if name:
                    record(name)

        yield vote
示例#6
0
    def scrape_votes(self, bill):
        """Yield a ``Vote`` for every roll call the web service lists for ``bill``.

        Args:
            bill: bill whose ``identifier`` has the bill number as its second
                whitespace-separated token.

        Yields:
            ``Vote`` objects with yes/no/other counts and individual votes;
            absent and excused totals are summed into "other".

        Raises:
            KeyError: if a roll call's agency is neither ``House`` nor
                ``Senate``.
        """
        bill_num = bill.identifier.split()[1]

        url = (
            "http://wslwebservices.leg.wa.gov/legislationservice.asmx/"
            "GetRollCalls?billNumber=%s&biennium=%s" % (bill_num, self.biennium)
        )
        response = self.get(url)
        tree = lxml.etree.fromstring(response.content)

        chamber_for_agency = {"House": "lower", "Senate": "upper"}

        for roll_call in xpath(tree, "//wa:RollCall"):
            motion = xpath(roll_call, "string(wa:Motion)")
            seq_no = xpath(roll_call, "string(wa:SequenceNumber)")

            # VoteDate carries a time after "T"; only the date part is kept.
            day_text = xpath(roll_call, "string(wa:VoteDate)").split("T")[0]
            date = datetime.datetime.strptime(day_text, "%Y-%m-%d").date()

            yes_count, no_count, absent_count, excused_count = (
                int(xpath(roll_call, expr))
                for expr in (
                    "string(wa:YeaVotes/wa:Count)",
                    "string(wa:NayVotes/wa:Count)",
                    "string(wa:AbsentVotes/wa:Count)",
                    "string(wa:ExcusedVotes/wa:Count)",
                )
            )
            other_count = absent_count + excused_count

            chamber = chamber_for_agency[xpath(roll_call, "string(wa:Agency)")]

            # Passing requires yes votes to exceed no and other votes combined.
            vote = Vote(
                chamber=chamber,
                start_date=date,
                motion_text="{} (#{})".format(motion, seq_no),
                result="pass" if yes_count > (no_count + other_count) else "fail",
                bill=bill,
                classification=[],
            )
            vote.set_count("yes", yes_count)
            vote.set_count("no", no_count)
            vote.set_count("other", other_count)
            vote.add_source(url)

            recorders = {"Yea": vote.yes, "Nay": vote.no}
            for member in xpath(roll_call, "wa:Votes/wa:Vote"):
                name = xpath(member, "string(wa:Name)")
                # NOTE(review): the element name is spelled "VOte" here;
                # presumably that matches the service's schema - confirm
                # before "correcting" the casing.
                choice = xpath(member, "string(wa:VOte)")

                record = recorders.get(choice)
                if record is None:
                    vote.vote("other", name)
                else:
                    record(name)

            yield vote
示例#7
0
    def parse_vote(self, actor, date, row, session, bill_id, bill_chamber,
                   source):
        """
        Build a passage VoteEvent from one vote row and yield it.

        The row's own text (non-breaking spaces and hyphens removed) is the
        motion, defaulting to "passage" when blank. The first span's text is
        split from the right on "-" into outcome text and the yes, no and
        other counts; the tails of the second and third spans list the yes
        and no voters, and later spans starting with "Absent" or "Excused"
        list the absent voters.

        If the outcome text contains none of "adopted", "passed" or "failed"
        it is passed through unchanged as the result.
        """
        spans = row.xpath(".//span")
        cleaned = row.text.replace("\u00a0", " ").replace("-", "").strip()
        motion = cleaned or "passage"

        summary = spans[0].text_content()
        passed, yes_count, no_count, other_count = summary.rsplit("-", 3)
        yes_votes = self.get_names(spans[1].tail)
        no_votes = self.get_names(spans[2].tail)

        other_votes = []
        for extra in spans[3:]:
            if extra.text.startswith(("Absent", "Excused")):
                other_votes.extend(self.get_names(extra.tail))

        # First keyword found in the outcome text decides the result.
        outcome_keywords = (
            ("adopted", "pass"),
            ("passed", "pass"),
            ("failed", "fail"),
        )
        lowered = passed.lower()
        result = next(
            (outcome for keyword, outcome in outcome_keywords
             if keyword in lowered),
            passed,
        )

        vote = VoteEvent(
            chamber=actor,
            start_date=date,
            motion_text=motion,
            bill=bill_id,
            bill_chamber=bill_chamber,
            result=result,
            classification="passage",
            legislative_session=session,
        )
        vote.add_source(source)
        vote.set_count("yes", int(yes_count))
        vote.set_count("no", int(no_count))
        vote.set_count("absent", int(other_count))

        # Empty names and the literal placeholder "None" are not recorded.
        recorders = (
            (yes_votes, vote.yes),
            (no_votes, vote.no),
            (other_votes, lambda voter: vote.vote("absent", voter)),
        )
        for names, record in recorders:
            for name in names:
                if name and name != "None":
                    record(name)
        yield vote
示例#8
0
    def scrape_vote(self, chamber, session, bill_id, vote_url):
        """Fetch one vote page and yield the lower-chamber ``VoteEvent`` on it.

        Args:
            chamber: chamber of the bill (stored as ``bill_chamber``).
            session: legislative session identifier.
            bill_id: identifier of the bill voted on.
            vote_url: address of the vote page; also used as the event's
                source and ``pupa_id``.

        Yields:
            A single ``VoteEvent`` with yes/no counts and voter names, or
            nothing at all when the page cannot be fetched.
        """
        try:
            resp = self.get(vote_url)
            html = resp.text
        except scrapelib.HTTPError:
            # Best effort: an unreachable vote page is skipped, not fatal.
            return

        doc = lxml.html.fromstring(html)
        motion = doc.xpath("//p[1]//b[1]/text()")[-1].strip()
        if not motion:
            # Fall back to the first heading when the bold text is blank.
            motion = doc.xpath("//h2[1]/text()")[0].strip()

        # The totals heading is split on whitespace; tokens 0 and 3 are read
        # as the yes and no totals.
        vote_count = (
            doc.xpath("//h3[contains(text(),'YEA and ')]/text()")[0].strip().split()
        )
        yeas = int(vote_count[0])
        nays = int(vote_count[3])

        date = doc.xpath("//b[contains(text(),'Date:')]/../text()")[1].strip()
        date = datetime.datetime.strptime(date, "%m/%d/%Y").date()

        vote = VoteEvent(
            chamber="lower",
            start_date=date,
            motion_text=motion,
            result="pass" if yeas > nays else "fail",
            classification="passage",
            legislative_session=session,
            bill=bill_id,
            bill_chamber=chamber,
        )
        vote.set_count("yes", yeas)
        vote.set_count("no", nays)
        vote.add_source(vote_url)
        vote.pupa_id = vote_url

        # first table has YEAs
        for name in doc.xpath("//table[1]//font/text()"):
            vote.yes(name.strip())

        # second table is nays
        for name in doc.xpath("//table[2]//font/text()"):
            vote.no(name.strip())

        yield vote
示例#9
0
def test_full_vote_event():
    """Import a fully specified vote event and check everything stored for it."""
    create_jurisdiction()
    bill = ScrapeBill("HB 1", "1900", "Axe & Tack Tax Act", chamber="lower")
    vote_event = ScrapeVoteEvent(
        legislative_session="1900",
        motion_text="passage",
        start_date="1900-04-01",
        classification="passage:bill",
        result="pass",
        bill_chamber="lower",
        bill="HB 1",
        chamber="lower",
    )
    vote_event.set_count("yes", 20)
    vote_event.yes("John Smith")
    vote_event.no("Adam Smith")

    # Both voters must exist as members of the lower chamber before import.
    for full_name in ("John Smith", "Adam Smith"):
        Person.objects.create(name=full_name)
    for person in Person.objects.all():
        lower_house = Organization.objects.get(classification="lower")
        person.memberships.create(organization=lower_house)

    bill_importer = BillImporter("jid")
    bill_importer.import_data([bill.as_dict()])

    vote_importer = VoteEventImporter("jid", bill_importer)
    vote_importer.import_data([vote_event.as_dict()])

    assert VoteEvent.objects.count() == 1
    stored = VoteEvent.objects.get()
    assert stored.legislative_session == LegislativeSession.objects.get()
    assert stored.motion_classification == ["passage:bill"]
    assert stored.bill == Bill.objects.get()

    tally = stored.counts.get()
    assert tally.option == "yes"
    assert tally.value == 20

    cast_votes = list(stored.votes.all())
    assert len(cast_votes) == 2
    for cast in stored.votes.all():
        # Any vote not cast by John Smith must be Adam Smith's "no".
        is_john = cast.voter_name == "John Smith"
        assert cast.option == ("yes" if is_john else "no")
        assert cast.voter == Person.objects.get(
            name="John Smith" if is_john else "Adam Smith"
        )
示例#10
0
    def asvote(self):
        """Return this object's data as a passage ``VoteEvent``.

        Chamber, date, motion, outcome, counts and voter lists all come from
        this object's own accessor methods; ``self.url`` is used both as the
        dedupe key and as the source.
        """
        chamber = self.chamber()
        start_date = self.date()
        motion_text = self.motion()
        outcome = "pass" if self.passed() else "fail"

        event = VoteEvent(
            chamber=chamber,
            start_date=start_date,
            motion_text=motion_text,
            result=outcome,
            classification="passage",
            bill=self.bill,
        )
        event.dedupe_key = self.url  # URL contains sequence number

        # Each accessor is called only when its value is about to be stored.
        tallies = (
            ("yes", self.yes_count),
            ("no", self.no_count),
            ("other", self.other_count),
        )
        for option, get_count in tallies:
            event.set_count(option, get_count())

        rolls = (
            (self.yes_votes, event.yes),
            (self.no_votes, event.no),
            (self.other_votes, lambda name: event.vote("other", name)),
        )
        for get_voters, record in rolls:
            for voter in get_voters():
                record(voter)

        event.add_source(self.url)
        return event
示例#11
0
    def handle_page(self):
        """Yield the committee ``VoteEvent`` described by ``self.doc``.

        Nothing is yielded when the page has no vote-total span. Otherwise
        the date, totals, committee and action are read from their labelled
        spans and each entry of the vote list is recorded by its marker:
        ``Y`` yes, ``N`` no, ``-`` not voting.

        Raises:
            ValueError: if a member's vote marker is not recognised, or a
                span expected to hold exactly one text node does not.
        """
        totals = self.doc.xpath(
            '//span[contains(@id, "ctl00_MainContent_lblTotal")]/text()'
        )
        # No totals means no vote was recorded on this page.
        if not totals:
            return

        def first_int(expr):
            # Integer value of the first text node matched by ``expr``.
            return int(self.doc.xpath(expr)[0])

        (raw_date,) = self.doc.xpath('//span[contains(@id, "lblDate")]/text()')
        date = format_datetime(
            datetime.datetime.strptime(raw_date, "%m/%d/%Y %I:%M:%S %p"),
            "US/Eastern",
        )

        yes_count = first_int('//span[contains(@id, "lblYeas")]/text()')
        no_count = first_int('//span[contains(@id, "lblNays")]/text()')
        other_count = first_int('//span[contains(@id, "lblMissed")]/text()')
        result = "pass" if yes_count > no_count else "fail"

        (committee,) = self.doc.xpath(
            '//span[contains(@id, "lblCommittee")]/text()'
        )
        (action,) = self.doc.xpath('//span[contains(@id, "lblAction")]/text()')
        motion = "{} ({})".format(action, committee)

        vote = VoteEvent(
            start_date=date,
            bill=self.kwargs["bill"],
            chamber="lower",
            motion_text=motion,
            result=result,
            classification="committee",
        )
        vote.add_source(self.url)
        vote.set_count("yes", yes_count)
        vote.set_count("no", no_count)
        vote.set_count("not voting", other_count)

        for entry in self.doc.xpath('//ul[contains(@class, "vote-list")]/li'):
            if not entry.text_content().strip():
                continue

            # The second span is read as the member, the first as the marker.
            (member,) = entry.xpath("span[2]//text()")
            (marker,) = entry.xpath("span[1]//text()")
            member = member.strip()

            if marker == "Y":
                vote.yes(member)
            elif marker == "N":
                vote.no(member)
            elif marker == "-":
                vote.vote("not voting", member)
            elif re.search(r"\([YN]\)", marker):
                # Parenthetical votes appear to not be counted in the
                # totals for Yea, Nay, _or_ Missed
                continue
            else:
                raise ValueError(
                    "Unknown vote type found: {}".format(marker))

        yield vote
示例#12
0
    def scrape(self, session=None):
        """Yield every bill of a session together with its vote events.

        For each category in ``self._categories`` the bulk listing is fetched
        from the API. Every listed item becomes a ``Bill`` carrying sources,
        history actions and their documents, then is enriched from a second,
        per-bill ``LegislationDetails`` request with sponsors, hearing and
        markup documents, action attachments and votes.

        Args:
            session: session identifier; when falsy,
                ``self.latest_session()`` is used instead.

        Yields:
            The ``VoteEvent`` objects found for a bill, followed by the
            ``Bill`` itself.

        Raises:
            requests.HTTPError: if either API request returns an error status.
        """
        if not session:
            session = self.latest_session()
            self.info("no session specified, using %s", session)

        # Substrings that mark a document URL as a version of the bill.
        possible_version_types = (
            "SignedAct",
            "Introduction",
            "Enrollment",
            "Engrossment",
        )

        for category in self._categories:
            leg_listing_url = (
                self._API_BASE_URL + f"BulkData/{category['categoryId']}/{session}"
            )
            # NOTE(review): verify=False turns off TLS certificate
            # verification for this request; confirm it is still needed.
            resp = requests.post(leg_listing_url, headers=self._headers, verify=False)
            resp.raise_for_status()
            leg_listing = resp.json()

            for leg in leg_listing:

                bill = Bill(
                    leg["legislationNumber"],
                    legislative_session=session,
                    title=leg["title"],
                    classification=category["name"],
                )
                bill.add_source(leg_listing_url)
                bill_url = (
                    f"https://lims.dccouncil.us/Legislation/{leg['legislationNumber']}"
                )
                bill.add_source(bill_url)

                if leg["lawNumber"]:
                    bill.extras["lawNumber"] = leg["lawNumber"]

                # Actions
                for hist in leg["legislationHistory"]:
                    hist_date = datetime.datetime.strptime(
                        hist["actionDate"], "%b %d, %Y"
                    )
                    hist_date = self._TZ.localize(hist_date)
                    hist_action = hist["actionDescription"]
                    if hist_action.split()[0] in ["OtherAmendment", "OtherMotion"]:
                        # Drop the five-character "Other" prefix.
                        hist_action = hist_action[5:]
                    hist_class = self.classify_action(hist_action)

                    if "mayor" in hist_action.lower():
                        actor = "executive"
                    else:
                        actor = "legislature"
                    bill.add_action(
                        hist_action, hist_date, classification=hist_class, chamber=actor
                    )

                    # Documents with download links
                    if hist["downloadURL"] and ("download" in hist["downloadURL"]):
                        download = hist["downloadURL"]
                        if not download.startswith("http"):
                            download = "https://lims.dccouncil.us/" + download
                        download_lower = download.lower()

                        mimetype = (
                            "application/pdf" if download.endswith("pdf") else None
                        )
                        is_version = False
                        doc_type = None
                        # figure out if it's a version from type/name; when
                        # several types match, the last one wins
                        for vt in possible_version_types:
                            if vt.lower() in download_lower:
                                is_version = True
                                doc_type = vt

                        if "amendment" in download_lower:
                            doc_type = "Amendment"

                        if is_version:
                            bill.add_version_link(
                                doc_type,
                                download,
                                media_type=mimetype,
                                on_duplicate="ignore",
                            )
                        else:
                            bill.add_document_link(
                                hist["actionDescription"],
                                download,
                                media_type=mimetype,
                                on_duplicate="ignore",
                            )

                # Grabs Legislation details
                leg_details_url = (
                    self._API_BASE_URL
                    + f"LegislationDetails/{leg['legislationNumber']}"
                )
                # NOTE(review): TLS verification is disabled here as well.
                details_resp = requests.get(
                    leg_details_url, headers=self._headers, verify=False,
                )
                details_resp.raise_for_status()
                leg_details = details_resp.json()

                # Sponsors
                for introducer in leg_details["introducers"]:
                    bill.add_sponsorship(
                        introducer["memberName"],
                        classification="primary",
                        entity_type="person",
                        primary=True,
                    )

                # Co-sponsors: each entry supplies its own name, and a
                # co-sponsor is not a primary sponsor.
                if leg_details["coSponsors"]:
                    for cs in leg_details["coSponsors"]:
                        bill.add_sponsorship(
                            cs["memberName"],
                            classification="cosponsor",
                            entity_type="person",
                            primary=False,
                        )

                # Committee Hearing Doc
                for comm_hearing in leg_details["committeeHearing"]:
                    if comm_hearing["hearingRecord"]:
                        bill.add_document_link(
                            comm_hearing["hearingType"],
                            comm_hearing["hearingRecord"],
                            media_type="application/pdf",
                            on_duplicate="ignore",
                        )

                for committee_markup in leg_details["committeeMarkup"]:
                    if committee_markup["committeeReport"]:
                        bill.add_document_link(
                            "Committee Markup",
                            committee_markup["committeeReport"],
                            media_type="application/pdf",
                            on_duplicate="ignore",
                        )

                # Actions and Votes
                if leg_details["actions"]:
                    # To prevent duplicate votes
                    seen_vote_ids = set()
                    for act in leg_details["actions"]:
                        action_name = act["action"]
                        # Only the leading YYYY-MM-DD of the timestamp is used.
                        action_date = datetime.datetime.strptime(
                            act["actionDate"][:10], "%Y-%m-%d"
                        )
                        action_date = self._TZ.localize(action_date)

                        if action_name.split()[0] == "Other":
                            action_name = " ".join(action_name.split()[1:])

                        if "mayor" in action_name.lower():
                            actor = "executive"
                        else:
                            actor = "legislature"

                        # Documents and Versions
                        attachment = act["attachment"]
                        if attachment:
                            attachment_lower = attachment.lower()
                            mimetype = (
                                "application/pdf"
                                if attachment.endswith("pdf")
                                else None
                            )
                            is_version = False
                            # Fall back to the raw action text, mirroring the
                            # history documents, so an attachment matching no
                            # known type still gets a defined name instead of
                            # an unbound or left-over ``doc_type``.
                            doc_type = act["action"]
                            # figure out if it's a version from type/name
                            for vt in possible_version_types:
                                if vt.lower() in attachment_lower:
                                    is_version = True
                                    doc_type = vt

                            if "amendment" in attachment_lower:
                                doc_type = "Amendment"

                            if is_version:
                                bill.add_version_link(
                                    doc_type,
                                    attachment,
                                    media_type=mimetype,
                                    on_duplicate="ignore",
                                )
                            else:
                                bill.add_document_link(
                                    doc_type,
                                    attachment,
                                    media_type=mimetype,
                                    on_duplicate="ignore",
                                )

                        # Votes
                        if act["voteDetails"]:
                            result = act["voteDetails"]["voteResult"]
                            if result:
                                status = self._vote_statuses[result.lower()]
                                id_text = (
                                    str(leg["legislationNumber"])
                                    + "-"
                                    + action_name
                                    + "-"
                                    + result
                                )
                                if id_text not in seen_vote_ids:
                                    seen_vote_ids.add(id_text)
                                    action_class = self.classify_action(action_name)
                                    v = VoteEvent(
                                        identifier=id_text,
                                        chamber=actor,
                                        start_date=action_date,
                                        motion_text=action_name,
                                        result=status,
                                        classification=action_class,
                                        bill=bill,
                                    )
                                    v.add_source(leg_listing_url)

                                    yes_count = no_count = 0
                                    absent_count = abstain_count = other_count = 0
                                    for leg_vote in act["voteDetails"]["votes"]:
                                        mem_name = leg_vote["councilMember"]
                                        choice = leg_vote["vote"]
                                        if choice == "Yes":
                                            yes_count += 1
                                            v.yes(mem_name)
                                        elif choice == "No":
                                            no_count += 1
                                            v.no(mem_name)
                                        elif choice == "Absent":
                                            absent_count += 1
                                            v.vote("absent", mem_name)
                                        elif choice == "Recused":
                                            abstain_count += 1
                                            v.vote("abstain", mem_name)
                                        else:
                                            # "Present" and anything new that
                                            # pops up are recorded as "other".
                                            other_count += 1
                                            v.vote("other", mem_name)

                                    v.set_count("yes", yes_count)
                                    v.set_count("no", no_count)
                                    v.set_count("absent", absent_count)
                                    v.set_count("abstain", abstain_count)
                                    v.set_count("other", other_count)
                                    yield v

                yield bill
示例#13
0
    def scrape(self, session=None):
        """Scrape bills, resolutions and roll calls from legislature.vermont.gov.

        The bill listings come from three JSON endpoints (released bills,
        introduced bills, resolutions of both chambers).  For every listing
        entry the bill status page is fetched for sponsors and text versions,
        then the detailed-status and roll-call JSON endpoints are queried
        using the site's internal bill ID.

        Args:
            session: Legislative session identifier.  When ``None``, the
                value returned by ``self.latest_session()`` is used.

        Yields:
            A ``VoteEvent`` for each roll call of a bill, followed by the
            ``Bill`` itself.  A bill whose status page exposes no internal
            ID is yielded with basic data only.  A bill whose detailed-status
            response is not JSON, or whose ``data`` is an empty string, is
            skipped without being yielded.

        Raises:
            AssertionError: For an unrecognised bill-number prefix, a
                constitutional amendment whose ``Body`` is neither ``H`` nor
                ``S``, an action with an unknown chamber code, or a roll call
                whose status text states neither a pass nor a fail.
        """
        HTML_TAGS_RE = r"<.*?>"

        # (bill-number prefix, classification, chamber).  Order matters: the
        # plain "H." / "S." prefixes would also match "H.C.R.", "H.R.",
        # "S.C.R." and "S.R.", so they must be tested last.  A chamber of
        # None means the chamber has to be read from info["Body"].
        bill_prefixes = (
            ("J.R.H.", "joint resolution", "lower"),
            ("J.R.S.", "joint resolution", "upper"),
            ("H.C.R.", "concurrent resolution", "lower"),
            ("S.C.R.", "concurrent resolution", "upper"),
            ("H.R.", "resolution", "lower"),
            ("S.R.", "resolution", "upper"),
            ("PR.", "constitutional amendment", None),
            ("H.", "bill", "lower"),
            ("S.", "bill", "upper"),
        )

        if session is None:
            session = self.latest_session()

        year_slug = self.jurisdiction.get_year_slug(session)

        # Load all bills and resolutions via the private API
        bills_url = "http://legislature.vermont.gov/bill/loadBillsReleased/{}/".format(
            year_slug)
        bills = json.loads(self.get(bills_url).text)["data"] or []

        # NOTE: bills_url is rebound here, so the source recorded on every
        # non-resolution bill below is the "introduced" listing URL.
        bills_url = "http://legislature.vermont.gov/bill/loadBillsIntroduced/{}/".format(
            year_slug)
        bills.extend(json.loads(self.get(bills_url).text)["data"] or [])

        resolutions_url = "http://legislature.vermont.gov/bill/loadAllResolutionsByChamber/{}/both".format(
            year_slug)
        bills.extend(json.loads(self.get(resolutions_url).text)["data"] or [])

        # Parse the information from each bill
        for info in bills:
            # Strip whitespace from strings
            info = {k: v.strip() for k, v in info.items()}
            bill_number = info["BillNumber"]

            # Identify the bill type and chamber
            for prefix, bill_type, bill_chamber in bill_prefixes:
                if bill_number.startswith(prefix):
                    break
            else:
                raise AssertionError(
                    "Unknown bill type found: '{}'".format(bill_number))

            if bill_chamber is None:
                # Constitutional amendments carry their chamber in "Body"
                if info["Body"] == "H":
                    bill_chamber = "lower"
                elif info["Body"] == "S":
                    bill_chamber = "upper"
                else:
                    raise AssertionError("Amendment not tied to chamber")

            bill_id_original_format = bill_number.replace(".", "").replace(
                " ", "")

            # put one space back in between type and number
            bill_id = re.sub(r"([a-zA-Z]+)(\d+)", r"\1 \2",
                             bill_id_original_format)

            # Create the bill using its basic information
            bill = Bill(
                identifier=bill_id,
                legislative_session=session,
                chamber=bill_chamber,
                title=info["Title"],
                classification=bill_type,
            )
            if "resolution" in bill_type:
                bill.add_source(resolutions_url)
            else:
                bill.add_source(bills_url)

            # Load the bill's information page to access its metadata
            bill_url = "http://legislature.vermont.gov/bill/status/{0}/{1}".format(
                year_slug, bill_number)
            doc = self.lxmlize(bill_url)
            bill.add_source(bill_url)

            # Capture sponsors
            sponsors = doc.xpath(
                '//dl[@class="summary-table"]/dt[text()="Sponsor(s)"]/'
                "following-sibling::dd[1]/ul/li")
            sponsor_type = "primary"
            for sponsor in sponsors:
                # Everything after the "Additional Sponsors" marker row is a
                # cosponsor; the marker itself is not a sponsor.
                if sponsor.xpath("span/text()") == ["Additional Sponsors"]:
                    sponsor_type = "cosponsor"
                    continue

                sponsor_name = (sponsor.xpath("a/text()")[0].replace(
                    "Rep.", "").replace("Sen.", "").strip())
                # A link whose whole text is "Less" is not recorded
                if sponsor_name and not (sponsor_name[:5] == "Less"
                                         and len(sponsor_name) == 5):
                    bill.add_sponsorship(
                        name=sponsor_name,
                        classification=sponsor_type,
                        entity_type="person",
                        primary=(sponsor_type == "primary"),
                    )

            # Capture bill text versions
            # Warning: There's a TODO in VT's source code saying 'move this to where it used to be'
            # so leave in the old and new positions
            versions = doc.xpath(
                '//dl[@class="summary-table"]/dt[text()="Bill/Resolution Text"]/'
                "following-sibling::dd[1]/ul/li/a |"
                '//ul[@class="bill-path"]//a')

            for version in versions:
                if version.xpath("text()"):
                    bill.add_version_link(
                        note=version.xpath("text()")[0],
                        url=version.xpath("@href")[0].replace(" ", "%20"),
                        media_type="application/pdf",
                    )

            # Identify the internal bill ID, used for actions and votes
            # If there is no internal bill ID, then it has no extra information
            id_match = re.search(
                r'"bill/loadBillDetailedStatus/.+?/(\d+)"',
                lxml.etree.tostring(doc).decode("utf-8"),
            )
            if id_match is None:
                self.warning("Bill {} appears to have no activity".format(
                    bill_number))
                yield bill
                continue
            internal_bill_id = id_match.group(1)

            # Capture actions
            actions_url = "http://legislature.vermont.gov/bill/loadBillDetailedStatus/{0}/{1}".format(
                year_slug, internal_bill_id)
            actions_response = self.get(actions_url)

            # Checks if page actually has json posted.  A missing
            # Content-Type header is treated like a non-JSON response
            # instead of raising TypeError on `"json" in None`.
            content_type = actions_response.headers.get("Content-Type") or ""
            if "json" not in content_type:
                # NOTE(review): the bill is dropped here without being
                # yielded -- confirm that this is intended.
                continue
            actions = json.loads(actions_response.text)["data"]
            # Checks to see if any data is actually there
            if actions == "":
                continue
            bill.add_source(actions_url)

            for action in actions:
                # Drop null-valued fields from the action record
                action = {k: v for k, v in action.items() if v is not None}
                full_status = action["FullStatus"]
                signed = "Signed by Governor" in full_status

                if signed:
                    actor = "executive"
                elif action["ChamberCode"] == "H":
                    actor = "lower"
                elif action["ChamberCode"] == "S":
                    actor = "upper"
                else:
                    raise AssertionError("Unknown actor for bill action")

                # Categorize action
                if signed:
                    action_type = "executive-signature"
                elif "Vetoed by the Governor" in full_status:
                    action_type = "executive-veto"
                elif ("Read first time" in full_status
                      or "Read 1st time" in full_status):
                    action_type = "introduction"
                elif "Reported favorably" in full_status:
                    action_type = "committee-passage-favorable"
                elif actor == "lower" and any(
                        x.lower().startswith("aspassed")
                        for x in action["keywords"].split(";")):
                    action_type = "passage"
                elif actor == "upper" and any(
                        x.lower().startswith((" aspassed", "aspassed"))
                        for x in action["keywords"].split(";")):
                    action_type = "passage"
                else:
                    action_type = None

                # Manual fix for data error in
                # https://legislature.vermont.gov/bill/status/2020/H.511
                status_date = action["StatusDate"].replace("/0209", "/2019")

                # Manual fix for data error in
                # https://legislature.vermont.gov/bill/status/2020/H.754
                if bill_id == "H 754" and session == "2019-2020":
                    status_date = status_date.replace("/0202", "/2020")

                # https://legislature.vermont.gov/bill/status/2020/H.942
                if bill_id == "H 942" and session == "2019-2020":
                    status_date = status_date.replace("/0200", "/2020")

                action_date = datetime.datetime.strptime(
                    status_date, "%m/%d/%Y").strftime("%Y-%m-%d")
                # strftime doesn't always pad year value (%Y)  (https://bugs.python.org/issue32195)
                # and sometimes this state has typos in year part of the StatusDate value
                # which can cause validation errors, so fix leading zeroes if they are missing
                year_width = action_date.find("-")
                if year_width < 4:
                    action_date = "0" * (4 - year_width) + action_date

                bill.add_action(
                    description=re.sub(HTML_TAGS_RE, "", full_status),
                    date=action_date,
                    chamber=actor,
                    classification=action_type,
                )

            # Capture votes
            votes_url = "http://legislature.vermont.gov/bill/loadBillRollCalls/{0}/{1}".format(
                year_slug, internal_bill_id)
            votes = json.loads(self.get(votes_url).text)["data"]
            bill.add_source(votes_url)

            for vote in votes:
                roll_call_id = vote["VoteHeaderID"]
                roll_call_url = ("http://legislature.vermont.gov/bill/"
                                 "loadBillRollCallDetails/{0}/{1}".format(
                                     year_slug, roll_call_id))
                roll_call = json.loads(self.get(roll_call_url).text)["data"]

                roll_call_yea = []
                roll_call_nay = []
                roll_call_not_voting = []
                for member in roll_call:
                    # MemberName is split on " of "; only the part before
                    # it is kept as the voter name.
                    (member_name,
                     _district) = member["MemberName"].split(" of ")
                    member_name = member_name.strip()

                    if member["MemberVote"] == "Yea":
                        roll_call_yea.append(member_name)
                    elif member["MemberVote"] == "Nay":
                        roll_call_nay.append(member_name)
                    else:
                        roll_call_not_voting.append(member_name)

                vote_status = vote["FullStatus"]
                if ("Passed -- " in vote_status
                        # seems like we've seen both
                        or "Governor overridden" in vote_status
                        or "Governor overriden" in vote_status):
                    did_pass = True
                elif ("Failed -- " in vote_status
                      or "Veto of the Governor sustained" in vote_status):
                    did_pass = False
                else:
                    raise AssertionError("Roll call vote result is unclear: " +
                                         vote_status)

                # The yes/no totals come from the status text, not from
                # counting the member rows.
                yea_count = int(
                    re.search(r"Yeas = (\d+)", vote_status).group(1))
                nay_count = int(
                    re.search(r"Nays = (\d+)", vote_status).group(1))

                vote_start_date = datetime.datetime.strptime(
                    vote["StatusDate"], "%m/%d/%Y").strftime("%Y-%m-%d")
                motion_text = re.sub(HTML_TAGS_RE, "", vote_status).strip()
                vote_identifier = (vote["StatusDate"] + "--" + motion_text +
                                   "--" + roll_call_url)
                vote_to_add = VoteEvent(
                    identifier=vote_identifier,
                    bill=bill,
                    chamber=("lower"
                             if vote["ChamberCode"] == "H" else "upper"),
                    start_date=vote_start_date,
                    motion_text=motion_text,
                    result="pass" if did_pass else "fail",
                    classification="passage",
                    legislative_session=session,
                )
                vote_to_add.add_source(roll_call_url)

                vote_to_add.set_count("yes", yea_count)
                vote_to_add.set_count("no", nay_count)
                vote_to_add.set_count("not voting", len(roll_call_not_voting))

                for member in roll_call_yea:
                    vote_to_add.yes(member)
                for member in roll_call_nay:
                    vote_to_add.no(member)
                for member in roll_call_not_voting:
                    vote_to_add.vote("not voting", member)

                yield vote_to_add

            # The printable document pages are keyed by the same year slug as
            # the status page; a fixed "2020" here would point every other
            # session at the wrong biennium.
            print_url = "https://legislature.vermont.gov/bill/print/{0}/{1}/{2}"

            # Witnesses:
            #   http://legislature.vermont.gov/bill/loadBillWitnessList/{year_slug}/{internal_bill_id}
            witnesses_doc_link_url = print_url.format(
                year_slug, bill_id_original_format, "witnesses")
            bill.add_document_link(note="Witness List",
                                   url=witnesses_doc_link_url,
                                   media_type="text/html")

            # Conference committee members:
            #   http://legislature.vermont.gov/bill/loadBillConference/{year_slug}/{bill_number}
            conferees_doc_link_url = print_url.format(
                year_slug, bill_id_original_format, "conference")
            page = self.lxmlize(conferees_doc_link_url)
            # Only link the page when it has no "no-data" placeholder text
            no_data = page.xpath('//div[@class="no-data"]/text()')
            if not no_data:
                bill.add_document_link(
                    note="Conference Committee Members",
                    url=conferees_doc_link_url,
                    media_type="text/html",
                )

            # Committee meetings:
            #   http://legislature.vermont.gov/committee/loadHistoryByBill/{year_slug}?LegislationId={internal_bill_id}
            meetings_doc_link_url = print_url.format(
                year_slug, bill_id_original_format, "meetings")
            bill.add_document_link(
                note="Committee Meetings",
                url=meetings_doc_link_url,
                media_type="text/html",
            )

            yield bill
示例#14
0
    def scrape_votes(self, bill, url):
        """Yield one vote object per distinct roll call found on ``url``.

        The page is scanned for paragraphs matching the "OKLAHOMA HOUSE" /
        "OKLAHOMA STATE SENATE" header.  For each header the motion, RCS
        number, date, tallies and voter names are read from the paragraphs
        that follow it.

        Args:
            bill: Bill the votes are attached to.
            url: Address of the vote page; also used as the vote source and
                as the prefix of the dedupe key.

        Yields:
            ``Vote`` objects.  Roll calls whose RCS number was already seen
            on this page, or whose tally lines carry trailing text, are
            skipped.
        """
        page = lxml.html.fromstring(self.get(url).text.replace(u"\xa0", " "))

        namespaces = {"re": "http://exslt.org/regular-expressions"}
        header_path = r"//p[re:test(text(), 'OKLAHOMA\s+(HOUSE|STATE\s+SENATE)')]"
        tally_re = re.compile(r"(YEAS|NAYS|EXCUSED|VACANT|CONSTITUTIONAL "
                              r"PRIVILEGE|NOT VOTING|N/V)\s*:\s*(\d+)(.*)")
        handled_rcs = set()

        for header in page.xpath(header_path, namespaces=namespaces):
            # Each chamber has the motion name on a different line of the file
            if "HOUSE" in header.xpath("string()"):
                chamber, motion_index = "lower", 8
            else:
                chamber, motion_index = "upper", 13

            motion = header.xpath("string(following-sibling::p[%d])" %
                                  motion_index).strip()
            motion = re.sub(r"\s+", " ", motion)
            if not motion.strip():
                # NOTE(review): this ends the whole generator, so any later
                # roll calls on the page are not scraped -- confirm intent.
                self.warning("Motion text not found")
                return

            passed = None
            outcome = re.match(r"^(.*) (PASSED|FAILED)$", motion)
            if outcome:
                motion = outcome.group(1)
                passed = outcome.group(2) == "PASSED"

            rcs_p = header.xpath(
                "following-sibling::p[contains(., 'RCS#')]")[0]
            rcs_text = rcs_p.xpath("string()").replace(u"\xa0", " ")
            rcs = re.search(r"RCS#\s+(\d+)", rcs_text).group(1)

            if rcs in handled_rcs:
                continue
            handled_rcs.add(rcs)

            # The date is read from the element right after the RCS paragraph
            date_text = rcs_p.getnext().xpath("string()")
            vote_date = datetime.datetime.strptime(
                re.search(r"\d+/\d+/\d+", date_text).group(0),
                "%m/%d/%Y").date()

            bad_vote = False
            seen_yes = False
            vtype = None  # bucket that subsequent name lines belong to
            counts = collections.defaultdict(int)
            votes = collections.defaultdict(list)

            for sib in header.xpath("following-sibling::p")[13:]:
                line = sib.xpath("string()").replace("\r\n", " ").strip()
                if "*****" in line:
                    break

                tally = tally_re.match(line)
                if not tally:
                    # Not a tally line: once the YEAS tally has been seen,
                    # treat it as a run of names separated by three spaces.
                    if seen_yes:
                        votes[vtype].extend(
                            piece.strip() for piece in line.split("   ")
                            if piece and "HOUSE" not in piece
                            and "SENATE " not in piece)
                    continue

                label, number, trailing = tally.groups()
                if label == "YEAS" and "RCS#" not in line:
                    vtype = "yes"
                    seen_yes = True
                elif label == "NAYS" and seen_yes:
                    vtype = "no"
                elif label == "VACANT":
                    continue  # skip these
                elif seen_yes:
                    vtype = "other"

                if seen_yes and trailing.strip():
                    self.warning("Bad vote format, skipping.")
                    bad_vote = True
                counts[vtype] += int(number)

            if bad_vote:
                continue

            if passed is None:
                # No PASSED/FAILED suffix on the motion: fall back to
                # comparing the yes tally against everything else.
                passed = counts["yes"] > (counts["no"] + counts["other"])

            vote = Vote(
                chamber=chamber,
                start_date=vote_date.strftime("%Y-%m-%d"),
                motion_text=motion,
                result="pass" if passed else "fail",
                bill=bill,
                classification="passage",
            )
            for option in ("yes", "no", "other"):
                vote.set_count(option, counts[option])
            vote.dedupe_key = url + "#" + rcs

            vote.add_source(url)

            for voter in votes["yes"]:
                vote.yes(voter)
            for voter in votes["no"]:
                # A colon means a tally line was mistaken for a name
                if ":" in voter:
                    raise Exception(voter)
                vote.no(voter)
            for voter in votes["other"]:
                vote.vote("other", voter)

            yield vote
示例#15
0
    def scrape_vote(self, bill, vote_json, session):
        """Build a ``VoteEvent`` from one roll-call record.

        Args:
            bill: Bill the vote belongs to.
            vote_json: Mapping holding the roll call.  The keys read here are
                ``amendmentNumber``, ``action``, ``chamber``, ``voteDate``,
                ``billNumber``, the five ``*VotesCount`` totals and the five
                comma-separated ``*Votes`` name strings.
            session: Legislative session; also used in the source URL.

        Yields:
            A single ``VoteEvent``.  The result is "pass" when the yes count
            exceeds the no count, otherwise "fail".
        """
        amendment = vote_json["amendmentNumber"]
        if amendment:
            motion = "{}: {}".format(amendment, vote_json["action"])
        else:
            motion = vote_json["action"]

        passed = vote_json["yesVotesCount"] > vote_json["noVotesCount"]

        v = VoteEvent(
            chamber=self.chamber_abbrev_map[vote_json["chamber"]],
            start_date=self.parse_local_date(vote_json["voteDate"]),
            motion_text=motion,
            result="pass" if passed else "fail",
            legislative_session=session,
            bill=bill,
            classification="other",
        )

        # (vote option, key of the total in vote_json)
        count_keys = (
            ("yes", "yesVotesCount"),
            ("no", "noVotesCount"),
            ("absent", "absentVotesCount"),
            ("excused", "excusedVotesCount"),
            ("other", "conflictVotesCount"),
        )
        for option, key in count_keys:
            v.set_count(option, vote_json[key])

        # (key of the comma-separated name list, callable recording a voter).
        # Conflict votes are filed under the generic "other" option.
        name_recorders = (
            ("yesVotes", v.yes),
            ("noVotes", v.no),
            ("absentVotes", lambda who: v.vote(option="absent", voter=who)),
            ("excusedVotes", lambda who: v.vote(option="excused", voter=who)),
            ("conflictVotes", lambda who: v.vote(option="other", voter=who)),
        )
        for key, record in name_recorders:
            for raw_name in vote_json[key].split(","):
                # Empty pieces (e.g. from an empty list string) are ignored
                if raw_name:
                    record(raw_name.strip())

        v.add_source("http://lso.wyoleg.gov/Legislation/{}/{}".format(
            session, vote_json["billNumber"]
        ))

        yield v
示例#16
0
    def scrape_vote(self, bill, vote_id, session):
        """Fetch one Delaware roll call by ID and yield it as a ``VoteEvent``.

        Args:
            bill: Bill the roll call belongs to.
            vote_id: Roll-call ID posted to the JSON endpoint and embedded in
                the PDF link used as source and dedupe key.
            session: Legislative session for the vote.

        Yields:
            At most one ``VoteEvent``; nothing when the endpoint returns an
            empty/falsy JSON payload.
        """
        vote_url = (
            "https://legis.delaware.gov/json/RollCall/GetRollCallVoteByRollCallId"
        )
        form = {"rollCallId": vote_id, "sort": "", "group": "", "filter": ""}

        self.info("Fetching vote {} for {}".format(vote_id, bill.identifier))
        page = self.post(url=vote_url, data=form, allow_redirects=True).json()
        if not page:
            return

        roll = page["Model"]
        vote_chamber = self.chamber_map[roll["ChamberName"]]

        # Timestamps look like "7/1/16 01:00 AM"; only the date is kept
        taken_at = dt.datetime.strptime(roll["TakenAtDateTime"],
                                        "%m/%d/%y %I:%M %p")
        vote_date = taken_at.strftime("%Y-%m-%d")

        # TODO: What does this code mean?
        vote_motion = roll["RollCallVoteType"]

        if roll["RollCallStatus"] == "Passed":
            vote_passed = "pass"
        else:
            vote_passed = "fail"

        # Everything that is neither yes nor no is folded into "other"
        other_count = sum(
            int(roll[key])
            for key in ("NotVotingCount", "VacantVoteCount",
                        "AbsentVoteCount", "ConflictVoteCount"))

        vote = VoteEvent(
            chamber=vote_chamber,
            start_date=vote_date,
            motion_text=vote_motion,
            result=vote_passed,
            bill=bill,
            legislative_session=session,
            classification=[],
        )

        # Vote URL is just a generic search URL with POSTed data,
        # so provide a different link
        vote_pdf_url = ("https://legis.delaware.gov"
                        "/json/RollCallController/GenerateRollCallPdf"
                        "?rollCallId={}&chamberId={}".format(
                            vote_id, self.chamber_codes[vote_chamber]))
        vote.add_source(vote_pdf_url)
        vote.dedupe_key = vote_pdf_url

        vote.set_count("yes", roll["YesVoteCount"])
        vote.set_count("no", roll["NoVoteCount"])
        vote.set_count("other", other_count)

        for row in roll["AssemblyMemberVotes"]:
            # AssemblyMemberId looks like it should work here,
            # but for some sessions it's bugged to only return session
            try:
                voter = self.legislators_by_short[str(row["ShortName"])]
                name = voter["DisplayName"]
            except KeyError:
                # Unknown legislator: fall back to the raw short name
                self.warning("could not find legislator short name %s",
                             row["ShortName"])
                name = row["ShortName"]

            vote_code = row["SelectVoteTypeCode"]
            if vote_code == "Y":
                vote.yes(name)
            elif vote_code == "N":
                vote.no(name)
            else:
                vote.vote("other", name)

        yield vote
    def scrape_vote(self, bill, date, url):
        """Scrape a single vote page and yield it as a ``VoteEvent``.

        The page header (an ``h3`` whose id contains ``hdVote``) is split on
        ", ": the second piece names the location (House / Senate / Joint)
        and the remaining pieces form the motion text.

        Args:
            bill: Bill the vote is attached to; its identifier is appended to
                the vote's ``pupa_id``.
            date: Start date passed through to the ``VoteEvent`` unchanged.
            url: Address of the vote page.

        Yields:
            At most one ``VoteEvent``.  Nothing is yielded when the header
            contains "No Bill Action" or when no motion text can be derived.

        Raises:
            ScrapeError: If the location is not House, Senate or Joint.
        """
        page = lxml.html.fromstring(self.get(url).text)

        header = page.xpath("string(//h3[contains(@id, 'hdVote')])")

        if "No Bill Action" in header:
            self.warning("bad vote header -- skipping")
            return

        header_parts = header.split(", ")
        location = header_parts[1]

        if location.startswith("House"):
            chamber = "lower"
        elif location.startswith("Senate"):
            chamber = "upper"
        elif location.startswith("Joint"):
            chamber = "legislature"
        else:
            raise ScrapeError("Bad chamber: %s" % location)

        motion = ", ".join(header_parts[2:]).strip()
        if not motion:
            # If we can't detect a motion, skip this vote
            return

        def span_count(id_fragment):
            # Integer text of the span whose id contains ``id_fragment``
            return int(
                page.xpath("string(//span[contains(@id, '%s')])" % id_fragment)
            )

        yes_count = span_count("tdAyes")
        no_count = span_count("tdNays")
        excused_count = span_count("tdExcused")
        absent_count = span_count("tdAbsent")

        passed = yes_count > no_count

        # Named ``classification`` rather than ``type`` so the builtin is
        # not shadowed.
        if motion.startswith("Do Pass"):
            classification = "passage"
        elif motion == "Concurred in amendments":
            classification = "amendment"
        elif motion == "Veto override":
            classification = "veto_override"
        else:
            classification = "other"

        vote = VoteEvent(
            chamber=chamber,
            start_date=date,
            motion_text=motion,
            result="pass" if passed else "fail",
            classification=classification,
            bill=bill,
        )
        # The vote page URL has a unique ID
        # However, some votes are "consent calendar" events,
        # and relate to the passage of _multiple_ bills
        # These can't be modeled yet in Pupa, but for now we can
        # append a bill ID to the URL that forms the `pupa_id`
        # https://github.com/opencivicdata/pupa/issues/308
        vote.pupa_id = "{}#{}".format(url, bill.identifier.replace(" ", ""))

        vote.add_source(url)
        vote.set_count("yes", yes_count)
        vote.set_count("no", no_count)
        vote.set_count("excused", excused_count)
        vote.set_count("absent", absent_count)

        # Each option cell is preceded by the cell holding the voter's name
        for td in page.xpath("//table[@id='tblVoteTotals']/tbody/tr/td"):
            # lxml sets ``.text`` to None for a cell with no leading text
            # (e.g. an empty cell); treat that as "not an option cell"
            # instead of crashing on ``None.strip()``.
            option_or_person = (td.text or "").strip()
            if option_or_person in ("Aye", "Yea"):
                vote.yes(td.getprevious().text.strip())
            elif option_or_person == "Nay":
                vote.no(td.getprevious().text.strip())
            elif option_or_person == "Excused":
                vote.vote("excused", td.getprevious().text.strip())
            elif option_or_person == "Absent":
                vote.vote("absent", td.getprevious().text.strip())

        yield vote
示例#18
0
    def scrape_votes_for_chamber(self, chamber, vote_data, bill, link):
        """Split a chamber's raw vote text into individual ``VoteEvent``s.

        Args:
            chamber: Chamber value passed through to every ``VoteEvent``.
            vote_data: Raw text holding all of the chamber's votes; it is cut
                into votes on "<word> by <words> -" markers and each vote
                into fields on runs of ten non-breaking spaces.
            bill: Bill the votes belong to.
            link: Source URL added to each vote.

        Yields:
            One ``VoteEvent`` per vote chunk that has at least two fields.
            Motions already present in ``self._seen_votes`` get a numeric
            suffix so that their text stays unique.
        """
        tally_re = re.compile(r"\d+$")
        aye_names_re = re.compile(r"^.+voting aye were: (.+) -")
        no_names_re = re.compile(r"^.+voting no were: (.+) -")
        not_voting_names_re = re.compile(
            r"^.+present and not voting were: (.+) -")

        motion_count = 1

        chunks = re.split(r"\w+? by [\w ]+?\s+-", vote_data.strip())[1:]
        for chunk in chunks:
            fields = chunk.split(
                u"\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0")
            if len(fields) < 2:
                continue
            motion = fields[0]

            date_match = re.search(r"(\d+/\d+/\d+)", motion)
            if date_match:
                start_date = datetime.datetime.strptime(
                    date_match.group(), "%m/%d/%Y").strftime("%Y-%m-%d")
            else:
                start_date = None

            passed = any((
                "Passed" in motion,
                "Recommended for passage" in motion,
                "Rec. for pass" in motion,
                "Adopted" in fields[1],
            ))

            yes_count = no_count = not_voting_count = 0
            ayes, nos, not_voting = [], [], []

            for field in fields[1:]:
                field = field.strip()
                # Trailing digits, if any, are the tally of a count line
                tally = tally_re.search(field)
                if field.startswith("Ayes...") and tally:
                    yes_count = int(tally.group())
                elif field.startswith("Noes...") and tally:
                    no_count = int(tally.group())
                elif field.startswith("Present and not voting...") and tally:
                    not_voting_count += int(tally.group())
                elif aye_names_re.search(field):
                    ayes = aye_names_re.search(field).group(1).split(", ")
                elif no_names_re.search(field):
                    nos = no_names_re.search(field).group(1).split(", ")
                elif not_voting_names_re.search(field):
                    not_voting += not_voting_names_re.search(field).group(
                        1).split(", ")

            motion = motion.strip().replace("&AMP;", "&")  # un-escape ampersands
            if motion in self._seen_votes:
                motion = "{} ({})".format(motion, motion_count)
                motion_count += 1
            self._seen_votes.add(motion)

            vote = VoteEvent(
                motion_text=motion,
                start_date=start_date,
                classification="passage",
                result="pass" if passed else "fail",
                chamber=chamber,
                bill=bill,
            )
            vote.set_count("yes", yes_count)
            vote.set_count("no", no_count)
            vote.set_count("not voting", not_voting_count)
            vote.add_source(link)

            # A name is recorded only once per vote, under the first list
            # (ayes, then noes, then not voting) in which it appears.
            recorded = set()
            groups = (
                (ayes, vote.yes),
                (nos, vote.no),
                (not_voting, lambda who: vote.vote("not voting", who)),
            )
            for names, record in groups:
                for voter in names:
                    if voter in recorded:
                        continue
                    record(voter)
                    recorded.add(voter)

            yield vote
示例#19
0
    def scrape_vote(self, bill, name, url):
        """Scrape one roll-call page and yield it as a ``Vote``.

        Args:
            bill: Bill the vote belongs to; ``bill.legislative_session`` is
                parsed as the year of the vote date.
            name: Motion text for the vote.
            url: Address of the roll-call page.  A URL containing "VOTE/h"
                selects the lower-chamber table layout, anything else the
                upper-chamber layout.

        Yields:
            At most one ``Vote``; nothing when the page contains
            "BUDGET ADDRESS".
        """
        # The layouts differ in which columns start a member entry and in
        # how far the name / Y / N cells sit from that start column.
        if "VOTE/h" in url:
            vote_chamber = "lower"
            cols = (1, 5, 9, 13)
            name_offset, yes_offset, no_offset = 3, 0, 1
        else:
            vote_chamber = "upper"
            cols = (1, 6)
            name_offset, yes_offset, no_offset = 4, 1, 2

        raw_page = self.get(url, verify=False).text

        if "BUDGET ADDRESS" in raw_page:
            return

        page = lxml.html.fromstring(raw_page)

        def number_in_span(label):
            # First run of digits in the span whose text contains ``label``
            text = page.xpath("string(//span[contains(., '%s')])" % label)
            return int(re.match(r"[^\d]*(\d+)[^\d]*", text).group(1))

        yes_count = number_in_span("Those voting Yea")
        no_count = number_in_span("Those voting Nay")
        other_count = number_in_span("Those absent")
        need_count = number_in_span("Necessary for")

        taken_on = page.xpath("string(//span[contains(., 'Taken on')])")
        month_day = re.match(r".*Taken\s+on\s+(\d+/\s?\d+)", taken_on).group(1)
        month_day = month_day.replace(" ", "")
        # The page gives only month/day; the year comes from the session
        date = datetime.datetime.strptime(
            month_day + " " + bill.legislative_session, "%m/%d %Y"
        ).date()

        # not sure about classification.
        # NOTE(review): the comparison is strict, so a yes count equal to
        # the "Necessary for" figure is recorded as a fail -- confirm.
        vote = Vote(
            chamber=vote_chamber,
            start_date=date,
            motion_text=name,
            result="pass" if yes_count > need_count else "fail",
            classification="passage",
            bill=bill,
        )
        vote.set_count("yes", yes_count)
        vote.set_count("no", no_count)
        vote.set_count("other", other_count)
        vote.add_source(url)

        table = page.xpath("//table")[0]
        for row in table.xpath("tr"):
            for start in cols:

                def cell(offset):
                    # Text of the cell ``offset`` columns after ``start``
                    return row.xpath("string(td[%d])" % (start + offset))

                voter = cell(name_offset).strip()
                if not voter or voter == "VACANT":
                    continue
                voter = string.capwords(voter)

                if "Y" in cell(yes_offset):
                    vote.yes(voter)
                elif "N" in cell(no_offset):
                    vote.no(voter)
                else:
                    vote.vote("other", voter)

        yield vote
示例#20
0
    def process_vote(self, vote, bill, member_ids):
        """Turn one vote record into a ``VoteEvent``.

        Args:
            vote: dict for a single vote. ``ReadingDescription``,
                ``DateOfVote`` and ``MemberVotes`` are read; ``VoteResult``,
                ``AttachmentPath`` and ``DocumentType`` are optional. A
                ``type`` key is written back for amendment and reading
                motions.
            bill: bill the vote belongs to; also handed to
                ``self.add_documents`` together with any attachment path.
            member_ids: mapping from integer member id to member name.

        Returns:
            The populated ``VoteEvent``, or ``None`` (after logging a
            warning) when the record has no motion description or no
            recognisable result.
        """
        try:
            motion = vote["ReadingDescription"]
        except KeyError:
            self.logger.warning(
                "Can't even figure out what we're voting on. Skipping.")
            return

        if "VoteResult" not in vote:
            lowered = motion.lower()
            if "postponed" in lowered:
                result = "Postponed"
                # "pass" because we're talking about the motion, not the
                # amendment
                status = "pass"
            elif "tabled" in lowered:
                result = "Tabled"
                status = "pass"
            else:
                self.logger.warning("Could not find result of vote, skipping.")
                return
        else:
            result = vote["VoteResult"].strip().lower()
            statuses = {
                "approved": "pass",
                "disapproved": "fail",
                "failed": "fail",
                "declined": "fail",
                "passed": "pass",
            }
            status = statuses.get(result)
            if status is None:
                self.logger.warning(
                    "Unexpected vote result '{result},' skipping vote.".format(
                        result=result))
                return

        date = self.date_format(vote["DateOfVote"])

        leg_votes = vote["MemberVotes"]
        v = VoteEvent(
            chamber="legislature",
            start_date=date,
            motion_text=motion,
            result=status,
            classification="passage",
            bill=bill,
        )
        yes_count = no_count = other_count = 0
        for leg_vote in leg_votes:
            mem_name = member_ids[int(leg_vote["MemberId"])]
            if leg_vote["Vote"] == "1":
                yes_count += 1
                v.yes(mem_name)
            elif leg_vote["Vote"] == "2":
                no_count += 1
                v.no(mem_name)
            else:
                other_count += 1
                v.vote("other", mem_name)

        v.set_count("yes", yes_count)
        v.set_count("no", no_count)
        v.set_count("other", other_count)

        # the documents for the readings are inside the vote
        # level in the json, so we'll deal with them here
        # and also add relevant actions
        motion_lower = motion.lower()
        if "amendment" in motion_lower:
            # ``status`` is always a non-empty string, so it must be compared
            # rather than truth-tested. Deferrals carry status "pass" (the
            # motion itself carried) and therefore have to be checked first.
            if result in ("Tabled", "Postponed"):
                t = "amendment-deferral"
            elif status == "pass":
                t = "amendment-passage"
            else:
                t = "amendment-failure"
        else:
            reading_markers = (
                ("first reading", "reading-1"),
                ("1st reading", "reading-1"),
                ("second reading", "reading-2"),
                ("2nd reading", "reading-2"),
                ("third reading", "reading-3"),
                ("3rd reading", "reading-3"),
                ("final reading", "reading-3"),
            )
            t = next(
                (label for marker, label in reading_markers
                 if marker in motion_lower),
                None,
            )

        if t:
            # Every amendment outcome is recorded under the same type;
            # readings keep their ordinal label.
            vote["type"] = "amendment" if "amendment" in t else t

        # some documents/versions are hiding in votes.
        if "AttachmentPath" in vote:
            version_kinds = ("enrollment", "engrossment", "introduction")
            is_version = (
                vote.get("DocumentType") in version_kinds
                or motion in version_kinds
            )
            self.add_documents(vote["AttachmentPath"], bill, is_version)

        return v
    def parse_bill_actions_table(self, bill, action_table, bill_id, session,
                                 url, bill_chamber):
        """Record every action row on ``bill`` and yield the votes found.

        The first child of ``action_table`` is skipped; each remaining row is
        read as (date, actor code, action text). Every row is added to
        ``bill`` as an action. Rows whose text ``self.parse_vote`` can parse
        additionally produce a ``VoteEvent``.

        Yields:
            One ``VoteEvent`` per row that contains a parsable vote.
        """
        # actors with a reconsideration seen since their last recorded vote
        pending_reconsideration = set()

        for row in action_table.xpath("*")[1:]:
            when = dt.datetime.strptime(row[0].text_content(), "%m/%d/%Y")
            when = when.strftime("%Y-%m-%d")
            actor_code = row[1].text_content().upper()
            description = row[2].text_content()
            actor = self._vote_type_map[actor_code]
            act_type, committees = categorize_action(description)

            # XXX: Translate short-code to full committee name for the
            #      matcher. Unknown short codes are dropped.
            full_names = []
            for short_code in committees or ():
                try:
                    full_names.append(self.short_ids[short_code]["name"])
                except KeyError:
                    pass

            recorded_action = bill.add_action(description,
                                              when,
                                              chamber=actor,
                                              classification=act_type)
            for full_name in full_names:
                recorded_action.add_related_entity(name=full_name,
                                                   entity_type="organization")

            parsed = self.parse_vote(description)
            if not parsed:
                if re.search("reconsider", description, re.IGNORECASE):
                    pending_reconsideration.add(actor)
                continue

            tallies, motion = parsed
            if actor in pending_reconsideration:
                motion_text = "Reconsider: " + motion
            else:
                motion_text = motion

            vote_event = VoteEvent(
                start_date=when,
                chamber=actor,
                bill=bill_id,
                bill_chamber=bill_chamber,
                legislative_session=session,
                motion_text=motion_text,
                result="pass" if "passed" in description.lower() else "fail",
                classification="passage",
            )
            pending_reconsideration.discard(actor)
            vote_event.add_source(url)
            vote_event.set_count("yes", int(tallies["n_yes"] or 0))
            vote_event.set_count("no", int(tallies["n_no"] or 0))
            vote_event.set_count("not voting", int(tallies["n_excused"] or 0))

            # "yes with reservations" is recorded as a plain yes vote.
            recorders = (
                ("yes", vote_event.yes),
                ("yes_resv", vote_event.yes),
                ("no", vote_event.no),
                ("excused", lambda who: vote_event.vote("not voting", who)),
            )
            for key, record in recorders:
                for voter in split_specific_votes(tallies[key]):
                    record(self.clean_voter_name(voter))

            yield vote_event
示例#22
0
    def parse_html_vote(self, bill, actor, date, motion, url, uniqid):
        """Parse an HTML vote page and yield the vote(s) it contains.

        Pages whose description mentions a committee are delegated to
        ``self.scrape_committee_vote``. Voice votes, pages whose description
        contains "UTAH STATE LEGISLATURE" or is just "-", and pages that
        cannot be fetched yield nothing.

        Yields:
            ``Vote`` objects.

        Raises:
            NotImplementedError: when the description says neither "Passed"
                nor "Failed" and matches none of the skip conditions.
        """
        try:
            html = self.get(url).text
        except scrapelib.HTTPError:
            self.warning("A vote page not found for bill {}".format(
                bill.identifier))
            return

        doc = lxml.html.fromstring(html)
        doc.make_links_absolute(url)

        descr = doc.xpath("//b")[0].text_content()
        if descr == "":
            # New page method
            descr = doc.xpath("//center")[0].text

        if "on voice vote" in descr:
            return

        if "committee" in descr.lower():
            yield from self.scrape_committee_vote(bill, actor, date, motion,
                                                  doc, url, uniqid)
            return

        passed = None
        if "Passed" in descr:
            passed = True
        elif "Failed" in descr:
            passed = False
        elif "UTAH STATE LEGISLATURE" in descr or descr.strip() == "-":
            return
        else:
            self.warning(descr)
            raise NotImplementedError("Can't see if we passed or failed")

        # Headings after the first <b> look like "<label> - <count>"; each
        # one is paired positionally with a table listing the voters.
        tallies = {}
        for heading, voter_table in zip(doc.xpath("//b")[1:],
                                        doc.xpath("//table")):
            parts = [piece.strip()
                     for piece in heading.text_content().split("-", 1)]
            if len(parts) != 2:
                continue
            label, count = parts
            tallies[label.strip()] = {
                "count": int(count),
                "people": [
                    cell.text_content().strip()
                    for cell in voter_table.xpath(".//font[@face='Arial']")
                ],
            }

        vote = Vote(
            chamber=actor,
            start_date=date,
            motion_text=motion,
            result="pass" if passed else "fail",
            bill=bill,
            classification="passage",
            identifier=str(uniqid),
        )
        vote.set_count("yes", tallies["Yeas"]["count"])
        vote.set_count("no", tallies["Nays"]["count"])
        vote.set_count("other", tallies["Absent or not voting"]["count"])
        vote.add_source(url)

        recorders = (
            ("Yeas", vote.yes),
            ("Nays", vote.no),
            ("Absent or not voting", lambda who: vote.vote("other", who)),
        )
        for label, record in recorders:
            for person in tallies[label]["people"]:
                record(person)

        yield vote
示例#23
0
    def scrape_pdf_for_votes(self, session, actor, date, motion, href):
        """Build a ``VoteEvent`` from the text lines of a roll-call PDF.

        The pass/fail outcome and the yea/nay/present totals are read from
        the document itself where present. Individual votes are parsed from
        the lines that follow the totals line.

        Returns:
            ``False`` when ``self.fetch_pdf_lines(href)`` yields nothing,
            otherwise the populated ``VoteEvent``.

        Raises:
            Exception: when the document contains two pass/fail words that
                map to different outcomes.
        """
        # vote indicator, a few spaces, a name, newline or multiple spaces
        # VOTE_RE = re.compile('(Y|N|E|NV|A|P|-)\s{2,5}(\w.+?)(?:\n|\s{2})')
        COUNT_RE = re.compile(
            r"^(\d+)\s+YEAS?\s+(\d+)\s+NAYS?\s+(\d+)\s+PRESENT(?:\s+(\d+)\s+NOT\sVOTING)?\s*$"
        )
        PASS_FAIL_WORDS = {
            "PASSED": "pass",
            "PREVAILED": "fail",
            "ADOPTED": "pass",
            "CONCURRED": "pass",
            "FAILED": "fail",
            "LOST": "fail",
        }

        pdflines = self.fetch_pdf_lines(href)
        if not pdflines:
            return False

        warned = False
        yes_count = no_count = present_count = 0
        passed = None
        counts_found = False
        vote_lines = []
        for line in pdflines:
            # consider pass/fail as a document property instead of a result of the vote count
            # extract the vote count from the document instead of just using counts of names
            stripped = line.strip()
            if not stripped:
                continue

            if stripped in PASS_FAIL_WORDS:
                outcome = PASS_FAIL_WORDS[stripped]
                # Crash on duplicate pass/fail status that differs from previous status
                if passed is not None and passed != outcome:
                    raise Exception("Duplicate pass/fail matches in [%s]" % href)
                passed = outcome
                continue

            totals = COUNT_RE.match(line)
            if totals:
                # The optional fourth group (NOT VOTING) is not used.
                yes_count, no_count, present_count = (
                    int(group) for group in totals.groups()[:3]
                )
                counts_found = True
            elif counts_found:
                # Only lines after the totals line can be individual votes.
                if any(
                    re.search(r"^\s*({})\s+\w".format(value), line)
                    for value in VOTE_VALUES
                ):
                    vote_lines.append(line)

        # Voter names grouped by vote code; unknown codes are ignored.
        by_code = {"Y": [], "N": [], "P": [], "E": [], "NV": [], "A": []}
        for name, vcode in find_columns_and_parse(vote_lines).items():
            if name == "Mr. Speaker":
                name = session_details[session]["speaker"]
            elif name == "Mr. President":
                name = session_details[session]["president"]
            else:
                # Converts "Davis,William" to "Davis, William".
                name = re.sub(r"\,([a-zA-Z])", r", \1", name)

            bucket = by_code.get(vcode)
            if bucket is not None:
                bucket.append(name)

        yes_votes = by_code["Y"]
        no_votes = by_code["N"]
        present_votes = by_code["P"]
        excused_votes = by_code["E"]
        not_voting = by_code["NV"]
        absent_votes = by_code["A"]

        if yes_count == 0 and no_count == 0 and present_count == 0:
            # fake the counts
            yes_count = len(yes_votes)
            no_count = len(no_votes)
        else:
            # audit the document totals against the parsed names
            if yes_count != len(yes_votes):
                self.warning(
                    "Mismatched yes count [expect: %i] [have: %i]"
                    % (yes_count, len(yes_votes))
                )
                warned = True
            if no_count != len(no_votes):
                self.warning(
                    "Mismatched no count [expect: %i] [have: %i]"
                    % (no_count, len(no_votes))
                )
                warned = True

        if passed is None:
            if actor["classification"] == "lower":  # senate doesn't have these lines
                self.warning(
                    "No pass/fail word found; fall back to comparing yes and no vote."
                )
                warned = True
            passed = "pass" if yes_count > no_count else "fail"

        classification, _ = _categorize_action(motion)
        vote_event = VoteEvent(
            legislative_session=session,
            motion_text=motion,
            classification=classification,
            organization=actor,
            start_date=date,
            result=passed,
        )

        for name in yes_votes:
            vote_event.yes(name)
        for name in no_votes:
            vote_event.no(name)
        for option, names in (
            ("other", present_votes),
            ("excused", excused_votes),
            ("not voting", not_voting),
            ("absent", absent_votes),
        ):
            for name in names:
                vote_event.vote(option, name)

        for option, total in (
            ("yes", yes_count),
            ("no", no_count),
            ("other", present_count),
            ("excused", len(excused_votes)),
            ("absent", len(absent_votes)),
            ("not voting", len(not_voting)),
        ):
            vote_event.set_count(option, total)

        vote_event.add_source(href)

        # for distinguishing between votes with the same id and on same day
        vote_event.dedupe_key = href

        if warned:
            self.warning("Warnings were issued. Best to check %s" % href)
        return vote_event
示例#24
0
    def process_vote(self, votes, url, base_url, bill, legislators,
                     chamber_dict, vote_results):
        """Yield a ``VoteEvent`` for each usable entry in ``votes["items"]``.

        Args:
            votes: dict whose ``items`` list holds the vote records.
            url: source URL attached to every vote.
            base_url: prefix joined with an item's ``link`` when the vote
                detail has to be fetched separately.
            bill: bill the votes belong to; ``bill.identifier`` is used in
                the dedupe key.
            legislators: mapping from voter id to legislator name.
            chamber_dict: mapping from the item's ``chamber`` value to a
                chamber name.
            vote_results: mapping from lower-cased result text to a truthy
                (passed) or falsy (failed) value.

        Yields:
            One ``VoteEvent`` per item. Items without vote data, without a
            date, or flagged as malformed are skipped with a warning.
        """
        for v in votes["items"]:
            if "yeas" not in v:
                # sometimes the actual vote is buried a second layer deep
                v = self.get(base_url + v["link"]).json()
                if "yeas" not in v:
                    self.logger.warning("No vote info available, skipping")
                    continue

            try:
                chamber = chamber_dict[v["chamber"]]
            except KeyError:
                chamber = "lower" if "house" in v["apn"] else "upper"

            # The date lives under "date", or failing that "occurred".
            date = None
            for date_key in ("date", "occurred"):
                if date_key in v:
                    localized = self._tz.localize(
                        datetime.datetime.strptime(v[date_key], "%m/%d/%y"))
                    date = "{:%Y-%m-%d}".format(localized)
                    break
            if date is None:
                self.logger.warning("No date found for vote, skipping")
                continue

            motion = v["action"] if "action" in v else v["motiontype"]

            if motion in self._vote_motion_dict:
                motion_text = self._vote_motion_dict[motion]
            else:
                self.warning(
                    "Unknown vote code {}, please add to _vote_motion_dict".
                    format(motion))
                motion_text = v["results"]

            # Sometimes Ohio's SOLAR will only return part of the JSON, so in that case skip
            if (not motion and isinstance(v["yeas"], str)
                    and isinstance(v["nays"], str)):
                malformed_msg = 'Malformed JSON found for vote ("revno" of {}); skipping'
                self.warning(malformed_msg.format(v["revno"]))
                continue

            result = v.get("results") or v.get("passed")
            if result is None:
                # No stated outcome: fall back to comparing the voter lists.
                result = ("passed" if len(v["yeas"]) > len(v["nays"])
                          else "failed")

            passed = vote_results[result.lower()]
            vote = VoteEvent(
                chamber=chamber,
                start_date=date,
                motion_text=motion_text,
                result="pass" if passed else "fail",
                # organization=v["committee"],
                bill=bill,
                classification=("committee-passage" if "committee" in v
                                else "passage"),
            )
            # Concatenate the bill identifier and vote identifier to avoid collisions
            vote.dedupe_key = "{}:{}".format(bill.identifier.replace(" ", ""),
                                             v["revno"])

            # the yea and nay counts are not displayed, but vote totals are
            # and passage status is. Each counter ends up holding the number
            # of voters recorded for that option.
            yes_count = no_count = absent_count = excused_count = 0
            for yes_count, voter_id in enumerate(v["yeas"], start=1):
                vote.yes(legislators[voter_id])
            for no_count, voter_id in enumerate(v["nays"], start=1):
                vote.no(legislators[voter_id])
            if "absent" in v:
                for absent_count, voter_id in enumerate(v["absent"], start=1):
                    vote.vote("absent", legislators[voter_id])
            if "excused" in v:
                for excused_count, voter_id in enumerate(v["excused"], start=1):
                    vote.vote("excused", legislators[voter_id])

            vote.set_count("yes", yes_count)
            vote.set_count("no", no_count)
            vote.set_count("absent", absent_count)
            vote.set_count("excused", excused_count)

            # check to see if there are any other things that look
            # like vote categories, throw a warning if so
            counted_keys = ("yeas", "nays", "absent", "excused")
            for key, val in v.items():
                if type(val) is not list or not val or key in counted_keys:
                    continue
                if val[0] in legislators:
                    self.logger.warning(
                        "{k} looks like a vote type that's not being counted."
                        " Double check it?".format(k=key))
            vote.add_source(url)

            yield vote
    def scrape_vote(self, bill, date, url):
        """Fetch a JSON roll call and yield the ``VoteEvent`` it describes.

        The chamber is derived from ``actionLog.FullName`` ("House",
        "Senate" or "Joint"). A vote with an unrecognised or empty location
        is skipped with a warning, and a vote with an empty
        ``actionLog.StatusText`` is skipped silently.

        Yields:
            At most one ``VoteEvent``.
        """
        page = self.get(url).json()

        location = page["actionLog"]["FullName"]
        chamber = None
        if location:
            for keyword, chamber_name in (
                ("House", "lower"),
                ("Senate", "upper"),
                ("Joint", "legislature"),
            ):
                if keyword in location:
                    chamber = chamber_name
                    break
        if chamber is None:
            self.warning("Bad Vote chamber: '%s', skipping" % location)
            return

        motion = page["actionLog"]["StatusText"]
        if not motion:
            # If we can't detect a motion, skip this vote
            return

        yes_count = page["Yeas"]
        no_count = page["Nays"]
        excused_count = page["Excused"]
        absent_count = page["Absent"]

        if motion.startswith("Do Pass"):
            vtype = "passage"
        elif motion == "Concurred in amendments":
            vtype = "amendment"
        # commenting out until we add these back to OS-core
        # elif motion == "Veto override":
        #     vtype = "veto-override"
        else:
            vtype = []

        # Outcome is inferred from the tallies alone.
        outcome = "pass" if yes_count > no_count else "fail"
        vote = VoteEvent(
            chamber=chamber,
            start_date=date,
            motion_text=motion,
            result=outcome,
            classification=vtype,
            bill=bill,
        )
        # differentiate nearly identical votes
        vote.dedupe_key = url

        vote.add_source(url)
        for option, total in (
            ("yes", yes_count),
            ("no", no_count),
            ("excused", excused_count),
            ("absent", absent_count),
        ):
            vote.set_count(option, total)

        # Roll-call entries with any other ``Vote1`` value are not recorded.
        for person in page["RollCalls"]:
            cast = person["Vote1"]
            if cast in ("Aye", "Yea"):
                vote.yes(person["UniqueName"])
            elif cast == "Nay":
                vote.no(person["UniqueName"])
            elif cast in ("Excused", "Absent"):
                vote.vote(cast.lower(), person["UniqueName"])

        yield vote
示例#26
0
    def _parse_votes(self, url, vote, bill):
        """Extract voters and vote counts from a vote page.

        Args:
            url: address of the vote document. URLs ending in ``.pdf`` are
                handed to ``PDFCommitteeVote``; anything else is parsed as
                HTML.
            vote: dict describing the vote; ``action``, ``vote_url``,
                ``chamber`` and ``date`` are read, and ``motion`` is written
                back with the motion text that was found.
            bill: bill the vote belongs to.

        Returns:
            A new ``VoteEvent`` (or whatever ``PDFCommitteeVote.asvote()``
            returns for PDFs), or ``None`` when the document is missing,
            unparsable, or has no vote-count table.

        Raises:
            Exception: when the action text carries both a passage and a
                failure indicator while the yeas outnumber the nays, or when
                no indicator matches at all.
        """
        if url.lower().endswith(".pdf"):

            try:
                resp = self.get(url)
            except HTTPError:
                # This vote document wasn't found.
                msg = "No document found at url %r" % url
                self.logger.warning(msg)
                return

            try:
                v = PDFCommitteeVote(url, resp.content, bill)
                return v.asvote()
            except PDFCommitteeVoteParseError:
                # Warn and skip.
                self.warning("Could't parse committee vote at %r" % url)
                return

        html = self.get(url).text
        doc = lxml.html.fromstring(html)
        doc.make_links_absolute(url)

        # Yes, no, excused, absent.
        try:
            vals = doc.xpath("//table")[1].xpath("tr/td/text()")
        except IndexError:
            # Most likely was a bogus link lacking vote data.
            return

        yes_count, no_count, excused_count, absent_count = map(int, vals)

        # Get the motion: the text trailing the last <br>. lxml reports a
        # missing tail as None, so guard against that as well as against a
        # page with no <br> at all. Some pages mysteriously have no motion
        # listed; fall back to the action text for those.
        breaks = doc.xpath("//br")
        motion = (breaks[-1].tail or "").strip() if breaks else ""
        if not motion:
            motion = vote["action"]

        vote["motion"] = motion

        action = vote["action"]
        vote_url = vote["vote_url"]

        # From here on ``vote`` refers to the VoteEvent, not the input dict.
        vote = VoteEvent(
            chamber=vote["chamber"],
            start_date=vote["date"],
            motion_text=vote["motion"],
            result="fail",  # placeholder
            classification="passage",
            bill=bill,
            bill_action=vote["action"],
        )
        vote.dedupe_key = vote_url  # URL contains sequence number
        vote.add_source(vote_url)
        vote.set_count("yes", yes_count)
        vote.set_count("no", no_count)
        vote.set_count("excused", excused_count)
        vote.set_count("absent", absent_count)

        # A name in brackets is treated as the voter's short name.
        short_name_re = re.compile(r".*?\((.*?)\)")
        for text in doc.xpath("//table")[2].xpath("tr/td/text()"):
            if not text.strip("\xa0"):
                continue
            # Each cell is "<code>\xa0...\xa0<name>".
            v, name = filter(None, text.split("\xa0"))
            short_name = short_name_re.findall(name)
            if short_name:
                note = "Short Name: " + short_name[0]
            else:
                note = ""
            # Name without brackets like 'Kary, Douglas'
            name = re.sub(r"[\(\[].*?[\)\]]", "", name)
            if v == "Y":
                vote.yes(name, note=note)
            elif v == "N":
                vote.no(name, note=note)
            elif v == "E":
                vote.vote("excused", name, note=note)
            elif v == "A":
                vote.vote("absent", name, note=note)

        # code to determine value of `passed`
        passed = None

        # some actions take a super majority, so we aren't just
        # comparing the yeas and nays here.
        for i in vote_passage_indicators:
            if i in action:
                passed = True
                break
        for i in vote_failure_indicators:
            if i in action and passed:
                # a quick explanation:  originally an exception was
                # thrown if both passage and failure indicators were
                # present because I thought that would be a bug in my
                # lists.  Then I found 2007 HB 160.
                # Now passed = False if the nays outnumber the yays..
                # I won't automatically mark it as passed if the yays
                # outnumber the nays because I don't know what requires
                # a supermajority in MT.
                if no_count >= yes_count:
                    passed = False
                    break
                else:
                    raise Exception("passage and failure indicator "
                                    "both present at: %s" % url)
            if i in action and passed is None:
                passed = False
                break
        for i in vote_ambiguous_indicators:
            if i in action:
                passed = yes_count > no_count
                break
        if passed is None:
            raise Exception("Unknown passage at: %s" % url)

        vote.result = "pass" if passed else "fail"

        return vote