Exemplo n.º 1
0
    def parse_vote_page(self, vote_url, bill):
        """Scrape a roll-call page and build the VoteEvent for ``bill``.

        Args:
            vote_url: URL of the vote page. It is fetched with ``self.get``,
                checked for the substring "senate" to pick the chamber, and
                stored as the event's ``pupa_id``.
            bill: Bill reference passed straight through to ``VoteEvent``.

        Returns:
            The populated ``VoteEvent`` (totals plus individual votes).

        Raises:
            IndexError: If the date cell, the "Yeas" totals cell or one of
                the five expected totals is missing from the page.
            ValueError: If the date or a total cannot be parsed.
        """
        doc = lxml.html.fromstring(self.get(vote_url).text)

        # chamber
        chamber = "upper" if "senate" in vote_url else "lower"

        # The date cell carries an 18-character label followed by a date in
        # the following format: Mar 23, 2009
        date_text = doc.xpath('//td[starts-with(text(), "Legislative")]')[0].text
        date_text = date_text.replace(u"\xa0", " ")
        date = datetime.datetime.strptime(date_text[18:], "%b %d, %Y")

        # motion
        motion = "".join(x.text_content() for x in doc.xpath('//td[@colspan="23"]'))
        if not motion:
            motion = "No motion given"  # XXX: Double check this. See SJ 3.
        motion = motion.replace(u"\xa0", " ")

        # Totals share the CSS class of the "Yeas" cell. The first text node
        # of that class is skipped; of the next five, the first two are the
        # yes/no totals and the remaining three are summed into "other".
        tot_class = doc.xpath('//td[contains(text(), "Yeas")]')[0].get("class")
        totals = doc.xpath('//td[@class="%s"]/text()' % tot_class)[1:]
        counts = [int(totals[i].split()[-1]) for i in range(5)]
        yes_count, no_count = counts[0], counts[1]
        other_count = sum(counts[2:])
        passed = yes_count > no_count

        vote = VoteEvent(
            bill=bill,
            chamber=chamber,
            start_date=date.strftime("%Y-%m-%d"),
            motion_text=motion,
            classification="passage",
            result="pass" if passed else "fail",
        )
        vote.pupa_id = vote_url  # contains sequence number
        vote.set_count("yes", yes_count)
        vote.set_count("no", no_count)
        vote.set_count("other", other_count)

        # Walk every cell text in document order: a heading cell switches the
        # current vote option, and each following cell is a voter for it.
        headings = (
            ("Voting Yea", "yes"),
            ("Voting Nay", "no"),
            ("Not Voting", "other"),
            ("Excused", "other"),
        )
        option = None
        for text in doc.xpath("//td/text()"):
            text = text.replace(u"\xa0", " ")
            heading = next(
                (opt for prefix, opt in headings if text.startswith(prefix)),
                None,
            )
            if heading is not None:
                option = heading
            elif option is not None:
                name = text.rstrip("*")
                # Spacer cells contain only whitespace; they are not voters.
                if name.strip():
                    vote.vote(option, name)

        return vote
Exemplo n.º 2
0
def record_votes(root, session, chamber):
    """Yield a VoteEvent for each valid recorded vote found under ``root``.

    Every ``<div>`` matched by the joined ``vote_selectors`` predicates is
    wrapped in ``MaybeVote``; wrappers that report ``is_valid`` as false are
    skipped. Only the first two characters of ``session`` are passed on as
    the legislative session.
    """
    selector = "//div{}".format("".join(vote_selectors))
    for node in root.xpath(selector):
        parsed = MaybeVote(node)
        if not parsed.is_valid:
            continue

        event = VoteEvent(
            chamber=chamber,
            start_date=None,
            motion_text="passage" if parsed.passed else "other",
            result="pass" if parsed.passed else "fail",
            classification="passage" if parsed.passed else None,
            legislative_session=session[0:2],
            bill=parsed.bill_id,
            bill_chamber=parsed.chamber,
        )

        # Missing totals are reported as zero.
        totals = (
            ("yes", parsed.yeas),
            ("no", parsed.nays),
            ("not voting", parsed.present),
        )
        for option, total in totals:
            event.set_count(option, total or 0)

        # Each roll of names is cleaned and recorded under its own option.
        recorders = (
            ("yeas", event.yes),
            ("nays", event.no),
            ("present", lambda name: event.vote("not voting", name)),
            ("absent", lambda name: event.vote("absent", name)),
        )
        for group, record in recorders:
            for raw_name in parsed.votes[group]:
                record(clean_vote_name(raw_name))

        yield event
Exemplo n.º 3
0
def build_vote(session, bill_id, url, vote_record, chamber, motion_text):
    """Create a VoteEvent from an already-parsed ``vote_record`` mapping.

    ``vote_record`` must provide a ``"date"`` entry with ``strftime`` and a
    sequence of voters under each of ``"yes"``, ``"no"``, ``"excused"``,
    ``"absent"`` and ``"other"``. The vote passes when there are strictly
    more yes voters than no voters. ``url`` is used both as the dedupe key
    and as the source.
    """
    # When they vote in a substitute they mark it as XHB
    bill_id = bill_id.replace("XHB", "HB")

    outcome = "fail"
    if len(vote_record["yes"]) > len(vote_record["no"]):
        outcome = "pass"

    event = VoteEvent(
        result=outcome,
        chamber=chamber,
        start_date=vote_record["date"].strftime("%Y-%m-%d"),
        motion_text=motion_text,
        classification="passage",
        legislative_session=session,
        bill=bill_id,
        # Bill ids starting with "S" are treated as upper-chamber bills.
        bill_chamber="upper" if bill_id[0] == "S" else "lower",
    )
    event.dedupe_key = url

    options = ("yes", "no", "excused", "absent", "other")
    for option in options:
        event.set_count(option, len(vote_record[option]))
    for option in options:
        for voter in vote_record[option]:
            event.vote(option, voter)

    event.add_source(url)
    return event
Exemplo n.º 4
0
    def add_vote(self, bill, chamber, date, text, url):
        """Build a VoteEvent from an action line that contains vote tallies.

        Args:
            bill: Bill reference passed through to ``VoteEvent``.
            chamber: Chamber the vote took place in.
            date: Datetime of the vote; it is passed to ``TIMEZONE.localize``.
            text: Action text. It must contain an "Ayes N, Noes M" /
                "Ayes N; Nays M" style tally and is also used as the motion
                text.
            url: Roll-call URL, or a falsy value when none is available.

        Returns:
            The ``VoteEvent`` with yes/no counts and, when ``url`` is given,
            the source, dedupe key and individual votes attached.

        Raises:
            IndexError: If ``text`` contains no recognisable tally.
        """
        tallies = re.findall(r"Ayes,?[\s]?(\d+)[,;]\s+N(?:oes|ays),?[\s]?(\d+)", text)
        yes, no = int(tallies[0][0]), int(tallies[0][1])

        # The first classifier pattern matching the start of the text wins;
        # an unclassified motion keeps an empty classification list.
        vtype = []
        for regex, classification in motion_classifiers.items():
            if re.match(regex, text):
                vtype = classification
                break

        v = VoteEvent(
            chamber=chamber,
            start_date=TIMEZONE.localize(date),
            motion_text=text,
            result="pass" if yes > no else "fail",
            classification=vtype,
            bill=bill,
        )
        v.set_count("yes", yes)
        v.set_count("no", no)

        # Everything derived from the URL lives behind the guard, so a
        # missing URL no longer crashes on ``url.split`` or stores an empty
        # dedupe key.
        if url:
            v.dedupe_key = url.split("/")[-1]
            v.add_source(url)

            # fetch the vote itself
            if "av" in url:
                self.add_house_votes(v, url)
            elif "sv" in url:
                self.add_senate_votes(v, url)

        return v
Exemplo n.º 5
0
    def scrape_votes_old(self, bill, billname, session):
        """Yield VoteEvents scraped from the archived bill page.

        The archive URL is built from ``session`` and ``billname``. Every
        link whose href contains "JournalText" is treated as one vote: the
        link text is the date, the second cell of its row holds
        "<chamber> - <motion>", and the following row holds the yea and nay
        name tables.

        Raises:
            ScrapeError: If the chamber label is neither "House" nor "Senate".
        """
        vote_url = ("http://archives.legislature.state.oh.us/bills.cfm?ID=" +
                    session + "_" + billname)
        page = lxml.html.fromstring(self.get(vote_url).text)

        chamber_names = {"House": "lower", "Senate": "upper"}

        def names_in(container):
            """Return the non-empty cell texts of a yea/nay container."""
            texts = (td.xpath("string()") for td in container.xpath("table/tr/td"))
            return [text for text in texts if text]

        for jlink in page.xpath("//a[contains(@href, 'JournalText')]"):
            parsed = datetime.datetime.strptime(jlink.text, "%m/%d/%Y")
            date = "{:%Y-%m-%d}".format(self._tz.localize(parsed).date())
            details = jlink.xpath("string(../../../td[2])")

            label = details.split(" - ")[0]
            if label not in chamber_names:
                raise ScrapeError("Bad chamber: %s" % label)
            chamber = chamber_names[label]

            # Only the first line after the chamber label is the motion.
            motion = details.split(" - ")[1].split("\n")[0].strip()

            vote_row = jlink.xpath("../../..")[0].getnext()
            yeas = names_in(
                vote_row.xpath("td/font/div[contains(@id, 'Yea')]")[0])
            nays = names_in(
                vote_row.xpath("td/font/div[contains(@id, 'Nay')]")[0])

            vote = VoteEvent(
                chamber=chamber,
                start_date=date,
                motion_text=motion,
                result="pass" if len(yeas) > len(nays) else "fail",
                bill=bill,
                classification="passage",
            )

            for voter in yeas:
                vote.yes(voter)
            for voter in nays:
                vote.no(voter)

            vote.add_source(vote_url)

            yield vote
Exemplo n.º 6
0
    def _parse_senate_votes(self, vote_data, bill, url):
        """Convert one senate vote record into a VoteEvent.

        ``vote_data`` supplies ``voteDate`` (``YYYY-MM-DD``), ``voteType``
        ("FLOOR" or "COMMITTEE"), ``version`` and the member rolls under
        ``memberVotes.items``. The result is "pass" when the recorded yes
        votes outnumber the recorded no votes.

        Raises:
            ValueError: If ``voteType`` is not one of the two known types.
        """
        vote_datetime = datetime.datetime.strptime(vote_data["voteDate"],
                                                   "%Y-%m-%d")

        vote_type = vote_data["voteType"]
        if vote_type not in ("FLOOR", "COMMITTEE"):
            raise ValueError("Unknown vote type encountered.")
        if vote_type == "FLOOR":
            motion = "Floor Vote"
        else:
            motion = "{} Vote".format(vote_data["committee"]["name"])

        version = vote_data["version"]
        if version:
            motion += " - Version: " + version

        vote = VoteEvent(
            chamber="upper",
            start_date=vote_datetime.strftime("%Y-%m-%d"),
            motion_text=motion,
            classification="passage",
            # Placeholder until the rolls have been tallied below.
            result="fail",
            bill=bill,
        )
        vote.add_source(url)

        vote_rolls = vote_data["memberVotes"]["items"]
        tally = {"yes": 0, "no": 0, "other": 0}

        # Yea and nay rolls are only read when they contain an "items" list.
        named_rolls = (
            ("AYE", "yes", vote.yes),
            ("AYEWR", "yes", vote.yes),
            ("NAY", "no", vote.no),
        )
        for roll, option, record in named_rolls:
            if "items" not in vote_rolls.get(roll, {}):
                continue
            for legislator in vote_rolls[roll]["items"]:
                record(legislator["fullName"])
                tally[option] += 1

        # Every remaining roll type is lumped together as "other".
        for roll in ("EXC", "ABS", "ABD"):
            if not vote_rolls.get(roll, []):
                continue
            for legislator in vote_rolls[roll]["items"]:
                vote.vote("other", legislator["fullName"])
                tally["other"] += 1

        vote.result = "pass" if tally["yes"] > tally["no"] else "fail"
        vote.set_count("yes", tally["yes"])
        vote.set_count("no", tally["no"])
        vote.set_count("other", tally["other"])

        return vote
Exemplo n.º 7
0
    def scrape_senate_vote(self, bill, url, date):
        """Download a senate roll-call PDF and yield the VoteEvent(s) in it.

        Args:
            bill: Bill reference passed through to ``VoteEvent``.
            url: Location of the vote PDF; also used as source and
                ``pupa_id``.
            date: Date of the vote (must support ``strftime``).

        Yields:
            The parsed ``VoteEvent``. PDFs whose text contains a
            "Yea: N  Nay: N  Absent: N" summary are delegated to
            ``scrape_senate_vote_3col``. Nothing is yielded when the PDF
            cannot be downloaded.
        """
        try:
            filename, _ = self.urlretrieve(url)
        except scrapelib.HTTPError:
            self.warning("missing vote file %s" % url)
            return

        vote = VoteEvent(
            chamber="upper",
            start_date=date.strftime("%Y-%m-%d"),
            motion_text="Passage",
            # setting 'fail' for now.
            result="fail",
            classification="passage",
            bill=bill,
        )
        vote.add_source(url)
        vote.pupa_id = url

        # Remove the downloaded file even when the conversion fails.
        try:
            text = convert_pdf(filename, "text").decode("utf-8")
        finally:
            os.remove(filename)

        if re.search(r"Yea:\s+\d+\s+Nay:\s+\d+\s+Absent:\s+\d+", text):
            yield from self.scrape_senate_vote_3col(bill, vote, text, url,
                                                    date)
            return

        # Split into alternating heading / name-list chunks and reverse them
        # so that pop() consumes the chunks in document order. This has to be
        # a real list: on Python 3 ``filter`` returns a lazy iterator, which
        # is always truthy and has no ``pop``.
        chunks = re.split(r"(Yea|Nay|Absent)s?:", text)[::-1]
        data = [chunk for chunk in chunks if chunk]
        keymap = dict(yea="yes", nay="no")
        actual_vote = collections.defaultdict(int)
        vote_count = {"yes": 0, "no": 0, "other": 0}
        while data:
            vote_val = data.pop()
            key = keymap.get(vote_val.lower(), "other")
            values = data.pop()
            for name in re.split(r"(?:[\s,]+and\s|[\s,]{2,})", values):
                if name.lower().strip() == "none.":
                    continue
                # Drop dot leaders, a trailing period and surrounding
                # digits, dashes and whitespace from the name.
                name = name.replace("..", "")
                name = re.sub(r"\.$", "", name)
                name = name.strip("-1234567890 \n")
                if not name:
                    continue
                vote.vote(key, name)
                actual_vote[vote_val] += 1
                vote_count[key] += 1
            assert actual_vote[vote_val] == vote_count[key]

        for key, value in vote_count.items():
            vote.set_count(key, value)
        # updating result with actual value
        vote.result = ("pass" if vote_count["yes"] >
                       (vote_count["no"] + vote_count["other"]) else "fail")

        yield vote
Exemplo n.º 8
0
def test_vote_event_org_chamber():
    """A ``chamber`` keyword becomes an organization pseudo-id by classification."""
    event_kwargs = dict(
        legislative_session="2009",
        motion_text="passage of the bill",
        start_date="2009-01-07",
        result="pass",
        classification="passage",
        chamber="upper",
    )
    event = VoteEvent(**event_kwargs)
    expected = {"classification": "upper"}
    assert get_pseudo_id(event.organization) == expected
Exemplo n.º 9
0
def toy_vote_event():
    """Return a minimal passing VoteEvent with one annotated source attached."""
    event_kwargs = dict(
        legislative_session="2009",
        motion_text="passage of the bill",
        start_date="2009-01-07",
        result="pass",
        classification="passage",
    )
    event = VoteEvent(**event_kwargs)
    event.add_source("http://uri.example.com/", note="foo")
    return event
Exemplo n.º 10
0
def test_org_and_chamber_conflict():
    """Supplying both ``organization`` and ``chamber`` must raise ValueError."""
    conflicting_kwargs = dict(
        legislative_session="2009",
        motion_text="passage of the bill",
        start_date="2009-01-07",
        result="pass",
        classification="passage",
        organization="test",
        chamber="lower",
    )
    with pytest.raises(ValueError):
        VoteEvent(**conflicting_kwargs)
Exemplo n.º 11
0
def test_vote_event_org_dict():
    """An organization given as a dict round-trips through its pseudo-id."""
    org_spec = {"name": "Random Committee", "classification": "committee"}
    event_kwargs = dict(
        legislative_session="2009",
        motion_text="passage of the bill",
        start_date="2009-01-07",
        result="pass",
        classification="passage",
        organization=org_spec,
    )
    event = VoteEvent(**event_kwargs)
    assert get_pseudo_id(event.organization) == org_spec
Exemplo n.º 12
0
def test_vote_event_org_obj():
    """An Organization object is stored on the event as its ``_id``."""
    committee = Organization("something", classification="committee")
    event_kwargs = dict(
        legislative_session="2009",
        motion_text="passage of the bill",
        start_date="2009-01-07",
        result="pass",
        classification="passage",
        organization=committee,
    )
    event = VoteEvent(**event_kwargs)
    assert event.organization == committee._id
Exemplo n.º 13
0
    def parse_vote(self, actor, date, row, session, bill_id, bill_chamber,
                   source):
        """
        Build a VoteEvent from the actor, date and row element and yield it.

        The first span of the row holds "<outcome>-<yes>-<no>-<other>"; the
        tails of the second and third spans list the yes and no voters, and
        any later span whose text starts with "Absent" or "Excused"
        contributes voters recorded as "absent".
        """
        spans = row.xpath(".//span")
        # An empty motion (after dropping dashes and NBSPs) means "passage".
        motion = (row.text.replace("\u00a0", " ").replace("-", "").strip()
                  or "passage")
        summary = spans[0].text_content()
        passed, yes_count, no_count, other_count = summary.rsplit("-", 3)
        yes_votes = self.get_names(spans[1].tail)
        no_votes = self.get_names(spans[2].tail)

        other_votes = []
        for span in spans[3:]:
            if span.text.startswith(("Absent", "Excused")):
                other_votes.extend(self.get_names(span.tail))

        # Map the free-text outcome to a result; an unrecognised outcome is
        # passed through unchanged.
        outcome_words = (
            ("adopted", "pass"),
            ("passed", "pass"),
            ("failed", "fail"),
        )
        lowered = passed.lower()
        result = next(
            (value for word, value in outcome_words if word in lowered),
            passed,
        )

        vote = VoteEvent(
            chamber=actor,
            start_date=date,
            motion_text=motion,
            bill=bill_id,
            bill_chamber=bill_chamber,
            result=result,
            classification="passage",
            legislative_session=session,
        )
        vote.add_source(source)
        vote.set_count("yes", int(yes_count))
        vote.set_count("no", int(no_count))
        vote.set_count("absent", int(other_count))

        recorders = (
            (yes_votes, vote.yes),
            (no_votes, vote.no),
            (other_votes, lambda voter: vote.vote("absent", voter)),
        )
        for names, record in recorders:
            for name in names:
                # Skip empty entries and the literal "None" placeholder.
                if not name or name == "None":
                    continue
                record(name)
        yield vote
Exemplo n.º 14
0
    def add_archived_votes(self, bill, bill_id):
        """Yield a VoteEvent for each archived vote stored for ``bill_id``.

        ``bill_id`` is normalised into the archive key: the first
        whitespace-separated token is cut to its first character, a last
        token of two or three characters is left-padded with zeros to four,
        and the tokens are joined without separators. Nothing is yielded
        when the key is not present in ``self.archived_votes``.
        """
        parts = bill_id.split()
        parts[0] = parts[0][0]
        number = parts[-1]
        if len(number) in (2, 3):
            parts[-1] = "0" * (4 - len(number)) + number
        archive_key = "".join(parts)

        if archive_key not in self.archived_votes:
            return

        archived = self.archived_votes[archive_key]
        for vote_key, legislator_votes in archived.items():
            (
                vote_date,
                r_number,
                action_number,
                action_vote_result,
                archive_url,
                cod,
                _,
            ) = vote_key

            # Archive URLs ending in "S" are treated as upper-chamber votes.
            chamber = "upper" if archive_url[-1] == "S" else "lower"
            start_date = eastern.localize(vote_date).isoformat()
            motion_text = (action_number + r_number + cod +
                           action_vote_result).replace(" ", "_")

            ve = VoteEvent(
                chamber=chamber,  # TODO: check this
                start_date=start_date,
                motion_text=motion_text,
                bill=bill,
                # No indication on classification for archived votes
                classification="other",
                result=action_vote_result,
            )
            ve.add_source(archive_url)

            for lv in legislator_votes:
                ve.vote(lv["how_voted"], lv["leg"])

            yield ve
Exemplo n.º 15
0
    def scrape_votes(self, bill, bill_page, chamber):
        """Yield a VoteEvent for every "view_votes" link in the history table.

        The first two cells of the link's row give the date and the motion;
        the motion counts as passed when it contains "Passed" or "Advanced".
        Totals come from ``self.process_count`` on the linked vote page, and
        the individual votes are read as alternating name / vote cells.
        """
        link_query = (
            '//table[contains(@class,"history")]//a[contains(@href, "view_votes")]'
        )
        count_labels = (
            ("yes", "Yes:"),
            ("no", "No:"),
            ("excused", "Excused - Not Voting:"),
            ("absent", "Absent - Not Voting:"),
            ("abstain", "Present - Not Voting:"),
        )
        for vote_link in bill_page.xpath(link_query):
            vote_url = vote_link.attrib["href"]
            date_td, motion_td, *_ = vote_link.xpath("ancestor::tr/td")
            date = datetime.strptime(date_td.text, "%b %d, %Y")
            motion_text = motion_td.text_content()
            vote_page = self.lxmlize(vote_url)
            passed = any(
                word in motion_text for word in ("Passed", "Advanced"))
            cells = vote_page.xpath(
                '//div[contains(@class,"table-responsive")]/table//td')
            vote = VoteEvent(
                bill=bill,
                chamber=chamber,
                start_date=TIMEZONE.localize(date),
                motion_text=motion_text,
                classification="passage",
                result="pass" if passed else "fail",
            )

            # Read every total first, then record them in the same order.
            totals = [(option, self.process_count(vote_page, label))
                      for option, label in count_labels]
            for option, total in totals:
                vote.set_count(option, total)

            # The KeyID query parameter of the vote URL is the dedupe key.
            query_params = urllib.parse.parse_qs(
                urllib.parse.urlparse(vote_url).query)
            vote.dedupe_key = query_params["KeyID"][0]
            vote.add_source(vote_url)

            for index in range(0, len(cells), 2):
                name = cells[index].text
                vote_type = cells[index + 1].text
                if not (name and vote_type):
                    continue
                option = VOTE_TYPE_MAP.get(vote_type.lower(), "other")
                vote.vote(option, name)
            yield vote
Exemplo n.º 16
0
    def parse_committee_votes(self, bill, url):
        """Yield a VoteEvent for every committee roll call linked from ``url``.

        Args:
            bill: Bill the votes belong to; ``url`` is added to it as a source.
            url: Committee vote listing page.

        Yields:
            One ``VoteEvent`` per "listVoteSummary.cfm" link, populated from
            ``self.parse_upper_committee_vote_rollcall``.

        Raises:
            ValueError: If a vote's date cell matches neither ``MM/DD/YYYY``
                nor ``MM-DD-YYYY``.
        """
        bill.add_source(url)
        html = self.get(url).text
        doc = lxml.html.fromstring(html)
        doc.make_links_absolute(url)
        chamber = "upper" if "Senate" in doc.xpath("string(//h1)") else "lower"
        committee = tuple(doc.xpath("//h2")[0].itertext())[-2].strip()
        for link in doc.xpath("//a[contains(@href, 'listVoteSummary.cfm')]"):

            # Date: try each known format. Failing loudly here avoids handing
            # the raw string to tz.localize() further down, which would fail
            # with an unrelated-looking error.
            date_text = link.xpath("../../td")[0].text_content()
            for fmt in ("%m/%d/%Y", "%m-%d-%Y"):
                try:
                    date = datetime.datetime.strptime(date_text, fmt)
                except ValueError:
                    continue
                break
            else:
                raise ValueError(
                    "Unrecognized committee vote date %r on %s" % (date_text, url)
                )

            # Motion
            motion = link.text_content().split(" - ")[-1].strip()
            motion = "Committee vote (%s): %s" % (committee, motion)

            # Roll call
            vote_url = link.attrib["href"]
            rollcall = self.parse_upper_committee_vote_rollcall(bill, vote_url)

            vote = VoteEvent(
                chamber=chamber,
                start_date=tz.localize(date),
                motion_text=motion,
                classification=[],
                result="pass" if rollcall["passed"] else "fail",
                bill=bill,
            )
            vote.dedupe_key = vote_url
            vote.set_count("yes", rollcall["yes_count"])
            vote.set_count("no", rollcall["no_count"])
            vote.set_count("other", rollcall["other_count"])

            # A roll call may omit any of the per-option name lists.
            for voteval in ("yes", "no", "other"):
                for name in rollcall.get(voteval + "_votes", []):
                    vote.vote(voteval, name)

            vote.add_source(url)
            vote.add_source(vote_url)

            yield vote
Exemplo n.º 17
0
    def scrape_chamber_votes(self, chamber, session):
        """Yield VoteEvents for every vote date of ``chamber`` in ``session``.

        Each date returned by ``self.get_vote_dates`` is posted to the
        chamber's ``votes.asp`` page and parsed with ``self.parse_vote_page``.
        Votes whose bill is not in ``self._bill_id_by_type`` are skipped with
        a warning.
        """
        folder = {"upper": "SVotes", "lower": "HVotes"}[chamber]
        url = "%s/%s" % (RI_URL_BASE, folder)
        action = "%s/%s" % (url, "votes.asp")

        chamber_codes = {"H": "lower", "S": "upper"}
        option_codes = {"Y": "yes", "N": "no"}

        for date in self.get_vote_dates(url, session):
            votes = self.parse_vote_page(self.post_to(action, date), url,
                                         session)
            for vote_dict in votes:
                for vote in vote_dict.values():
                    count = vote["count"]
                    # The chamber recorded on the vote itself is the one used
                    # for the bill lookup and for the event.
                    vote_chamber = chamber_codes[vote["meta"]["chamber"]]

                    try:
                        bill_id = self._bill_id_by_type[(
                            vote_chamber, vote["meta"]["bill"])]
                    except KeyError:
                        self.warning("no such bill_id %s %s", vote_chamber,
                                     vote["meta"]["bill"])
                        continue

                    event = VoteEvent(
                        chamber=vote_chamber,
                        start_date=vote["time"].strftime("%Y-%m-%d"),
                        motion_text=vote["meta"]["extra"]["motion"],
                        result="pass" if count["passage"] else "fail",
                        classification="passage",
                        legislative_session=session,
                        bill=bill_id,
                        bill_chamber=vote_chamber,
                    )
                    event.set_count("yes", int(count["YEAS"]))
                    event.set_count("no", int(count["NAYS"]))
                    event.set_count("other", int(count["NOT VOTING"]))
                    event.add_source(vote["source"])
                    event.dedupe_key = vote["source"]

                    # Anything other than Y/N is recorded as "other".
                    for cast in vote["votes"]:
                        option = option_codes.get(cast["vote"], "other")
                        event.vote(option, cast["name"])
                    yield event
Exemplo n.º 18
0
    def scrape_votes(self, bill, page):
        """Yield a VoteEvent for every reported floor action of a bill.

        Args:
            bill: Bill reference passed through to ``VoteEvent``.
            page: Bill status mapping providing ``"BillId"`` and a
                ``"FloorHeaders"`` list whose items carry
                ``"BillStatusActionId"`` and ``"LegislativeBody"``.

        Yields:
            One ``VoteEvent`` per action that is not "No Action" and has a
            report date. A vote passes when it was unanimously adopted or
            has more ayes than nays; missing tallies count as zero.
        """
        base_url = "https://apps.azleg.gov/api/BillStatusFloorAction"
        for header in page["FloorHeaders"]:
            params = {
                "billStatusId": page["BillId"],
                "billStatusActionId": header["BillStatusActionId"],
                "includeVotes": "true",
            }
            resp = self.get(base_url, timeout=80, params=params)
            actions = json.loads(resp.content.decode("utf-8"))

            for action in actions:
                if action["Action"] == "No Action":
                    continue
                if action["ReportDate"] is None:
                    continue
                # Drop the fractional seconds before parsing.
                cleaned_date = action["ReportDate"].split(".")[0]
                action_date = datetime.datetime.strptime(
                    cleaned_date, "%Y-%m-%dT%H:%M:%S"
                )

                # Tallies may be null in the API response; normalise them
                # before comparing so that None > None cannot raise.
                ayes = action["Ayes"] or 0
                nays = action["Nays"] or 0
                passed = action["UnanimouslyAdopted"] or ayes > nays

                vote = VoteEvent(
                    chamber={"S": "upper", "H": "lower"}[header["LegislativeBody"]],
                    motion_text=action["Action"],
                    classification="passage",
                    result="pass" if passed else "fail",
                    start_date=action_date.strftime("%Y-%m-%d"),
                    bill=bill,
                )
                vote.add_source(resp.url)
                vote.set_count("yes", ayes)
                vote.set_count("no", nays)
                vote.set_count("other", (action["Present"] or 0))
                vote.set_count("absent", (action["Absent"] or 0))
                vote.set_count("excused", (action["Excused"] or 0))
                vote.set_count("not voting", (action["NotVoting"] or 0))

                # Anything other than Y/N is recorded as "other".
                for v in action["Votes"]:
                    vote_type = {"Y": "yes", "N": "no"}.get(v["Vote"], "other")
                    vote.vote(vote_type, v["Legislator"]["FullName"])
                vote.dedupe_key = resp.url + str(action["ReferralNumber"])
                yield vote
Exemplo n.º 19
0
 def _get_votes(self, date, actor, action, bill, url):
     """Yield a VoteEvent when ``action`` carries a yeas/nays tally.

     The text before the tally becomes the motion text, and the vote passes
     when the yeas strictly outnumber the nays. Nothing is yielded when the
     action mentions no tally or the tally pattern does not match.
     """
     shouted = action.upper()
     if "YEAS" not in shouted and "AYES" not in shouted:
         return

     vre = r"(?P<leader>.*)(AYES|YEAS):\s+(?P<yeas>\d+)\s+(NOES|NAYS):\s+(?P<nays>\d+).*"
     found = re.match(vre, action)
     if not found:
         return

     groups = found.groupdict()
     yes = int(groups["yeas"])
     no = int(groups["nays"])
     vote = VoteEvent(
         chamber=actor,
         motion_text=groups["leader"],
         result="pass" if yes > no else "fail",
         classification="passage",
         start_date=TIMEZONE.localize(date),
         bill=bill,
     )
     vote.add_source(url)
     yield vote
Exemplo n.º 20
0
    def scrape_vote(self, chamber, session, bill_id, vote_url):
        """Scrape one vote page and yield its VoteEvent.

        Args:
            chamber: Chamber of the bill (stored as ``bill_chamber``; the
                vote itself is always recorded for the lower chamber).
            session: Legislative session identifier.
            bill_id: Identifier of the bill that was voted on.
            vote_url: URL of the vote page; also used as source and
                ``pupa_id``.

        Yields:
            The parsed ``VoteEvent``. Nothing is yielded when the page
            cannot be fetched.
        """
        try:
            resp = self.get(vote_url)
            html = resp.text
        except scrapelib.HTTPError:
            return

        doc = lxml.html.fromstring(html)
        motion = doc.xpath("//p[1]//b[1]/text()")[-1].strip()
        if not motion:
            # Fall back to the page heading when the bold text is blank.
            motion = doc.xpath("//h2[1]/text()")[0].strip()

        # After splitting the "YEA and" heading on whitespace, the first
        # token is the yea total and the fourth is the nay total.
        vote_count = (
            doc.xpath("//h3[contains(text(),'YEA and ')]/text()")[0].strip().split()
        )
        yeas = int(vote_count[0])
        nays = int(vote_count[3])

        date = doc.xpath("//b[contains(text(),'Date:')]/../text()")[1].strip()
        date = datetime.datetime.strptime(date, "%m/%d/%Y").date()

        vote = VoteEvent(
            chamber="lower",
            start_date=date,
            motion_text=motion,
            result="pass" if yeas > nays else "fail",
            classification="passage",
            legislative_session=session,
            bill=bill_id,
            bill_chamber=chamber,
        )
        vote.set_count("yes", yeas)
        vote.set_count("no", nays)
        vote.add_source(vote_url)
        vote.pupa_id = vote_url

        # first table has YEAs
        for name in doc.xpath("//table[1]//font/text()"):
            vote.yes(name.strip())

        # second table is nays
        for name in doc.xpath("//table[2]//font/text()"):
            vote.no(name.strip())

        yield vote
Exemplo n.º 21
0
    def asvote(self):
        """Assemble and return a VoteEvent from this object's accessors.

        Chamber, date, motion, outcome, totals and voter lists all come from
        the corresponding methods on ``self``; ``self.url`` is used as both
        the dedupe key and the source.
        """
        event = VoteEvent(
            chamber=self.chamber(),
            start_date=self.date(),
            motion_text=self.motion(),
            result="pass" if self.passed() else "fail",
            classification="passage",
            bill=self.bill,
        )
        event.dedupe_key = self.url  # URL contains sequence number

        totals = (
            ("yes", self.yes_count),
            ("no", self.no_count),
            ("other", self.other_count),
        )
        for option, total in totals:
            event.set_count(option, total())

        rolls = (
            (self.yes_votes, event.yes),
            (self.no_votes, event.no),
            (self.other_votes, lambda voter: event.vote("other", voter)),
        )
        for voters, record in rolls:
            for voter in voters():
                record(voter)

        event.add_source(self.url)
        return event
Exemplo n.º 22
0
    def parse_vote(
        self, bill, journal_entry_number, action, act_chamber, act_date, url
    ):
        """Yield a VoteEvent built from the tallies embedded in ``action``.

        Whitespace-separated tokens that start with a non-digit followed by
        a digit are read as tallies: a token containing "Y" sets the yes
        total, "N" the no total, and "E" or "A" add to the other total. The
        result follows an explicit "PASSED"/"FAILED" in the action text and
        otherwise falls back to comparing yes against no.
        """
        yes = no = other = 0
        for token in action.split():
            if not re.match(r"[\D][\d]", token):
                continue
            amount = token[1:]
            if "Y" in token:
                yes = int(amount)
            elif "N" in token:
                no = int(amount)
            elif "E" in token or "A" in token:
                other += int(amount)

        declared = next(
            (
                outcome
                for word, outcome in (("PASSED", "pass"), ("FAILED", "fail"))
                if word in action
            ),
            None,
        )
        result = declared or ("pass" if yes > no else "fail")

        vote = VoteEvent(
            bill=bill,
            start_date=act_date.strftime("%Y-%m-%d"),
            chamber=act_chamber,
            motion_text=action + " #" + journal_entry_number,
            result=result,
            classification="passage",
        )

        for option, total in (("yes", yes), ("no", no), ("other", other)):
            vote.set_count(option, total)
        vote.add_source(url)

        yield vote
Exemplo n.º 23
0
    def process_committee_vote(self, committee_action, bill):
        """Build a committee VoteEvent from one committee action record.

        Args:
            committee_action: Mapping with an ``"ActionDate"`` and a
                ``"Vote"`` list whose items carry ``"VoteType"`` and
                ``"VoteCount"``.
            bill: Bill reference passed through to ``VoteEvent``.

        Returns:
            The ``VoteEvent`` with yes/no/other counts, or ``None`` (after
            logging a warning) when the record has no date or vote data.
        """
        try:
            date = committee_action["ActionDate"]
            vote_info = committee_action["Vote"]
        except KeyError:
            self.logger.warning("Committee vote has no data. Skipping.")
            return
        date = self.date_format(date)

        # Start every tally at zero so that a record without a "Yes" or "No"
        # row cannot leave a counter unbound.
        yes_count = no_count = other_count = 0
        for tally in vote_info:
            # An empty count string means zero votes of that type.
            vote_count = 0 if tally["VoteCount"] == "" else int(tally["VoteCount"])

            if tally["VoteType"] == "Yes":
                yes_count = vote_count
            elif tally["VoteType"] == "No":
                no_count = vote_count
            else:
                other_count += vote_count

        result = "pass" if yes_count > no_count else "fail"

        v = VoteEvent(
            chamber="legislature",
            start_date=date,
            motion_text="Committee Vote",
            result=result,
            classification="committee",
            bill=bill,
        )
        v.set_count("yes", yes_count)
        v.set_count("no", no_count)
        v.set_count("other", other_count)

        return v
Exemplo n.º 24
0
def viva_voce_votes(root, session, chamber):
    """Yield a zero-count VoteEvent for each valid viva voce vote under ``root``.

    Every ``div`` whose text starts with "All Members are deemed" is wrapped
    in ``MaybeViva``; wrappers whose ``is_valid`` is false are skipped.

    Args:
        root: Parsed document node exposing ``xpath``.
        session: Session identifier; only its first two characters are
            passed on as ``legislative_session``.
        chamber: Chamber recorded on each emitted VoteEvent.

    Yields:
        VoteEvent whose "yes", "no", "absent" and "not voting" counts are
        all set to zero.
    """
    candidates = root.xpath(u'//div[starts-with(., "All Members are deemed")]')
    for node in candidates:
        viva = MaybeViva(node)
        if not viva.is_valid:
            continue

        # The pass/fail flag drives motion text, result and classification.
        if viva.passed:
            motion_text, result, classification = "passage", "pass", "passage"
        else:
            motion_text, result, classification = "other", "fail", None

        event = VoteEvent(
            chamber=chamber,
            start_date=None,
            motion_text=motion_text,
            result=result,
            classification=classification,
            legislative_session=session[0:2],
            bill=viva.bill_id,
            bill_chamber=viva.chamber,
        )

        # No tallies are recorded for these votes, so every option is zero.
        for option in ("yes", "no", "absent", "not voting"):
            event.set_count(option, 0)

        yield event
Exemplo n.º 25
0
    def scrape_votes(self, vote_url, bill, chamber):
        """Yield one VoteEvent per roll call found in the PDF at ``vote_url``.

        The PDF is downloaded, converted to text and scanned line by line:

        * a line containing a ``d/d/d`` date is remembered as the vote date;
        * a line containing "AM" or "PM" with a ``:`` token supplies the
          motion (the words before the time) and the time of day;
        * "YEAS:", "NAYS:", "ABSTAINED:"/"PASSES:" and "NOT VOTING:" lines
          supply the totals (last token on the line);
        * "YEAS :", "NAYS :", "ABSTAINED :"/"PASSES :" and "NOT VOTING : "
          lines (space before the colon) head the lists of member names.

        A VoteEvent is emitted each time a "NOT VOTING : " heading is
        reached; the collected names are then reset for the next roll call.

        Args:
            vote_url: URL of the roll-call PDF; also recorded as the source.
            bill: Bill the votes are attached to.
            chamber: Chamber recorded on each VoteEvent.

        Yields:
            VoteEvent with the individual member votes attached.  Nothing is
            yielded (a warning is logged) when the download raises
            ``scrapelib.HTTPError``.
        """
        try:
            filename, _ = self.urlretrieve(vote_url)
        except scrapelib.HTTPError:
            self.logger.warning("PDF not posted or available")
            return
        # Grabs text from pdf
        pdflines = [
            line.decode("utf-8")
            for line in convert_pdf(filename, "text").splitlines()
        ]
        os.remove(filename)

        def names_until(start, stop_markers, heading=None):
            """Collect names from ``pdflines[start:]`` up to the first stop line.

            Each line is split on double spaces.  When ``heading`` is given,
            a line having it as a whole cell is skipped and any cell
            containing it is dropped.  Every line is visited exactly once
            and the scan ends quietly at the end of the text if no stop
            marker is found.
            """
            names = []
            for raw in pdflines[start:]:
                if any(marker in raw for marker in stop_markers):
                    break
                cells = raw.split("  ")
                if heading is not None and heading in cells:
                    continue
                names.extend(
                    cell.strip()
                    for cell in cells
                    if cell and (heading is None or heading not in cell)
                )
            return names

        vote_date = 0
        # Totals default to zero so that a PDF lacking one of the total
        # lines cannot leave a counter unbound when the result is computed.
        yeas = nays = abstained = not_voting = 0
        voters = defaultdict(list)
        for x, line in enumerate(pdflines):
            if re.search(r"(\d+/\d+/\d+)", line):
                initial_date = line.strip()
            if ("AM" in line) or ("PM" in line):
                split_l = line.split()
                for token in split_l:
                    if ":" not in token:
                        continue
                    time_location = split_l.index(token)
                    # Words before the time are the motion; the time token
                    # and everything after it are glued into e.g. 1:02:03PM.
                    motion = " ".join(split_l[:time_location])
                    clock = "".join(split_l[time_location:])
                    dt = datetime.strptime(
                        initial_date + " " + clock, "%m/%d/%Y %I:%M:%S%p"
                    )
                    vote_date = central.localize(dt).isoformat()
                    # In rare case that no motion is provided
                    if len(motion) < 1:
                        motion = "No Motion Provided"
            if "YEAS:" in line:
                yeas = int(line.split()[-1])
            if "NAYS:" in line:
                nays = int(line.split()[-1])
            if "ABSTAINED:" in line:
                abstained = int(line.split()[-1])
            if "PASSES:" in line:
                # "PASSES" totals are stored in the same counter as abstentions.
                abstained = int(line.split()[-1])
            if "NOT VOTING:" in line:
                not_voting = int(line.split()[-1])

            # The previous implementation advanced its line offset only after
            # re-reading the line, so the first line of every section was
            # processed twice (duplicating names in the abstain section) and
            # a missing stop marker ran past the end of the text.
            if "YEAS :" in line:
                voters["yes"].extend(names_until(x, ("NAYS : ",), "YEAS"))
            if line and "NAYS :" in line:
                voters["no"].extend(
                    names_until(x, ("ABSTAINED : ", "PASSES :"), "NAYS")
                )
            if line and ("ABSTAINED :" in line or "PASSES :" in line):
                # Names of this section are read starting two lines below
                # the heading.
                voters["abstain"].extend(names_until(x + 2, ("NOT VOTING :",)))

            if line and "NOT VOTING : " in line:
                # NOTE(review): the number of name lines is estimated from the
                # total divided by the token count of the heading line --
                # confirm this matches the PDF layout.
                lines_to_go_through = math.ceil(not_voting / len(line.split()))
                first = x + 2
                for raw in pdflines[first:first + lines_to_go_through]:
                    voters["not voting"].extend(
                        cell.strip() for cell in raw.split("  ") if cell
                    )

                # Yeas must outnumber every other category combined.
                passed = yeas > (nays + abstained + not_voting)

                ve = VoteEvent(
                    chamber=chamber,
                    start_date=vote_date,
                    motion_text=motion,
                    result="pass" if passed else "fail",
                    bill=bill,
                    classification="passage",
                )
                ve.add_source(vote_url)
                for how_voted, how_voted_voters in voters.items():
                    for voter in how_voted_voters:
                        if voter:
                            ve.vote(how_voted, voter)
                # Resets voters dictionary before going onto next page in pdf
                voters = defaultdict(list)
                yield ve
Exemplo n.º 26
0
    def parse_vote_pdf(self, vote_url, bill):
        """Parse the roll-call PDF at ``vote_url`` into a VoteEvent for ``bill``.

        The first text line must contain "Calendar Date:" followed by the
        timestamp.  The first line containing both "Yeas" and "Nays" holds
        the five totals (yes, no, not voting, excused, absent).  The motion
        is searched for on the lines just above the totals, and the member
        names listed below them are assigned to categories in order, moving
        to the next category at every heading/stopper line.

        Args:
            vote_url: URL of the PDF; "Senate" in the URL selects the upper
                chamber, anything else the lower chamber.
            bill: Bill object; its ``identifier`` is part of the vote's
                ``pupa_id``.

        Returns:
            The populated VoteEvent.

        Raises:
            ValueError: if no totals line is found, or if the number of
                names parsed per category differs from the printed totals.
        """
        import os  # local import: only used to delete the downloaded file

        filename, _ = self.urlretrieve(vote_url)
        try:
            text = convert_pdf(filename, type="text").decode()
        finally:
            # The download is only needed for the conversion above; remove it
            # so repeated calls do not pile up files on disk.
            os.remove(filename)
        lines = text.splitlines()

        chamber = "upper" if "Senate" in vote_url else "lower"

        date_string = lines[0].split("Calendar Date:")[1].strip()
        date = datetime.datetime.strptime(date_string, "%b %d, %Y %I:%M (%p)")

        page_index = None
        for index, line in enumerate(lines):
            if "Yeas" in line and "Nays" in line:
                page_index = index
                break

        # Compare with None explicitly: a plain truthiness test would also
        # reject a legitimate match at index 0.
        if page_index is None:
            raise ValueError("Vote Counts Not found at %s" % vote_url)

        vote_counts = 5 * [0]
        vote_types = ["yes", "no", "not voting", "excused", "absent"]

        # Each segment of the totals line looks like "<number> <label>".
        counts = re.split(r"\s{2,}", lines[page_index].strip())
        for index, count in enumerate(counts):
            number, _label = count.split(" ", 1)
            vote_counts[index] = int(number)

        passed = vote_counts[0] > vote_counts[1]

        # Consent calendar votes address multiple bills in one VoteEvent
        # eg, http://mgaleg.maryland.gov/2018RS/votes/Senate/0478.pdf
        is_consent_calendar = any(
            "Consent Calendar" in line for line in lines[:page_index]
        )
        consent_calendar_bills = None
        motion = ""
        if is_consent_calendar:
            motion = re.split(r"\s{2,}", lines[page_index - 4].strip())[0]
            consent_calendar_bills = re.split(r"\s{2,}", lines[page_index - 1].strip())
            # NOTE(review): re.split always returns at least one element, so
            # this check cannot currently fail.
            assert (
                consent_calendar_bills
            ), "Could not find bills for consent calendar vote"

        motion_keywords = [
            "favorable",
            "reading",
            "amendment",
            "motion",
            "introduced",
            "bill pass",
            "committee",
        ]
        motion_lines = [
            3,
            2,
            4,
            5,
        ]  # Relative LineNumbers to be checked for existence of motion

        def looks_like_motion(candidate):
            """Return True when ``candidate`` contains any motion keyword."""
            lowered = candidate.lower()
            return any(keyword in lowered for keyword in motion_keywords)

        # Try the candidate lines in order until one looks like a motion;
        # the ``else`` branch runs only when every candidate was consumed.
        for i in motion_lines:
            if looks_like_motion(motion):
                break
            motion = re.split(r"\s{2,}", lines[page_index - i].strip())[0]
        else:
            if not looks_like_motion(motion):
                # This condition covers for the bad formating in SB 1260
                motion = lines[page_index - 3]
            if not looks_like_motion(motion):
                # Check this one for SB 747
                motion = "No motion given"
                self.warning("No motion given")

        vote = VoteEvent(
            bill=bill,
            chamber=chamber,
            start_date=date.strftime("%Y-%m-%d"),
            motion_text=motion,
            classification="passage",
            result="pass" if passed else "fail",
        )

        # Include bill ID to avoid duplication for consent calendars
        vote.pupa_id = "{}#{}".format(vote_url, bill.identifier)

        for index, vote_type in enumerate(vote_types):
            vote.set_count(vote_type, vote_counts[index])
        # Name parsing starts two lines below the totals line.
        page_index = page_index + 2

        # Keywords for identifying where names are located in the pdf
        show_stoppers = [
            "Voting Nay",
            "Not Voting",
            "COPY",
            "Excused",
            "indicates vote change",
            "Indicates Vote Change",
        ]
        vote_index = 0

        # For matching number of names extracted with vote counts(extracted independently)
        vote_name_counts = 5 * [0]

        while page_index < len(lines):

            current_line = lines[page_index].strip()
            page_index += 1

            # Blank lines and the first heading do not change the category.
            if not current_line or "Voting Yea" in current_line:
                continue

            # Every stopper line moves on to the next vote category.
            if any(show_stopper in current_line for show_stopper in show_stoppers):
                vote_index = vote_index + 1
                continue

            names = re.split(r"\s{2,}", current_line)

            vote_name_counts[vote_index] += len(names)

            for name in names:
                vote.vote(vote_types[vote_index], name)

        if vote_counts != vote_name_counts:
            raise ValueError("Votes Count and Number of Names don't match")

        return vote
Exemplo n.º 27
0
    def scrape_vote(self, bill, date, url):
        """Yield the VoteEvent described by the JSON document at ``url``.

        Nothing is yielded when the chamber cannot be derived from the
        action log's ``FullName`` (a warning is logged) or when the action
        log's ``StatusText`` is empty, since that text is used as the motion.

        Args:
            bill: Bill the vote belongs to.
            date: Value passed through as the vote's ``start_date``.
            url: JSON endpoint; also used as the source and dedupe key.

        Yields:
            VoteEvent carrying yes/no/excused/absent counts and roll calls.
        """
        page = self.get(url).json()
        action_log = page["actionLog"]

        location = action_log["FullName"]
        chamber = None
        if location:
            # The first keyword found in the location name decides.
            keyword_to_chamber = (
                ("House", "lower"),
                ("Senate", "upper"),
                ("Joint", "legislature"),
            )
            for keyword, code in keyword_to_chamber:
                if keyword in location:
                    chamber = code
                    break
        if chamber is None:
            self.warning("Bad Vote chamber: '%s', skipping" % location)
            return

        motion = action_log["StatusText"]
        if not motion:
            # If we can't detect a motion, skip this vote
            return

        tallies = {
            "yes": page["Yeas"],
            "no": page["Nays"],
            "excused": page["Excused"],
            "absent": page["Absent"],
        }
        outcome = "pass" if tallies["yes"] > tallies["no"] else "fail"

        # Veto overrides are not classified until they are supported again
        # in OS-core.
        if motion.startswith("Do Pass"):
            vtype = "passage"
        elif motion == "Concurred in amendments":
            vtype = "amendment"
        else:
            vtype = []

        vote = VoteEvent(
            chamber=chamber,
            start_date=date,
            motion_text=motion,
            result=outcome,
            classification=vtype,
            bill=bill,
        )
        # differentiate nearly identical votes
        vote.dedupe_key = url

        vote.add_source(url)
        for option, total in tallies.items():
            vote.set_count(option, total)

        # Roll-call values other than the ones below are ignored.
        for member in page["RollCalls"]:
            cast = member["Vote1"]
            if cast == "Nay":
                vote.no(member["UniqueName"])
            elif cast == "Excused":
                vote.vote("excused", member["UniqueName"])
            elif cast == "Absent":
                vote.vote("absent", member["UniqueName"])
            elif cast in ("Aye", "Yea"):
                vote.yes(member["UniqueName"])

        yield vote
Exemplo n.º 28
0
    def scrape_vote(self, url, session):
        """Parse the roll-call PDF at ``url`` into a VoteEvent.

        The text is read in two phases.  In the preamble the most recent
        bill id and the motion text are tracked until the line holding the
        five totals (Yeas, Nays, Not Voting, Excused, Absent) is reached;
        after that, heading lines select a vote category and every other
        line is split into member names for the current category.

        Args:
            url: Location of the PDF; "senate" in it selects the upper
                chamber.  Also used as the vote's source and dedupe key.
            session: Passed through as the vote's ``legislative_session``.

        Returns:
            The VoteEvent, or ``None`` when the text lacks the word
            "Maryland", a preamble line mentions "vetoed", or the bill id,
            the motion or the totals line is missing.
        """
        import os  # local import: only used to delete the downloaded PDF

        fname, _ = self.urlretrieve(url)
        try:
            text = convert_pdf(fname, type="text").decode()
        finally:
            # The download is only needed for the conversion above.
            os.remove(fname)
        lines = text.splitlines()

        chamber = "upper" if "senate" in url else "lower"
        if "Maryland" not in text:
            self.warning(f"empty vote from {url}")
            return None
        date = re.findall(r"Legislative Date: (\w+ \d+, \d{4})", text)[0]

        section = "preamble"
        motion = None
        bill_id = None
        how = None
        totals = None  # stays None until the totals line has been seen
        voters = defaultdict(list)

        for line in lines:
            if section == "preamble":
                if "vetoed" in line.lower():
                    self.warning(
                        f"skipping vote that appears to be on prior session: {line}, {bill_id}"
                    )
                    return None
                possible_bill_id = re.findall(r"([HS][BJR] \d+)", line)
                if possible_bill_id:
                    bill_id = possible_bill_id[0]

                # preamble has metadata, then motion, then counts.  our process then is to
                # store the last line as the motion, but if the last line looks like a
                # continuation, append it to the prior line

                line = line.strip()
                counts = re.findall(
                    r"(\d+) Yeas\s+(\d+) Nays\s+(\d+) Not Voting\s+(\d+) Excused\s+(\d+) Absent",
                    line,
                )
                if counts:
                    totals = [int(number) for number in counts[0]]
                    section = "votes"
                elif line and line != "(Const)":
                    # questions seem to be split across two lines; only
                    # append when there is a previous line to append to
                    if line.endswith("?") and motion:
                        motion = motion + " " + line
                    else:
                        motion = line
            elif section == "votes":
                if line.startswith("Voting Yea"):
                    how = "yes"
                elif line.startswith("Voting Nay"):
                    how = "no"
                elif line.startswith("Not Voting"):
                    how = "not voting"
                elif line.startswith("Excused from Voting"):
                    how = "excused"
                elif line.startswith("Excused (Absent)"):
                    how = "absent"
                elif how:
                    names = re.split(r"\s{2,}", line)
                    voters[how].extend(names)

        if not bill_id and not motion:
            return None
        if not motion:
            self.warning(
                f"got {bill_id} but no motion, not registering as a vote")
            # Actually skip the vote, as the message says; previously
            # execution fell through and built a vote without a motion.
            return None
        if not bill_id:
            self.warning(
                f"got {motion} but no bill_id, not registering as a vote")
            return None
        if totals is None:
            # Without the totals line there is nothing to record or verify.
            self.warning(
                f"no vote totals found in {url}, not registering as a vote")
            return None
        yes_count, no_count, nv_count, excused_count, absent_count = totals

        # bleh - result not indicated anywhere
        result = "pass" if yes_count > no_count else "fail"
        bill_chamber = "upper" if bill_id.startswith("S") else "lower"
        date = datetime.datetime.strptime(date,
                                          "%b %d, %Y").strftime("%Y-%m-%d")
        vote = VoteEvent(
            chamber=chamber,
            start_date=date,
            result=result,
            classification="passage",
            motion_text=motion,
            legislative_session=session,
            bill=bill_id,
            bill_chamber=bill_chamber,
        )
        # URL includes sequence ID, will be unique
        vote.dedupe_key = url
        vote.add_source(url)
        vote.set_count("yes", yes_count)
        vote.set_count("no", no_count)
        vote.set_count("not voting", nv_count)
        vote.set_count("excused", excused_count)
        vote.set_count("absent", absent_count)
        for category, names in voters.items():
            for name in names:
                # Asterisks are stripped from names; footer fragments are
                # skipped entirely.
                name = name.strip().replace("*", "")
                if name and "COPY" not in name and "Indicates Vote Change" not in name:
                    vote.vote(category, name)
        check_counts(vote, raise_error=True)
        return vote
Exemplo n.º 29
0
    def parse_bill_actions_table(self, bill, action_table, bill_id, session,
                                 url, bill_chamber):
        """Record every action row on ``bill`` and yield the votes found.

        The first child of ``action_table`` is skipped; every other child
        is read as (date, actor code, action text).  Each row is added to
        ``bill`` as an action, with any recognised committees attached as
        related organizations.  When ``self.parse_vote`` extracts a vote
        from the action text, a VoteEvent is yielded.

        Args:
            bill: Bill object the actions are added to.
            action_table: Element whose children are the action rows.
            bill_id: Identifier recorded on each VoteEvent.
            session: Legislative session recorded on each VoteEvent.
            url: Source URL recorded on each VoteEvent.
            bill_chamber: Chamber of the bill, recorded on each VoteEvent.

        Yields:
            VoteEvent with yes / no / not voting counts and member votes.
        """
        # Actors for which a reconsideration has been seen since their last
        # vote; the next vote by such an actor is labelled "Reconsider: ".
        pending_reconsideration = set()

        rows = action_table.xpath("*")[1:]
        for row in rows:
            raw_date = row[0].text_content()
            date = dt.datetime.strptime(raw_date, "%m/%d/%Y").strftime("%Y-%m-%d")
            actor_code = row[1].text_content().upper()
            description = row[2].text_content()
            actor = self._vote_type_map[actor_code]
            act_type, committees = categorize_action(description)

            # XXX: Translate short-code to full committee name for the
            #      matcher.
            committee_names = []
            for short_code in committees or ():
                try:
                    committee_names.append(self.short_ids[short_code]["name"])
                except KeyError:
                    # Short codes without a known name are left out.
                    continue

            recorded = bill.add_action(
                description,
                date,
                chamber=actor,
                classification=act_type,
            )
            for name in committee_names:
                recorded.add_related_entity(name=name,
                                            entity_type="organization")

            parsed = self.parse_vote(description)
            if not parsed:
                if re.search("reconsider", description, re.IGNORECASE):
                    pending_reconsideration.add(actor)
                continue

            tallies, motion = parsed
            if actor in pending_reconsideration:
                motion_text = "Reconsider: " + motion
            else:
                motion_text = motion
            outcome = "pass" if "passed" in description.lower() else "fail"

            event = VoteEvent(
                start_date=date,
                chamber=actor,
                bill=bill_id,
                bill_chamber=bill_chamber,
                legislative_session=session,
                motion_text=motion_text,
                result=outcome,
                classification="passage",
            )
            pending_reconsideration.discard(actor)
            event.add_source(url)
            # Missing tallies count as zero.
            event.set_count("yes", int(tallies["n_yes"] or 0))
            event.set_count("no", int(tallies["n_no"] or 0))
            event.set_count("not voting", int(tallies["n_excused"] or 0))

            # "yes_resv" names are recorded as plain yes votes.
            recorders = (
                ("yes", event.yes),
                ("yes_resv", event.yes),
                ("no", event.no),
            )
            for key, record in recorders:
                for voter in split_specific_votes(tallies[key]):
                    record(self.clean_voter_name(voter))
            for voter in split_specific_votes(tallies["excused"]):
                event.vote("not voting", self.clean_voter_name(voter))

            yield event
Exemplo n.º 30
0
    def handle_page(self):
        """Yield the committee VoteEvent shown on this page, if it has totals.

        Nothing is yielded when the page has no "lblTotal" span text.
        Otherwise the date, tallies, committee and action are read from
        their spans and each entry of the vote list is recorded as a yes,
        no or not-voting vote.  Entries whose marker is a parenthesised Y
        or N are skipped.

        Yields:
            VoteEvent for ``self.kwargs["bill"]`` with chamber "lower" and
            classification "committee".

        Raises:
            ValueError: if a member's vote marker is not recognised, or if
                a span expected exactly once is missing or repeated.
        """

        def texts(query):
            """Evaluate ``query`` against the page document."""
            return self.doc.xpath(query)

        # Checks to see if any vote totals are provided
        if not texts('//span[contains(@id, "ctl00_MainContent_lblTotal")]/text()'):
            return

        [raw_date] = texts('//span[contains(@id, "lblDate")]/text()')
        parsed_date = datetime.datetime.strptime(raw_date, "%m/%d/%Y %I:%M:%S %p")
        date = format_datetime(parsed_date, "US/Eastern")

        yes_count = int(texts('//span[contains(@id, "lblYeas")]/text()')[0])
        no_count = int(texts('//span[contains(@id, "lblNays")]/text()')[0])
        other_count = int(texts('//span[contains(@id, "lblMissed")]/text()')[0])
        # A tie counts as a failure.
        if yes_count > no_count:
            result = "pass"
        else:
            result = "fail"

        [committee] = texts('//span[contains(@id, "lblCommittee")]/text()')
        [action] = texts('//span[contains(@id, "lblAction")]/text()')
        motion = "{} ({})".format(action, committee)

        vote = VoteEvent(
            start_date=date,
            bill=self.kwargs["bill"],
            chamber="lower",
            motion_text=motion,
            result=result,
            classification="committee",
        )
        vote.add_source(self.url)
        vote.set_count("yes", yes_count)
        vote.set_count("no", no_count)
        vote.set_count("not voting", other_count)

        for entry in texts('//ul[contains(@class, "vote-list")]/li'):
            if not entry.text_content().strip():
                continue

            [member] = entry.xpath("span[2]//text()")
            [marker] = entry.xpath("span[1]//text()")
            member = member.strip()

            if marker == "Y":
                vote.yes(member)
                continue
            if marker == "N":
                vote.no(member)
                continue
            if marker == "-":
                vote.vote("not voting", member)
                continue
            # Parenthetical votes appear to not be counted in the
            # totals for Yea, Nay, _or_ Missed
            if re.search(r"\([YN]\)", marker):
                continue
            raise ValueError(
                "Unknown vote type found: {}".format(marker))

        yield vote