예제 #1
0
    def scrape_old_vote(self, url):
        """Scrape a roll-call vote from an old-layout vote page.

        The text of the page's first ``<h3>`` is treated as a ``', '``-separated
        header: field 1 names the chamber (optionally followed by a location)
        and fields 2 onward form the motion text.  Totals are read from the
        second ``<table>`` on the page and individual votes from the first.

        Args:
            url: Address of the vote page; also recorded as the vote's source.

        Returns:
            A ``Vote`` (created with no date) carrying the totals and one
            yes/no/other entry per legislator found in the roll-call table.
        """
        vote_page = self.soup_parser(self.urlopen(url))

        header = vote_page.h3.contents[0]
        header_parts = header.split(', ')

        chamber_name = header_parts[1]
        if chamber_name.startswith('House'):
            chamber = 'lower'
        else:
            chamber = 'upper'

        # Whatever follows the first word of the chamber name is kept as the
        # location, except for the tail of "House of Representatives".
        location = ' '.join(chamber_name.split(' ')[1:])
        if location.startswith('of Representatives'):
            location = ''

        motion = ', '.join(header_parts[2:])

        def get_count(cell):
            # An empty cell is counted as zero.
            if not cell.contents:
                return 0
            return int(cell.contents[0])

        # The totals sit in the odd-numbered cells of the second table, in the
        # order yes, no, excused, absent.
        result_cells = vote_page.findAll('table')[1].findAll('td')
        yes_count = get_count(result_cells[1])
        no_count = get_count(result_cells[3])
        excused_count = get_count(result_cells[5])
        absent_count = get_count(result_cells[7])
        other_count = excused_count + absent_count

        passed = yes_count > no_count

        vote = Vote(chamber, None, motion, passed,
                    yes_count, no_count,
                    other_count, excused_count=excused_count,
                    absent_count=absent_count,
                    location=location)
        vote.add_source(url)

        # In the roll-call table each vote cell is preceded by the element
        # holding the legislator's name.
        for td in vote_page.table.findAll('td'):
            if not td.contents:
                # Empty cells carry neither a name nor a vote; indexing them
                # would raise IndexError.
                continue
            label = td.contents[0]
            if label == 'Yea':
                vote.yes(td.findPrevious().contents[0])
            elif label == 'Nay':
                vote.no(td.findPrevious().contents[0])
            elif label in ['Excused', 'Absent']:
                vote.other(td.findPrevious().contents[0])

        return vote
예제 #2
0
    def scrape_new_vote(self, url):
        """Scrape a roll-call vote from a new-layout vote page.

        The header element (id ``ctl00_contentMain_hdVote``) is treated as a
        ``', '``-separated string: field 1 names the chamber (optionally
        followed by a location) and fields 2 onward form the motion text.
        Totals and individual votes are located by their element ids.

        Args:
            url: Address of the vote page; also recorded as the vote's source.

        Returns:
            A ``Vote`` (created with no date) carrying the totals and one
            yes/no/other entry per legislator found in the roll-call table.
        """
        vote_page = self.soup_parser(self.urlopen(url))

        header = vote_page.find(id="ctl00_contentMain_hdVote").contents[0]
        header_parts = header.split(', ')

        chamber_name = header_parts[1]
        if chamber_name.startswith('House'):
            chamber = 'lower'
        else:
            chamber = 'upper'

        # Whatever follows the first word of the chamber name is kept as the
        # location, except for the tail of "House of Representatives".
        location = ' '.join(chamber_name.split(' ')[1:])
        if location.startswith('of Representatives'):
            location = ''

        motion = ', '.join(header_parts[2:])

        def get_count(cell_id):
            # An empty cell is counted as zero rather than raising IndexError.
            cell = vote_page.find(id=cell_id)
            if not cell.contents:
                return 0
            return int(cell.contents[0])

        yes_count = get_count("ctl00_contentMain_tdAyes")
        no_count = get_count("ctl00_contentMain_tdNays")
        excused_count = get_count("ctl00_contentMain_tdExcused")
        absent_count = get_count("ctl00_contentMain_tdAbsent")
        other_count = excused_count + absent_count

        passed = yes_count > no_count

        vote = Vote(chamber, None, motion, passed,
                    yes_count, no_count,
                    other_count, excused_count=excused_count,
                    absent_count=absent_count,
                    location=location)
        vote.add_source(url)

        # In the roll-call table each vote cell is preceded by the element
        # holding the legislator's name.
        vote_tbl = vote_page.find(id="ctl00_contentMain_tblVotes")
        for td in vote_tbl.findAll('td'):
            if not td.contents:
                # Empty cells carry neither a name nor a vote; indexing them
                # would raise IndexError.
                continue
            label = td.contents[0]
            if label == 'Yea':
                vote.yes(td.findPrevious().contents[0])
            elif label == 'Nay':
                vote.no(td.findPrevious().contents[0])
            elif label in ['Excused', 'Absent']:
                vote.other(td.findPrevious().contents[0])

        return vote
예제 #3
0
 def parse_vote(self, bill, actor, date, text, line):
     """Fetch a roll-call page, build a ``Vote`` from it and attach it to *bill*.

     Each ``<p>`` whose text has the form ``LABEL--name, name, ...--total`` is
     treated as one section of the roll call.  ``YEAS`` and ``NAYS`` sections
     are recorded as yes and no votes; every other label is recorded as
     "other".  The yes/no/other counts and the ``passed`` flag are derived
     from the number of names collected, not from the totals on the page.

     Args:
         bill: Object the finished vote is attached to via ``add_vote``.
         actor: Unused here; kept for interface compatibility.
         date: Date handed to the ``Vote``.
         text: Motion text handed to the ``Vote``.
         line: Mapping whose ``'href'`` value is the site-relative URL of the
             roll-call page.

     Raises:
         IndexError: If no "<date> House" / "<date> Senate" marker is found
             in the page text.
     """
     url = "http://leg1.state.va.us%s" % line['href']
     abbr = {'S': 'upper', 'H': 'lower'}
     with self.soup_context(url) as vote_data:
         # The chamber is the word following the first date on the page.  An
         # alternation is required here: a character class would match any
         # single one of the listed letters instead of the whole word.
         page_text = self.unescape(unicode(vote_data))
         chamber_words = re.findall(r'\d+/\d+/\d+\s+(House|Senate)\b', page_text)
         house = abbr[chamber_words[0][0]]
         vote = Vote(house, date, text, None, 0, 0, 0)
         for cast in vote_data.findAll('p'):
             if cast.string is None:
                 continue
             cleaned = cast.string.replace('\r\n', ' ')
             split_start = cleaned.find('--')
             voted = cleaned[0:split_start].strip()
             split_end = cleaned.find('--', split_start + 2)
             if split_end == -1:
                 # No second "--": this paragraph has no list of names.
                 continue

             # Names are separated by ", ", but a surname may be followed by
             # a fragment of initials ("Scott, E.T."), which has to be glued
             # back onto the surname that precedes it.
             names = []
             pending = ''
             for fragment in cleaned[split_start + 2:split_end].split(", "):
                 if re.match(r'\w\.\w\.', fragment):
                     names.append(pending + ', ' + fragment)
                     pending = ''
                 else:
                     if pending != '':
                         names.append(pending)
                     pending = fragment
             # A surname is only emitted once the next fragment has been
             # seen, so the final one must be flushed here or the last voter
             # of every section would be lost.
             if pending != '':
                 names.append(pending)

             for voter in names:
                 voter = voter.strip()
                 if not voter:
                     continue
                 if voted == 'YEAS':
                     vote.yes(voter)
                 elif voted == 'NAYS':
                     vote.no(voter)
                 else:
                     vote.other(voter)
         vote['other_count'] = len(vote['other_votes'])
         vote['yes_count'] = len(vote['yes_votes'])
         vote['no_count'] = len(vote['no_votes'])
         vote['passed'] = (vote['yes_count'] > vote['no_count'])
         bill.add_vote(vote)
예제 #4
0
    def scrape_bill(self, chamber, current_bill, session):
        """Scrape one bill with its sponsors, documents, votes and actions, then save it.

        Four kinds of pages are fetched: the bill status page (title and
        internal bill id), the sponsors page, the history page (one row per
        action) and, for history rows that link to one, a roll-call page.

        Args:
            chamber: Chamber value passed to the ``Bill``, to every ``Vote``
                and to every action.
            current_bill: Bill number; used in the status query and as the
                bill's identifier.
            session: Indexable pair; ``session[0]`` goes into the status
                query and ``session[1]`` is passed to ``Bill``.

        Raises:
            Exception: If the site reports an expired ACAS session, or if no
                bill id can be found on the status page.
        """
        status_url = (
            "http://alisondb.legislature.state.al.us/acas/SESSBillsStatusResultsMac.asp?BillNumber=%s&GetStatus=Get+Status&session=%s"
            % (current_bill, session[0])
        )
        with self.soup_context(status_url) as bill_page:
            page_text = str(bill_page)
            if "Your ACAS Session has expired." in page_text:
                raise Exception("Expired cookie - you'll have to run with -n to skip caching")
            try:
                bill_id = int(re.findall(r"BTN([0-9]+)", page_text)[0])
            except (IndexError, ValueError):
                # Only a missing or unparsable id means "no bill"; anything
                # else (e.g. KeyboardInterrupt) must propagate untouched.
                raise Exception("No bill found. Hopefully that means it's the end of the session")
            title = bill_page.find("td", {"colspan": "7"}).string
            self.log("Starting parse of %s" % current_bill)
            # create our bill!
            bill = Bill(session[1], chamber, current_bill, title.strip())

            # add sponsors and co-sponsors
            with self.soup_context(
                "http://alisondb.legislature.state.al.us/acas/ACTIONSponsorsResultsMac.asp?OID=%d" % bill_id
            ) as sponsors:
                # Climb from the "Co-Sponsors" text to the enclosing layout
                # element, which is expected to hold exactly two tables:
                # primary sponsors, then co-sponsors.
                (primary, secondary) = sponsors.findAll("table", text="Co-Sponsors")[0].parent.parent.parent.findAll(
                    "table"
                )
                for p in primary.findAll("td"):
                    bill.add_sponsor("primary", p.string)
                for s in secondary.findAll("td"):
                    bill.add_sponsor("cosponsor", s.string)

            with self.soup_context(
                "http://alisondb.legislature.state.al.us/acas/ACTIONHistoryResultsMac.asp?OID=%d" % bill_id
            ) as history:
                actions = history.findAll("table", text="Committee")[0].parent.parent.parent.findAll("tr")
                # Column order: Date Amend/Subst Matter Committee Nay Yea Abs Vote
                for event in actions:
                    e = event.findAll("td")
                    if not e:
                        continue
                    date = e[0].string
                    amend = e[1].find("input")
                    matter = e[2].string
                    y_votes = e[5].string
                    n_votes = e[4].string
                    a_votes = e[6].string

                    if not matter:
                        continue

                    roll = e[7].find("input")

                    # act_date is not reset per row, so a row without a date
                    # reuses the most recent one.
                    # NOTE(review): if the first processed row has no date,
                    # act_date is unbound when used below - confirm the site
                    # never produces that.
                    if date is not None:
                        act_date = dt.datetime.strptime(date, "%m/%d/%Y")

                    if amend is not None:
                        # The document location is embedded in the button's
                        # documentSelected(...) call.
                        doc_args = re.findall(
                            r"documentSelected\(\'(\w*)\',\'([\w\d-]*)\',\'([\w\.\-]*)\',\'([\w\d/]*)\',\'([\w\d]*)\',\'([\w\s]*)\'",
                            str(amend),
                        )[0]
                        amend_url = "http://alisondb.legislature.state.al.us/acas/%s/%s" % (doc_args[3], doc_args[2])
                        bill.add_document(matter, amend_url)

                    if roll is not None:
                        # The roll-call query parameters are embedded in the
                        # button's voteSelected(...) call.
                        vote_args = re.findall(
                            r"voteSelected\(\'(\d*)\',\'(\d*)\',\'(\d*)\',\'(.*)\',\'(\d*)\'", str(roll)
                        )[0]
                        roll_url = (
                            "http://alisondb.legislature.state.al.us/acas/GetRollCallVoteResults.asp?MOID=%s&VOTE=%s&BODY=%s&SESS=%s"
                            % (vote_args[0], vote_args[1], vote_args[2], vote_args[4])
                        )
                        with self.soup_context(roll_url) as votes:
                            vote_rows = votes.findAll("table", text="Member")[0].parent.parent.parent.findAll("tr")

                            yea_votes = int(
                                votes.findAll("tr", text="Total Yea:")[0].parent.parent.findAll("td")[2].string
                            )
                            nay_votes = int(
                                votes.findAll("tr", text="Total Nay:")[0].parent.parent.findAll("td")[2].string
                            )
                            abs_votes = int(
                                votes.findAll("tr", text="Total Abs:")[0].parent.parent.findAll("td")[2].string
                            )
                            p_votes = len(votes.findAll("tr", text="P"))

                            # chamber, date, motion, passed, yes_count, no_count, other_count
                            vote = Vote(
                                chamber,
                                act_date,
                                matter,
                                (yea_votes > nay_votes),
                                yea_votes,
                                nay_votes,
                                abs_votes + p_votes,
                            )

                            vote.add_source(roll_url)
                            for row in vote_rows:
                                row_text = str(row)
                                if "Total Yea" in row_text or "Total Nay" in row_text or "Total Abs" in row_text:
                                    continue
                                cells = row.findAll("td")
                                if not cells:
                                    continue

                                # Each row can hold two (name, vote) pairs:
                                # cells 0 and 2, then cells 4 and 6.
                                (name, t) = cells[0].string, cells[2].string
                                self.dumb_vote(vote, name, t)

                                if len(cells) > 3:
                                    (name, t) = cells[4].string, cells[6].string
                                    self.dumb_vote(vote, name, t)
                            bill.add_vote(vote)

                    # NOTE(review): a row that has both a roll call and a yea
                    # total adds two votes for the same matter - confirm that
                    # this is intended.
                    if y_votes is not None:
                        yea_votes = self.dumber_vote(y_votes)
                        nay_votes = self.dumber_vote(n_votes)
                        abs_votes = self.dumber_vote(a_votes)
                        vote = Vote(chamber, act_date, matter, (yea_votes > nay_votes), yea_votes, nay_votes, abs_votes)
                        bill.add_vote(vote)

                    bill.add_action(chamber, matter, act_date)
            self.save_bill(bill)