Exemplo n.º 1
0
    def scrape_vote(self, bill, date, motion, url):
        """Scrape one roll-call page and attach the resulting vote to ``bill``.

        The chamber is taken from the URL: a URL ending in ``Senate`` is
        recorded as ``upper``, anything else as ``lower``. Totals are read
        from the centered summary cells; individual voters come from the
        links in the table following each ``<h3>`` heading. "Non Voting" and
        "Present" are both folded into the "other" category.

        Pages containing the text "not yet official" are skipped and nothing
        is added to ``bill``.
        """
        html = self.urlopen(url)

        if "not yet official" in html:
            # Vote pages are sometimes linked before they go live; skip them
            # rather than failing while parsing totals that are not there.
            return

        page = lxml.html.fromstring(html)

        if url.endswith('Senate'):
            actor = 'upper'
        else:
            actor = 'lower'

        # Each summary cell reads "<label>: <number>"; the number is the last
        # whitespace-separated token of the cell text.
        count_path = "string(//td[@align = 'center' and contains(., '%s: ')])"
        yes_count = int(page.xpath(count_path % "Yeas").split()[-1])
        no_count = int(page.xpath(count_path % "Nays").split()[-1])
        other_count = int(page.xpath(count_path % "Non Voting").split()[-1])
        other_count += int(page.xpath(count_path % "Present").split()[-1])

        # The yeas must outnumber everyone who did not vote yes.
        passed = yes_count > no_count + other_count
        vote = Vote(actor, date, motion, passed, yes_count,
                    no_count, other_count)
        vote.add_source(url)

        vote_path = "//h3[. = '%s']/following-sibling::table[1]/tr/td/a"
        for yes in page.xpath(vote_path % "Yeas"):
            vote.yes(yes.text)
        for no in page.xpath(vote_path % "Nays"):
            vote.no(no.text)
        for other in page.xpath(vote_path % "Non Voting"):
            vote.other(other.text)
        for other in page.xpath(vote_path % "Present"):
            vote.other(other.text)

        bill.add_vote(vote)
Exemplo n.º 2
0
    def scrape_vote(self, bill, vote_chamber, bill_id, vote_id, vote_date,
                    action_text):
        """Fetch a roll-call result page and attach the vote to ``bill``.

        The text cells of the result table are consumed in pairs (a member
        name followed by a vote code). Vacant seats, "Total ..." rows and
        blank names are ignored. Codes ``Y`` and ``N`` are recorded as yes
        and no; ``P`` and ``A`` are both recorded as "other". Where the page
        lists totals, they are asserted to match the collected names.
        """
        url_template = ('http://alisondb.legislature.state.al.us/Alison/'
                        'GetRollCallVoteResults.aspx?'
                        'VOTE={0}&BODY={1}&INST={2}&SESS={3}')
        url = url_template.format(
            vote_id, vote_chamber, bill_id, self.session_id)
        page = lxml.html.fromstring(self.get(url=url).text)

        tally = {'Y': [], 'N': [], 'P': [], 'A': []}

        member = ''
        cells = page.xpath('//table/tr/td/font/text()')
        for position, cell in enumerate(cells):
            if position % 2:
                # Odd positions carry the vote code for the preceding name.
                if member:
                    tally[cell].append(member)
                continue

            # Even positions carry a name; blank it when it is not a voter.
            ignorable = (cell.endswith(", Vacant") or
                         cell.startswith("Total ") or
                         not cell.strip())
            member = '' if ignorable else cell

        def listed_total(path):
            # Number after the last ":" of the first match, or None.
            hits = page.xpath(path)
            if not hits:
                return None
            return int(hits[0].split(":")[-1])

        # Check name counts against totals listed on the site
        yea_listed = listed_total(
            '//*[starts-with(text(), "Total Yea")]/text()')
        yea_total = len(tally['Y'])
        if yea_listed is not None:
            assert yea_listed == yea_total, "Yea count incorrect"
            yea_total = yea_listed

        nay_listed = listed_total(
            '//*[starts-with(text(), "Total Nay")]/text()')
        nay_total = len(tally['N'])
        if nay_listed is not None:
            assert nay_listed == nay_total, "Nay count incorrect"
            nay_total = nay_listed

        absent_listed = listed_total(
            '//*[starts-with(text(), "Total Absent")]/text()')
        if absent_listed is not None:
            assert absent_listed == len(tally['A']), "Absent count incorrect"
        other_total = len(tally['P']) + len(tally['A'])

        vote = Vote(
            self.CHAMBERS[vote_chamber[0]], vote_date, action_text,
            yea_total > nay_total, yea_total, nay_total, other_total)
        vote.add_source(url)

        recorders = (
            (tally['Y'], vote.yes),
            (tally['N'], vote.no),
            (tally['A'] + tally['P'], vote.other),
        )
        for members, record in recorders:
            for voter in members:
                record(voter)

        bill.add_vote(vote)
Exemplo n.º 3
0
    def scrape_bill(self, session, bills):
        """Build, populate and save the bill described by ``bills[0]``.

        ``bills[0]`` is unpacked as a ``(billdata, details)`` pair; only this
        first entry is used. The bill object is produced by
        ``AssemblyBillPage`` and then enriched with the companion bill, the
        summary and the votes found under ``billdata['data']['bill']``.
        All votes from that payload are recorded with chamber ``upper``.
        """
        billdata, details = bills[0]

        # Unpacking the whole tuple also checks that ``details`` has the
        # expected shape; only ``bill_chamber`` is used below.
        (senate_url, assembly_url, bill_chamber, bill_type, bill_id, title,
         (letter, number, is_amd)) = details

        data = billdata['data']['bill']

        assembly = AssemblyBillPage(self, session, bill_chamber, details)
        assembly.build()
        bill = assembly.bill
        bill.add_source(billdata['url'])

        # Add companion.
        if data['sameAs']:
            bill.add_companion(data['sameAs'])

        if data['summary']:
            bill['summary'] = data['summary']

        for vote_data in data['votes'] or ():
            # Counts start at zero and are incremented as names are added.
            vote = Vote(chamber='upper',
                        date=self.date_from_timestamp(
                            vote_data['voteDate']),
                        motion=vote_data['description']
                        or '[No motion available.]',
                        passed=False,
                        yes_votes=[],
                        no_votes=[],
                        other_votes=[],
                        yes_count=0,
                        no_count=0,
                        other_count=0)

            for name in vote_data['ayes']:
                vote.yes(name)
                vote['yes_count'] += 1
            for category in ('absent', 'excused', 'abstains'):
                # ``get`` returns None for a category that is missing from
                # the payload; treat that as "nobody" instead of crashing.
                for name in vote_data.get(category) or ():
                    vote.other(name)
                    vote['other_count'] += 1
            for name in vote_data['nays']:
                vote.no(name)
                vote['no_count'] += 1

            vote['passed'] = vote['yes_count'] > vote['no_count']

            bill.add_vote(vote)

        # NOTE: data['previousVersions'] (instances of the same bill from
        # prior sessions) is currently not used.

        if not data['title']:
            # Fall back to the summary when the payload carries no title.
            bill['title'] = bill['summary']

        self.save_bill(bill)
Exemplo n.º 4
0
    def scrape_vote(self, bill, date, motion, url):
        """Scrape one roll-call page and attach the resulting vote to ``bill``.

        Pages containing "not yet official" are skipped. The chamber is
        ``upper`` when the URL ends in ``Senate`` and ``lower`` otherwise.
        "Non Voting" and "Present" are both counted and recorded as "other".
        """
        html = self.urlopen(url)

        # Sometimes they link to vote pages before they go live
        if "not yet official" in html:
            return

        doc = lxml.html.fromstring(html)
        actor = "upper" if url.endswith("Senate") else "lower"

        count_path = "string(//td[@align = 'center' and contains(., '%s: ')])"

        def tally(label):
            # The number is the last token of the "<label>: <n>" cell text.
            return int(doc.xpath(count_path % label).split()[-1])

        yes_count = tally("Yeas")
        no_count = tally("Nays")
        other_count = tally("Non Voting")
        other_count += tally("Present")

        vote = Vote(actor, date, motion,
                    yes_count > no_count + other_count,
                    yes_count, no_count, other_count)
        vote.add_source(url)

        vote_path = "//h3[. = '%s']/following-sibling::table[1]/tr/td/a"
        sections = (
            ("Yeas", vote.yes),
            ("Nays", vote.no),
            ("Non Voting", vote.other),
            ("Present", vote.other),
        )
        for heading, record in sections:
            for link in doc.xpath(vote_path % heading):
                record(link.text)

        bill.add_vote(vote)
Exemplo n.º 5
0
    def parse_vote(self, actor, date, row):
        """
        Build and return a Vote object from the actor, the date and a row
        element. The first span holds "<result>-<yes>-<no>-<other>"; the
        text following the second and third spans lists the yes and no
        voters, and later spans starting with "Absent" or "Excused" list
        the other voters.
        """
        spans = row.xpath('.//span')

        # An empty motion (after dropping nbsp and hyphens) means passage.
        motion = row.text.replace(u'\u00a0', " ").replace("-", "").strip()
        if not motion:
            motion = "passage"

        summary = spans[0].text_content().rsplit('-', 3)
        passed, yes_count, no_count, other_count = summary

        yes_votes = self.get_names(spans[1].tail)
        no_votes = self.get_names(spans[2].tail)
        other_votes = []
        for extra in spans[3:]:
            if extra.text.startswith(('Absent', 'Excused')):
                other_votes.extend(self.get_names(extra.tail))

        # Map the textual result onto a boolean when a keyword is present.
        outcome = passed.lower()
        for keyword, result in {'adopted': True, 'passed': True, 'failed':False}.items():
            if keyword in outcome:
                passed = result
                break

        vote = Vote(actor, date, motion, passed, int(yes_count), int(no_count),
                    int(other_count))

        groups = (
            (yes_votes, vote.yes),
            (no_votes, vote.no),
            (other_votes, vote.other),
        )
        for names, record in groups:
            for name in names:
                if name and name != 'None':
                    record(name)
        return vote
Exemplo n.º 6
0
    def scrape_vote(self, bill, date, url):
        """Scrape a single vote page and attach the resulting vote to ``bill``.

        The ``hdVote`` heading is split on ``', '``: the second field is the
        location (the chamber, optionally followed by a committee name) and
        the remaining fields form the motion text. Excused and absent
        members are both counted and recorded as "other".

        Raises:
            ScrapeError: if the location starts with neither ``House`` nor
                ``Senate``.
        """
        with self.urlopen(url) as page:
            page = lxml.html.fromstring(page)

            header = page.xpath("string(//h4[contains(@id, 'hdVote')])")

            location = header.split(', ')[1]

            if location.startswith('House'):
                chamber = 'lower'
            elif location.startswith('Senate'):
                chamber = 'upper'
            else:
                # Report the offending location: ``chamber`` is unbound on
                # this path, so formatting it would raise UnboundLocalError
                # instead of the intended ScrapeError.
                raise ScrapeError("Bad chamber: %s" % location)

            # Whatever follows the first word of the location is the
            # committee; "House of Representatives" is not a committee.
            committee = ' '.join(location.split(' ')[1:]).strip()
            if not committee or committee.startswith('of Representatives'):
                committee = None

            motion = ', '.join(header.split(', ')[2:]).strip()

            yes_count = int(
                page.xpath("string(//td[contains(@id, 'tdAyes')])"))
            no_count = int(
                page.xpath("string(//td[contains(@id, 'tdNays')])"))
            excused_count = int(
                page.xpath("string(//td[contains(@id, 'tdExcused')])"))
            absent_count = int(
                page.xpath("string(//td[contains(@id, 'tdAbsent')])"))
            other_count = excused_count + absent_count

            passed = yes_count > no_count

            # Named ``vote_type`` to avoid shadowing the ``type`` builtin.
            if motion.startswith('Do Pass'):
                vote_type = 'passage'
            elif motion == 'Concurred in amendments':
                vote_type = 'amendment'
            elif motion == 'Veto override':
                vote_type = 'veto_override'
            else:
                vote_type = 'other'

            vote = Vote(chamber, date, motion, passed, yes_count, no_count,
                        other_count)
            vote['type'] = vote_type

            if committee:
                vote['committee'] = committee

            vote.add_source(url)

            # Each vote cell is preceded by the cell holding the voter name.
            for td in page.xpath("//table[contains(@id, 'tblVotes')]/tr/td"):
                if td.text == 'Yea':
                    vote.yes(td.getprevious().text.strip())
                elif td.text == 'Nay':
                    vote.no(td.getprevious().text.strip())
                elif td.text in ('Excused', 'Absent'):
                    vote.other(td.getprevious().text.strip())

            bill.add_vote(vote)
Exemplo n.º 7
0
    def scrape_vote(self, bill, vote_type_id, vote_type):
        """Scrape the voting page for ``bill`` and attach the vote to it.

        The vote is always recorded for the ``upper`` chamber. The "other"
        count is derived as 13 minus the yes and no counts, floored at zero.
        """
        base_url = "http://dcclims1.dccouncil.us/lims/voting.aspx?VoteTypeID=%s&LegID=%s"
        url = base_url % (vote_type_id, bill["bill_id"])

        with self.urlopen(url) as html:
            doc = lxml.html.fromstring(html)

            def first(path):
                # First text node matched by ``path``.
                return doc.xpath(path)[0]

            vote_date = convert_date(doc.get_element_by_id("VoteDate").text)

            # check if voice vote / approved boxes have an 'x'
            voice = first('//span[@id="VoteTypeVoice"]/b/text()') == "x"
            passed = first('//span[@id="VoteResultApproved"]/b/text()') == "x"

            yes_count = extract_int(first('//span[@id="VoteCount1"]/b/text()'))
            no_count = extract_int(first('//span[@id="VoteCount2"]/b/text()'))
            # every now and then this actually drops below 0 (error in count)
            remainder = 13 - (yes_count + no_count)
            other_count = max(remainder, 0)

            vote = Vote("upper", vote_date, vote_type, passed, yes_count,
                        no_count, other_count, voice_vote=voice)

            vote.add_source(url)

            # members are only text on page in a <u> tag
            for underlined in doc.xpath("//u"):
                member = underlined.text
                cast = underlined.xpath("../../i/text()")[0]
                if "Yes" in cast:
                    record = vote.yes
                elif "No" in cast:
                    record = vote.no
                else:
                    record = vote.other
                record(member)
        bill.add_vote(vote)
Exemplo n.º 8
0
 def apply_votes(self, bill):
     """Parse every vote found via the bill's status URL and attach it.

     Reads ``bill['status_url']`` and iterates the
     ``(chamber, description, pdf_url, votes_by_voter)`` tuples returned by
     ``votes.all_votes_for_url``. The date is the text after the last ``-``
     of the description; descriptions without a ``-`` are reported with a
     warning and skipped. A vote value of ``'Y'`` counts as yes, ``'N'`` as
     no, and anything else as "other".
     """
     bill_votes = votes.all_votes_for_url(self, bill['status_url'])
     for (chamber, vote_desc, pdf_url, these_votes) in bill_votes:
         # ``split("-")[-1]`` can never raise IndexError (split always
         # returns at least one item), so a missing date has to be detected
         # through the missing separator instead.
         _, separator, date = vote_desc.rpartition("-")
         if not separator:
             self.warning("[%s] Couldn't get date out of [%s]" % (bill['bill_id'], vote_desc))
             continue

         yes_votes = []
         no_votes = []
         other_votes = []
         for voter, vote in these_votes.iteritems():
             if vote == 'Y':
                 yes_votes.append(voter)
             elif vote == 'N':
                 no_votes.append(voter)
             else:
                 other_votes.append(voter)

         # not necessarily correct, but not sure where else to get it.
         # maybe from pdf
         passed = len(yes_votes) > len(no_votes)
         vote = Vote(standardize_chamber(chamber), date, vote_desc, passed,
                     len(yes_votes), len(no_votes), len(other_votes),
                     pdf_url=pdf_url)
         for voter in yes_votes:
             vote.yes(voter)
         for voter in no_votes:
             vote.no(voter)
         for voter in other_votes:
             vote.other(voter)
         bill.add_vote(vote)
Exemplo n.º 9
0
    def parse_vote(self, actor, date, row):
        """
        takes the actor, date and row element and returns a Vote object

        The first span holds "<result>-<yes>-<no>-<other>". The text that
        follows the second and third spans lists the yes and no voters as
        comma-separated names; the text following any later span that starts
        with "Absent" lists the other voters. Names are stripped of
        surrounding whitespace, and empty names and the literal "None" are
        dropped.
        """
        spans = row.xpath('.//span')
        motion = row.text

        # Split from the right so that a hyphen inside the result text does
        # not break the four-way unpacking.
        passed, yes_count, no_count, other_count = \
            spans[0].text_content().rsplit('-', 3)

        def names_after(span):
            # Comma-separated names in the text following ``span``.
            tail = span.tail or ''
            cleaned = tail.replace(u'\xa0--\xa0', '')
            return [name.strip() for name in cleaned.split(',')
                    if name.strip()]

        yes_votes = names_after(spans[1])
        no_votes = names_after(spans[2])

        # Not every row has an absentee span; scan whatever follows the
        # yes/no spans instead of indexing a fixed position.
        other_votes = []
        for span in spans[3:]:
            if (span.text or '').startswith('Absent'):
                other_votes += names_after(span)

        for key, val in {'adopted': True, 'passed': True, 'failed':False}.items():
            if key in passed.lower():
                passed = val
                break
        vote = Vote(actor, date, motion, passed, int(yes_count), int(no_count),
                    int(other_count))
        for name in yes_votes:
            if name != 'None':
                vote.yes(name)
        for name in no_votes:
            if name != 'None':
                vote.no(name)
        for name in other_votes:
            if name != 'None':
                vote.other(name)
        return vote
Exemplo n.º 10
0
    def scrape(self, chamber, session):
        """Save one hard-coded sample bill for ``chamber`` in ``session``.

        The bill is ``SB 1`` for the upper chamber and ``HB 1`` otherwise,
        and carries fixed sources, sponsors, two votes and three actions.
        """
        self.validate_session(session)

        is_upper = chamber == 'upper'
        other_chamber = 'lower' if is_upper else 'upper'
        bill_id = 'SB 1' if is_upper else 'HB 1'

        sample = Bill(session, chamber, bill_id, 'A super bill')
        sample.add_source('http://example.com/')
        sample.add_version('As Introduced', 'http://example.com/SB1.html')
        sample.add_document('Google', 'http://google.com')
        for role, sponsor in (('primary', 'Bob Smith'),
                              ('secondary', 'Johnson, Sally')):
            sample.add_sponsor(role, sponsor)

        # Upper-chamber vote: unanimous passage.
        first_day = datetime.datetime.strptime('1/29/2010', '%m/%d/%Y')
        upper_vote = Vote('upper', first_day, 'Final passage', True, 2, 0, 0)
        for supporter in ('Smith', 'Johnson'):
            upper_vote.yes(supporter)

        # Lower-chamber vote: failed, one no and one other.
        second_day = datetime.datetime.strptime('1/30/2010', '%m/%d/%Y')
        lower_vote = Vote('lower', second_day, 'Final passage', False, 0, 1, 1)
        lower_vote.no('Bob Smith')
        lower_vote.other('S. Johnson')

        for recorded in (upper_vote, lower_vote):
            sample.add_vote(recorded)

        actions = (
            (chamber, 'introduced', first_day),
            (chamber, 'read first time', second_day),
            (other_chamber, 'introduced', second_day),
        )
        for acting_chamber, action, when in actions:
            sample.add_action(acting_chamber, action, when)

        self.save_bill(sample)
Exemplo n.º 11
0
    def scrape_vote(self, bill, date, motion, url):
        """Scrape one roll-call page and attach the resulting vote to ``bill``.

        The chamber is taken from the URL: a URL ending in ``Senate`` is
        recorded as ``upper``, anything else as ``lower``. Totals are read
        from the centered summary cells; individual voters come from the
        links in the table following each ``<h3>`` heading. "Non Voting" and
        "Present" are both folded into the "other" category.

        Pages containing the text "not yet official" are skipped and nothing
        is added to ``bill``.
        """
        html = self.urlopen(url)

        if "not yet official" in html:
            # Vote pages are sometimes linked before they go live; skip them
            # rather than failing while parsing totals that are not there.
            return

        page = lxml.html.fromstring(html)

        if url.endswith('Senate'):
            actor = 'upper'
        else:
            actor = 'lower'

        # Each summary cell reads "<label>: <number>"; the number is the last
        # whitespace-separated token of the cell text.
        count_path = "string(//td[@align = 'center' and contains(., '%s: ')])"
        yes_count = int(page.xpath(count_path % "Yeas").split()[-1])
        no_count = int(page.xpath(count_path % "Nays").split()[-1])
        other_count = int(page.xpath(count_path % "Non Voting").split()[-1])
        other_count += int(page.xpath(count_path % "Present").split()[-1])

        # The yeas must outnumber everyone who did not vote yes.
        passed = yes_count > no_count + other_count
        vote = Vote(actor, date, motion, passed, yes_count, no_count,
                    other_count)
        vote.add_source(url)

        vote_path = "//h3[. = '%s']/following-sibling::table[1]/tr/td/a"
        for yes in page.xpath(vote_path % "Yeas"):
            vote.yes(yes.text)
        for no in page.xpath(vote_path % "Nays"):
            vote.no(no.text)
        for other in page.xpath(vote_path % "Non Voting"):
            vote.other(other.text)
        for other in page.xpath(vote_path % "Present"):
            vote.other(other.text)

        bill.add_vote(vote)
Exemplo n.º 12
0
    def parse_vote(self, actor, date, row):
        """
        Build and return a Vote object from the actor, the date and a row
        element. The first span holds "<result>-<yes>-<no>-<other>"; the
        text following the second and third spans lists the yes and no
        voters, and later spans starting with "Absent" or "Excused" list
        the other voters.
        """
        spans = row.xpath('.//span')

        # An empty motion (after dropping nbsp and hyphens) means passage.
        motion = row.text.replace(u'\u00a0', " ").replace("-", "").strip()
        if not motion:
            motion = "passage"

        summary = spans[0].text_content().rsplit('-', 3)
        passed, yes_count, no_count, other_count = summary

        yes_votes = self.get_names(spans[1].tail)
        no_votes = self.get_names(spans[2].tail)
        other_votes = []
        for extra in spans[3:]:
            if extra.text.startswith(('Absent', 'Excused')):
                other_votes.extend(self.get_names(extra.tail))

        # Map the textual result onto a boolean when a keyword is present.
        outcome = passed.lower()
        for keyword, result in {'adopted': True, 'passed': True, 'failed': False}.items():
            if keyword in outcome:
                passed = result
                break

        vote = Vote(actor, date, motion, passed, int(yes_count), int(no_count),
                    int(other_count))

        groups = (
            (yes_votes, vote.yes),
            (no_votes, vote.no),
            (other_votes, vote.other),
        )
        for names, record in groups:
            for name in names:
                if name and name != 'None':
                    record(name)
        return vote
Exemplo n.º 13
0
    def build_lower_votes(self):
        """Scrape the lower-chamber floor votes and attach them to the bill.

        Returns without adding votes when the votes document is unavailable,
        when the page has no ``<pre>`` block, or when that block states that
        there are no votes for this bill. Every ``<table>`` on the page is
        turned into one ``Floor Vote``; table cells are read as alternating
        name / vote-value pairs. Values other than ``Y``, ``N`` and ``NO``
        are recorded as "other" and kept, grouped by their raw value, under
        the vote's ``actual_vote`` key.
        """
        url = ('http://assembly.state.ny.us/leg/?'
               'default_fld=&bn=%s&term=%s&Votes=Y')
        url = url % (self.bill_id, self.term_start_year)
        self.urls.add(votes=url)
        self.bill.add_source(url)
        doc = self.urls.votes.doc
        if doc is None:
            return

        pre_blocks = doc.xpath('//pre')
        if not pre_blocks:
            # No bill information on the page; nothing to parse.
            return

        no_votes = ('There are no votes for this bill in this '
                    'legislative session.')
        if pre_blocks[0].text_content() == no_votes:
            return

        for table in doc.xpath('//table'):

            # The value sits in the element right after its label.
            date = table.xpath('caption/label[contains(., "DATE:")]')
            date = next(date[0].itersiblings()).text
            date = datetime.datetime.strptime(date, '%m/%d/%Y')

            votes = table.xpath('caption/span/label[contains(., "YEA/NAY:")]')
            votes = next(votes[0].itersiblings()).text
            yes_count, no_count = map(int, votes.split('/'))

            passed = yes_count > no_count
            vote = Vote('lower',
                        date,
                        'Floor Vote',
                        passed,
                        yes_count,
                        no_count,
                        other_count=0)

            # One mapping per vote: sharing a single dict across tables
            # would mix the "other" voters of every vote on the page.
            actual_vote = collections.defaultdict(list)

            cells = iter(table.xpath('tr/td/text()'))
            while True:
                pair = list(islice(cells, 2))
                if len(pair) < 2:
                    # End of data. Stop.
                    break
                name, vote_val = pair
                name = self._scrub_name(name)

                if vote_val.strip() == 'Y':
                    vote.yes(name)
                elif vote_val.strip() in ('N', 'NO'):
                    vote.no(name)
                else:
                    vote.other(name)
                    actual_vote[vote_val].append(name)

            # The page doesn't provide an other_count.
            vote['other_count'] = len(vote['other_votes'])
            vote['actual_vote'] = actual_vote
            self.bill.add_vote(vote)
Exemplo n.º 14
0
    def scrape_vote(self, bill, vote_chamber, bill_id, vote_id, vote_date,
                    action_text):
        """Fetch a roll-call result page and attach the vote to ``bill``.

        The text cells of the result table are consumed in pairs (a member
        name followed by a vote code). Vacant seats, "Total ..." rows and
        blank names are ignored. Codes ``Y`` and ``N`` are recorded as yes
        and no; ``P`` and ``A`` are both recorded as "other". Where the page
        lists totals, they are asserted to match the collected names.
        """
        url_template = ('http://alisondb.legislature.state.al.us/Alison/'
                        'GetRollCallVoteResults.aspx?'
                        'VOTE={0}&BODY={1}&INST={2}&SESS={3}')
        url = url_template.format(
            vote_id, vote_chamber, bill_id, self.session_id)
        page = lxml.html.fromstring(self.get(url=url).text)

        tally = {'Y': [], 'N': [], 'P': [], 'A': []}

        member = ''
        cells = page.xpath('//table/tr/td/font/text()')
        for position, cell in enumerate(cells):
            if position % 2:
                # Odd positions carry the vote code for the preceding name.
                if member:
                    tally[cell].append(member)
                continue

            # Even positions carry a name; blank it when it is not a voter.
            ignorable = (cell.endswith(", Vacant") or
                         cell.startswith("Total ") or
                         not cell.strip())
            member = '' if ignorable else cell

        def listed_total(path):
            # Number after the last ":" of the first match, or None.
            hits = page.xpath(path)
            if not hits:
                return None
            return int(hits[0].split(":")[-1])

        # Check name counts against totals listed on the site
        yea_listed = listed_total(
            '//*[starts-with(text(), "Total Yea")]/text()')
        yea_total = len(tally['Y'])
        if yea_listed is not None:
            assert yea_listed == yea_total, "Yea count incorrect"
            yea_total = yea_listed

        nay_listed = listed_total(
            '//*[starts-with(text(), "Total Nay")]/text()')
        nay_total = len(tally['N'])
        if nay_listed is not None:
            assert nay_listed == nay_total, "Nay count incorrect"
            nay_total = nay_listed

        absent_listed = listed_total(
            '//*[starts-with(text(), "Total Absent")]/text()')
        if absent_listed is not None:
            assert absent_listed == len(tally['A']), "Absent count incorrect"
        other_total = len(tally['P']) + len(tally['A'])

        vote = Vote(
            self.CHAMBERS[vote_chamber[0]], vote_date, action_text,
            yea_total > nay_total, yea_total, nay_total, other_total)
        vote.add_source(url)

        recorders = (
            (tally['Y'], vote.yes),
            (tally['N'], vote.no),
            (tally['A'] + tally['P'], vote.other),
        )
        for members, record in recorders:
            for voter in members:
                record(voter)

        bill.add_vote(vote)
Exemplo n.º 15
0
 def apply_votes(self, bill):
     """Parse every vote found via the bill's status URL and attach it.

     Reads ``bill['status_url']`` and iterates the
     ``(chamber, description, pdf_url, votes_by_voter)`` tuples returned by
     ``votes.all_votes_for_url``. The date is the text after the last ``-``
     of the description; descriptions without a ``-`` are reported with a
     warning and skipped. A vote value of ``'Y'`` counts as yes, ``'N'`` as
     no, and anything else as "other".
     """
     bill_votes = votes.all_votes_for_url(self, bill['status_url'])
     for (chamber, vote_desc, pdf_url, these_votes) in bill_votes:
         # ``split("-")[-1]`` can never raise IndexError (split always
         # returns at least one item), so a missing date has to be detected
         # through the missing separator instead.
         _, separator, date = vote_desc.rpartition("-")
         if not separator:
             self.warning("[%s] Couldn't get date out of [%s]" % (bill['bill_id'], vote_desc))
             continue

         yes_votes = []
         no_votes = []
         other_votes = []
         for voter, vote in these_votes.iteritems():
             if vote == 'Y':
                 yes_votes.append(voter)
             elif vote == 'N':
                 no_votes.append(voter)
             else:
                 other_votes.append(voter)

         # not necessarily correct, but not sure where else to get it.
         # maybe from pdf
         passed = len(yes_votes) > len(no_votes)
         vote = Vote(standardize_chamber(chamber), date, vote_desc, passed,
                     len(yes_votes), len(no_votes), len(other_votes),
                     pdf_url=pdf_url)
         for voter in yes_votes:
             vote.yes(voter)
         for voter in no_votes:
             vote.no(voter)
         for voter in other_votes:
             vote.other(voter)
         bill.add_vote(vote)
Exemplo n.º 16
0
    def vote(self):
        '''Assemble and return a billy vote.

        Yes and no totals are summed from the singular and plural count
        labels; every remaining count is treated as "other".
        '''
        actual_vote_dict = self.vote_values()
        date = self.date()
        motion = self.motion()
        passed = self.passed()
        counts = self.get_counts()

        def tally(*labels):
            # Sum the counts stored under any of ``labels``.
            return sum(int(counts.get(label, 0)) for label in labels)

        yes_count = tally('Yea', 'Yeas')
        no_count = tally('Nay', 'Nays')
        vote = Vote(self.chamber, date, motion,
                    passed, yes_count, no_count,
                    sum(map(int, counts.values())) - (yes_count + no_count))

        # Route each recognised category to the matching recorder; unknown
        # categories are ignored.
        recorders = {"yes": vote.yes, "no": vote.no, "other": vote.other}
        for category, names in actual_vote_dict.items():
            record = recorders.get(category)
            if record is None:
                continue
            for name in names:
                record(name)

        vote.add_source(self.url)
        return vote
Exemplo n.º 17
0
    def scrape_vote(self, bill, vote_url, chamber, date):
        """Scrape the vote page at ``vote_url`` and attach the vote to ``bill``.

        Motions containing "withdrawn" are ignored. The last row after the
        "VOTE" header row carries the totals; all but the last two of those
        rows are read as individual voters. When neither a final action nor
        a "passed without objection" motion is found, a warning is logged
        and no vote is added.
        """
        page = self.lxmlize(vote_url)

        motion = page.xpath(
            '//td/b/font[text()="MOTION:"]/../../following-sibling::td/font/text()'
        )[0]

        if 'withdrawn' in motion:
            return

        # Every table row after the one with VOTE in a td/div/b/font
        rolls = page.xpath(
            '//tr[preceding-sibling::tr/td/div/b/font/text()="VOTE"]')
        count_row = rolls[-1]
        voter_rows = rolls[:-2]

        count_path = ('.//b/font[normalize-space(text())="%s"]'
                      '/../following-sibling::font[1]/text()')

        def labelled(label):
            # Text that follows the bold ``label`` in the totals row.
            return count_row.xpath(count_path % label)[0]

        yes_count = labelled("YES:")
        no_count = labelled("NO:")
        exc_count = labelled("EXC:")
        nv_count = labelled("ABS:")

        unopposed = 'passed without objection' in motion.lower()

        final_action = count_row.xpath(
            './/b/font[normalize-space(text())="FINAL ACTION:"]'
            '/../following-sibling::b[1]/font/text()')
        if final_action:
            final = final_action[0]
            passed = ('pass' in final.lower() or
                      int(yes_count) > int(no_count))
        elif unopposed:
            passed = True
            yes_count = len(voter_rows)
        else:
            self.warning("No vote breakdown found for %s" % vote_url)
            return

        other_count = int(exc_count) + int(nv_count)

        vote = Vote(chamber, date, motion, passed, int(yes_count),
                    int(no_count), int(other_count))

        for roll in voter_rows:
            voter = roll.xpath('td[2]/div/font')[0].text_content()
            voted = roll.xpath('td[3]/div/font')[0].text_content().strip()
            if not voted:
                # No explicit value: count the member as a yes only for
                # motions that passed without objection.
                if unopposed and voter:
                    vote.yes(voter)
            elif 'Yes' in voted:
                vote.yes(voter)
            elif 'No' in voted:
                vote.no(voter)
            else:
                vote.other(voter)

        bill.add_vote(vote)
Exemplo n.º 18
0
    def scrape_vote(self, bill, name, url):
        """Scrape the vote page at ``url`` and attach the vote to ``bill``.

        URLs containing ``VOTE/H`` are read with the lower-chamber column
        layout, all others with the upper-chamber layout. Pages containing
        "BUDGET ADDRESS" are skipped. The vote passes when the yea count
        exceeds the "Necessary for" number found on the page; the year of
        the vote date is taken from ``bill['session']``.
        """
        # Column layout: group start columns plus the offsets of the name,
        # yea and nay cells inside each group.
        if "VOTE/H" in url:
            vote_chamber = 'lower'
            cols, name_offset = (1, 5, 9, 13), 3
            yes_offset, no_offset = 0, 1
        else:
            vote_chamber = 'upper'
            cols, name_offset = (1, 6), 4
            yes_offset, no_offset = 1, 2

        html = self.get(url).text
        if 'BUDGET ADDRESS' in html:
            return

        doc = lxml.html.fromstring(html)

        def first_number(path):
            # First run of digits in the string value of ``path``.
            text = doc.xpath(path)
            return int(re.match(r'[^\d]*(\d+)[^\d]*', text).group(1))

        yes_count = first_number(
            "string(//span[contains(., 'Those voting Yea')])")
        no_count = first_number(
            "string(//span[contains(., 'Those voting Nay')])")
        other_count = first_number(
            "string(//span[contains(., 'Those absent')])")
        need_count = first_number(
            "string(//span[contains(., 'Necessary for')])")

        taken_on = doc.xpath("string(//span[contains(., 'Taken on')])")
        month_day = re.match(r'.*Taken\s+on\s+(\d+/\s?\d+)', taken_on).group(1)
        month_day = month_day.replace(' ', '')
        date = datetime.datetime.strptime(month_day + " " + bill['session'],
                                          "%m/%d %Y").date()

        vote = Vote(vote_chamber, date, name, yes_count > need_count,
                    yes_count, no_count, other_count)
        vote.add_source(url)

        table = doc.xpath("//table")[0]
        for row in table.xpath("tr"):
            for start in cols:
                member = row.xpath(
                    "string(td[%d])" % (start + name_offset)).strip()

                if not member or member == 'VACANT':
                    continue

                if "Y" in row.xpath("string(td[%d])" % (start + yes_offset)):
                    vote.yes(member)
                elif "N" in row.xpath("string(td[%d])" % (start + no_offset)):
                    vote.no(member)
                else:
                    vote.other(member)

        bill.add_vote(vote)
Exemplo n.º 19
0
    def _build_lower_votes(self):
        """Scrape the lower-chamber floor votes and attach them to the bill.

        Returns without adding votes when the votes document is unavailable,
        when the page has no ``<pre>`` block, or when that block states that
        there are no votes for this bill. Every ``<table>`` on the page is
        turned into one ``Floor Vote``; table cells are read as alternating
        name / vote-value pairs. Values other than ``Y``, ``N`` and ``NO``
        are recorded as "other" and kept, grouped by their raw value, under
        the vote's ``actual_vote`` key.
        """
        url = self.shared_url + '&Votes=Y'
        self.urls.add(votes=url)
        self.bill.add_source(url)
        doc = self.urls.votes.doc
        if doc is None:
            return

        # Skip bill if votes can't be found.
        pre_blocks = doc.xpath('//pre')
        if not pre_blocks:
            return

        no_votes = ('There are no votes for this bill in this legislative '
                    'session.')
        if pre_blocks[0].text_content() == no_votes:
            return

        for table in doc.xpath('//table'):

            # The value sits in the element right after its label.
            date = table.xpath('caption/label[contains(., "DATE:")]')
            date = next(date[0].itersiblings()).text
            date = datetime.datetime.strptime(date, '%m/%d/%Y')

            votes = table.xpath('caption/span/label[contains(., "YEA/NAY:")]')
            votes = next(votes[0].itersiblings()).text
            yes_count, no_count = map(int, votes.split('/'))

            passed = yes_count > no_count
            vote = Vote('lower', date, 'Floor Vote', passed, yes_count,
                        no_count, other_count=0)

            # One mapping per vote: sharing a single dict across tables
            # would mix the "other" voters of every vote on the page.
            actual_vote = collections.defaultdict(list)

            cells = iter(table.xpath('tr/td/text()'))
            while True:
                pair = list(islice(cells, 2))
                if len(pair) < 2:
                    # End of data. Stop.
                    break
                name, vote_val = pair
                name = self._scrub_name(name)

                if vote_val.strip() == 'Y':
                    vote.yes(name)
                elif vote_val.strip() in ('N', 'NO'):
                    vote.no(name)
                else:
                    vote.other(name)
                    actual_vote[vote_val].append(name)

            # The page doesn't provide an other_count.
            vote['other_count'] = len(vote['other_votes'])
            vote['actual_vote'] = actual_vote
            self.bill.add_vote(vote)
Exemplo n.º 20
0
    def scrape_vote(self, bill, chamber, date, url):
        """Download a roll-call PDF, parse it and add the vote to ``bill``.

        The PDF at ``url`` is converted to text with ``convert_pdf``. The
        motion is taken from the fifth line of the text, the totals from the
        ``Yeas - N`` / ``Nays - N`` / ``Not Voting - N`` markers, and the
        individual votes from the lines after the ninth. Returns without
        adding anything when the text has fewer than five lines or contains
        no ``Yeas - N`` total.
        """
        path, _resp = self.urlretrieve(url)
        try:
            text = convert_pdf(path, 'text')
        finally:
            # Remove the downloaded file even when the conversion fails.
            os.remove(path)

        lines = text.split('\n')
        try:
            motion = lines[4].strip()
        except IndexError:
            return

        yes_match = re.search(r'Yeas - (\d+)', text)
        if yes_match is None:
            return
        yes_count = int(yes_match.group(1))

        no_count = int(re.search(r'Nays - (\d+)', text).group(1))
        other_count = int(re.search(r'Not Voting - (\d+)', text).group(1))
        passed = yes_count > (no_count + other_count)

        vote = Vote(chamber, date, motion, passed, yes_count, no_count,
                    other_count)
        vote.add_source(url)

        column_split = re.compile(r'-\d+')
        entry_pattern = re.compile(r'(Y|N|EX|\*)\s+(.+)$')

        for line in lines[9:]:
            if 'after roll call' in line or 'Indication of Vote' in line:
                break
            if 'Presiding' in line:
                continue

            reached_pairs = False
            for col in column_split.split(line):
                col = col.strip()
                if not col:
                    continue

                match = entry_pattern.match(col)
                if not match:
                    # No recognizable vote marker: count the text as "other".
                    vote.other(col)
                    continue

                marker, voter = match.groups()
                if voter == "PAIR":
                    # A "PAIR" entry ends the roll call; stop reading lines.
                    reached_pairs = True
                    break
                if marker == 'Y':
                    vote.yes(voter)
                elif marker == 'N':
                    vote.no(voter)
                else:
                    vote.other(voter)

            if reached_pairs:
                break

        vote.validate()
        bill.add_vote(vote)
Exemplo n.º 21
0
    def build_lower_votes(self):
        """Scrape the floor votes for this bill from assembly.state.ny.us.

        The votes URL is built from ``self.bill_id`` and
        ``self.term_start_year``, registered with ``self.urls`` and recorded
        as a source on ``self.bill``. One ``Vote`` (chamber ``"lower"``,
        motion ``"Floor Vote"``) is added to ``self.bill`` per ``<table>``
        on the page. Nothing is added when the page is unavailable, has no
        ``<pre>`` element, or says there are no votes for the bill.
        """
        url = "http://assembly.state.ny.us/leg/?" "default_fld=&bn=%s&term=%s&Votes=Y"
        url = url % (self.bill_id, self.term_start_year)
        self.urls.add(votes=url)
        self.bill.add_source(url)
        doc = self.urls.votes.doc
        if doc is None:
            return

        no_votes = "There are no votes for this bill in this legislative " "session."

        # Skip the bill if the vote information can't be found.
        pre_elements = doc.xpath("//pre")
        if not pre_elements or pre_elements[0].text_content() == no_votes:
            return

        for table in doc.xpath("//table"):
            # Raw vote value -> names, for votes that are neither yes nor no.
            # Built per table so one vote never carries another vote's names.
            actual_vote = collections.defaultdict(list)

            # The value sits in the element right after its label.
            date_label = table.xpath('caption/label[contains(., "DATE:")]')
            date = next(date_label[0].itersiblings()).text
            date = datetime.datetime.strptime(date, "%m/%d/%Y")

            tally_label = table.xpath('caption/span/label[contains(., "YEA/NAY:")]')
            tally = next(tally_label[0].itersiblings()).text
            yes_count, no_count = map(int, tally.split("/"))

            passed = yes_count > no_count
            vote = Vote("lower", date, "Floor Vote", passed, yes_count, no_count, other_count=0)

            # Cell texts alternate name, vote value; a trailing unpaired
            # cell is ignored.
            tds = table.xpath("tr/td/text()")
            for name, vote_val in zip(tds[::2], tds[1::2]):
                name = self._scrub_name(name)
                stripped = vote_val.strip()

                if stripped == "Y":
                    vote.yes(name)
                elif stripped in ("N", "NO"):
                    vote.no(name)
                else:
                    vote.other(name)
                    actual_vote[vote_val].append(name)

            # The page doesn't provide an other_count.
            vote["other_count"] = len(vote["other_votes"])
            vote["actual_vote"] = actual_vote
            self.bill.add_vote(vote)
Exemplo n.º 22
0
    def scrape_vote(self, bill, name, url):
        """Parse the roll-call page at ``url`` and add the vote to ``bill``.

        ``name`` is used as the vote's motion. The chamber and the table
        column layout are chosen by whether ``url`` contains ``"VOTE/H"``.
        Pages containing ``"BUDGET ADDRESS"`` are skipped. The vote date on
        the page has no year, so the year comes from ``bill["session"]``.
        """
        if "VOTE/H" in url:
            vote_chamber = "lower"
            cols = (1, 5, 9, 13)
            name_offset = 3
            yes_offset = 0
            no_offset = 1
        else:
            vote_chamber = "upper"
            cols = (1, 6)
            name_offset = 4
            yes_offset = 1
            no_offset = 2

        # Connecticut's SSL is causing problems with Scrapelib, so use Requests
        # NOTE(review): verify=False disables certificate checking; kept as in
        # the original because it is the stated workaround.
        page = requests.get(url, verify=False).text

        if "BUDGET ADDRESS" in page:
            return

        page = lxml.html.fromstring(page)

        def labelled_count(label):
            """Return the first integer in the <span> containing ``label``."""
            text = page.xpath("string(//span[contains(., '%s')])" % label)
            return int(re.match(r"[^\d]*(\d+)[^\d]*", text).group(1))

        yes_count = labelled_count("Those voting Yea")
        no_count = labelled_count("Those voting Nay")
        other_count = labelled_count("Those absent")
        need_count = labelled_count("Necessary for")

        date = page.xpath("string(//span[contains(., 'Taken on')])")
        date = re.match(r".*Taken\s+on\s+(\d+/\s?\d+)", date).group(1)
        date = date.replace(" ", "")
        date = datetime.datetime.strptime(date + " " + bill["session"], "%m/%d %Y").date()

        # Reaching the "Necessary for ..." figure is enough to pass, so the
        # comparison is inclusive.
        passed = yes_count >= need_count
        vote = Vote(vote_chamber, date, name, passed, yes_count, no_count, other_count)
        vote.add_source(url)

        table = page.xpath("//table")[0]
        for row in table.xpath("tr"):
            for i in cols:
                # Separate local so the ``name`` parameter is not overwritten.
                voter = row.xpath("string(td[%d])" % (i + name_offset)).strip()

                if not voter or voter == "VACANT":
                    continue

                if "Y" in row.xpath("string(td[%d])" % (i + yes_offset)):
                    vote.yes(voter)
                elif "N" in row.xpath("string(td[%d])" % (i + no_offset)):
                    vote.no(voter)
                else:
                    vote.other(voter)

        bill.add_vote(vote)
Exemplo n.º 23
0
    def scrape_bill(self, session, bills):
        """Build and save a bill from the first entry of ``bills``.

        Each entry is a ``(billdata, details)`` pair. The bill object comes
        from ``AssemblyBillPage``; companion, summary and votes are filled
        in from ``billdata['data']['bill']``, and the result is passed to
        ``self.save_bill``.
        """
        billdata, details = bills[0]

        # Only bill_chamber is used below; the full unpack is kept so a
        # malformed ``details`` still fails here.
        (_senate_url, _assembly_url, bill_chamber, _bill_type, _bill_id,
         _title, (_letter, _number, _is_amd)) = details

        bill_json = billdata['data']['bill']

        page = AssemblyBillPage(self, session, bill_chamber, details)
        page.build()
        bill = page.bill
        bill.add_source(billdata['url'])

        # Add companion.
        companion = bill_json['sameAs']
        if companion:
            bill.add_companion(companion)

        summary = bill_json['summary']
        if summary:
            bill['summary'] = summary

        for vote_data in bill_json['votes'] or ():
            motion = vote_data['description'] or '[No motion available.]'
            vote = Vote(
                chamber='upper',
                date=self.date_from_timestamp(vote_data['voteDate']),
                motion=motion,
                passed=False,
                yes_votes=[],
                no_votes=[],
                other_votes=[],
                yes_count=0,
                no_count=0,
                other_count=0)

            # Counts start at zero and are tallied as names are recorded.
            for voter in vote_data['ayes']:
                vote.yes(voter)
                vote['yes_count'] += 1

            for key in ('absent', 'excused', 'abstains'):
                for voter in vote_data.get(key):
                    vote.other(voter)
                    vote['other_count'] += 1

            for voter in vote_data['nays']:
                vote.no(voter)
                vote['no_count'] += 1

            vote['passed'] = vote['yes_count'] > vote['no_count']

            bill.add_vote(vote)

        # Fall back to the summary when the bill data carries no title.
        if not bill_json['title']:
            bill['title'] = bill['summary']

        self.save_bill(bill)
Exemplo n.º 24
0
    def scrape_bill(self, session, bills):
        """Build and save a bill from the first entry of ``bills``.

        Each entry is a ``(billdata, details)`` pair. The bill object comes
        from ``AssemblyBillPage``; companion, summary and votes are filled
        in from ``billdata["data"]["bill"]``, and the result is passed to
        ``self.save_bill``.
        """
        billdata, details = bills[0]

        # Only bill_chamber is used below; the full unpack is kept so a
        # malformed ``details`` still fails here.
        (_senate_url, _assembly_url, bill_chamber, _bill_type, _bill_id, _title, _id_parts) = details
        (_letter, _number, _is_amd) = _id_parts

        bill_json = billdata["data"]["bill"]

        page = AssemblyBillPage(self, session, bill_chamber, details)
        page.build()
        bill = page.bill
        bill.add_source(billdata["url"])

        # Add companion.
        companion = bill_json["sameAs"]
        if companion:
            bill.add_companion(companion)

        summary = bill_json["summary"]
        if summary:
            bill["summary"] = summary

        for vote_data in bill_json["votes"] or ():
            motion = vote_data["description"] or "[No motion available.]"
            vote = Vote(
                chamber="upper",
                date=self.date_from_timestamp(vote_data["voteDate"]),
                motion=motion,
                passed=False,
                yes_votes=[],
                no_votes=[],
                other_votes=[],
                yes_count=0,
                no_count=0,
                other_count=0,
            )

            # Counts start at zero and are tallied as names are recorded.
            for voter in vote_data["ayes"]:
                vote.yes(voter)
                vote["yes_count"] += 1

            for key in ("absent", "excused", "abstains"):
                for voter in vote_data.get(key):
                    vote.other(voter)
                    vote["other_count"] += 1

            for voter in vote_data["nays"]:
                vote.no(voter)
                vote["no_count"] += 1

            vote["passed"] = vote["yes_count"] > vote["no_count"]

            bill.add_vote(vote)

        # Fall back to the summary when the bill data carries no title.
        if not bill_json["title"]:
            bill["title"] = bill["summary"]

        self.save_bill(bill)
Exemplo n.º 25
0
    def scrape_vote(self, bill, date, url):
        """Parse the vote page at ``url`` and add the vote to ``bill``.

        The page's ``hdVote`` header is split on ``", "``: the second part
        gives the chamber (and committee, if any) and the remaining parts
        form the motion. Votes with no detectable motion are skipped.

        Raises:
            ScrapeError: if the location starts with neither "House" nor
                "Senate".
        """
        with self.urlopen(url) as page:
            page = lxml.html.fromstring(page)

            header = page.xpath("string(//h4[contains(@id, 'hdVote')])")
            header_parts = header.split(", ")

            location = header_parts[1]

            if location.startswith("House"):
                chamber = "lower"
            elif location.startswith("Senate"):
                chamber = "upper"
            else:
                # Report the offending text; ``chamber`` is not assigned on
                # this path.
                raise ScrapeError("Bad chamber: %s" % location)

            # Anything after the first word of the location is the committee,
            # unless it is just the tail of "House of Representatives".
            committee = " ".join(location.split(" ")[1:]).strip()
            if not committee or committee.startswith("of Representatives"):
                committee = None

            motion = ", ".join(header_parts[2:]).strip()
            if not motion:
                # If we can't detect a motion, skip this vote
                return

            def cell_count(cell_id):
                """Return the integer in the <td> whose id contains ``cell_id``."""
                return int(page.xpath("string(//td[contains(@id, '%s')])" % cell_id))

            yes_count = cell_count("tdAyes")
            no_count = cell_count("tdNays")
            excused_count = cell_count("tdExcused")
            absent_count = cell_count("tdAbsent")
            other_count = excused_count + absent_count

            passed = yes_count > no_count

            if motion.startswith("Do Pass"):
                vote_type = "passage"
            elif motion == "Concurred in amendments":
                vote_type = "amendment"
            elif motion == "Veto override":
                vote_type = "veto_override"
            else:
                vote_type = "other"

            vote = Vote(chamber, date, motion, passed, yes_count, no_count, other_count)
            vote["type"] = vote_type

            if committee:
                vote["committee"] = committee

            vote.add_source(url)

            # Each vote cell is preceded by the cell holding the voter's name.
            for td in page.xpath("//table[contains(@id, 'tblVotes')]/tr/td"):
                if td.text == "Yea":
                    vote.yes(td.getprevious().text.strip())
                elif td.text == "Nay":
                    vote.no(td.getprevious().text.strip())
                elif td.text in ("Excused", "Absent"):
                    vote.other(td.getprevious().text.strip())

            bill.add_vote(vote)
Exemplo n.º 26
0
    def scrape_votes(self, bill, link):
        """Parse every vote in the ``lblVoteData`` span at ``link``.

        The span's text is split into one chunk per vote; each chunk is then
        split into fields on a run of non-breaking spaces. The first field is
        the motion (and carries the date); the others hold the totals and
        the name lists. Each vote is validated and added to ``bill``.

        Returns:
            The same ``bill`` object.
        """
        # Compiled once rather than per vote; raw strings keep the regex
        # escapes from being read as (invalid) string escapes.
        chunk_regex = re.compile(r"\w+? by [\w ]+?\s+-")
        date_regex = re.compile(r"(\d+/\d+/\d+)")
        vote_regex = re.compile(r"\d+$")
        aye_regex = re.compile("^.+voting aye were: (.+) -")
        no_regex = re.compile("^.+voting no were: (.+) -")
        other_regex = re.compile("^.+present and not voting were: (.+) -")

        with self.urlopen(link) as page:
            page = lxml.html.fromstring(page)
            raw_vote_data = page.xpath("//span[@id='lblVoteData']")[0].text_content()
            raw_vote_data = chunk_regex.split(raw_vote_data.strip())[1:]
            for raw_vote in raw_vote_data:
                raw_vote = raw_vote.split(u"\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0")
                motion = raw_vote[0]

                # Stays None when the motion text carries no date.
                vote_date = date_regex.search(motion)
                if vote_date:
                    vote_date = datetime.datetime.strptime(vote_date.group(), "%m/%d/%Y")

                passed = "Passed" in motion or "Recommended for passage" in motion or "Adopted" in raw_vote[1]
                yes_count = 0
                no_count = 0
                other_count = 0
                ayes = []
                nos = []
                others = []

                for v in raw_vote[1:]:
                    v = v.strip()
                    tally = vote_regex.search(v)
                    if tally and v.startswith("Ayes..."):
                        yes_count = int(tally.group())
                        continue
                    if tally and v.startswith("Noes..."):
                        no_count = int(tally.group())
                        continue
                    if tally and v.startswith("Present and not voting..."):
                        other_count += int(tally.group())
                        continue

                    # Not a totals field: try the name lists in turn.
                    aye_match = aye_regex.search(v)
                    if aye_match:
                        ayes = aye_match.group(1).split(", ")
                        continue
                    no_match = no_regex.search(v)
                    if no_match:
                        nos = no_match.group(1).split(", ")
                        continue
                    other_match = other_regex.search(v)
                    if other_match:
                        others += other_match.group(1).split(", ")

                if "ChamberVoting=H" in link:
                    chamber = "lower"
                else:
                    chamber = "upper"

                vote = Vote(chamber, vote_date, motion, passed, yes_count, no_count, other_count)
                vote.add_source(link)
                for a in ayes:
                    vote.yes(a)
                for n in nos:
                    vote.no(n)
                for o in others:
                    vote.other(o)

                vote.validate()
                bill.add_vote(vote)

        return bill
Exemplo n.º 27
0
    def scrape_vote(self, chamber, session, bill_id, vote_url):
        """Fetch the vote page at ``vote_url`` and save it as a 'lower' vote.

        Returns without saving when the request ends up on the "no vote
        found" page, when the page has no motion heading, or when none of
        its paragraphs parses as an ``mm/dd/YYYY`` date. ``chamber`` is
        stored on the vote as ``bill_chamber``.
        """
        NO_VOTE_URL = 'http://www.house.leg.state.mn.us/votes/novotefound.asp'
        resp = self.get(vote_url)
        html = resp.text

        # sometimes the link is broken, will redirect to NO_VOTE_URL
        if resp.url == NO_VOTE_URL:
            return

        doc = lxml.html.fromstring(html)
        headings = doc.xpath("//div[@id='leg_PageContent']/div/h2/text()")
        if not headings:
            self.logger.warning("Bill was missing a motion number, skipping")
            return
        motion = headings[0]

        # The second <h3> holds the tally; the yea and nay totals are its
        # first and fourth whitespace-separated tokens.
        tally_tokens = doc.xpath(
            ".//div[@id='leg_PageContent']/div/h3/text()")[1].split()
        yeas = int(tally_tokens[0])
        nays = int(tally_tokens[3])

        # Use the first paragraph that parses as a date.
        for para in doc.xpath(".//div[@id='leg_PageContent']/div/p/text()"):
            try:
                date = datetime.datetime.strptime(para.strip(), '%m/%d/%Y')
            except ValueError:
                continue
            break
        else:
            self.logger.warning("No date could be found for vote on %s" %
                                motion)
            return

        vote = Vote('lower', date, motion, yeas > nays, yeas, nays, 0,
                    session=session, bill_id=bill_id, bill_chamber=chamber)
        vote.add_source(vote_url)

        # first table has YEAs
        for yea_name in doc.xpath('//table[1]/tr/td/font/text()'):
            vote.yes(yea_name.strip())

        # second table is nays
        for nay_name in doc.xpath('//table[2]/tr/td/font/text()'):
            vote.no(nay_name.strip())

        self.save_vote(vote)
Exemplo n.º 28
0
def record_votes(root, session):
    """Yield a ``Vote`` for each recorded vote found under ``root``.

    Every ``<div>`` whose text starts with "Yeas —" is treated as the start
    of a roll call. The description of the vote is read from the element two
    siblings before it; divs whose description does not match the expected
    "... was adopted/passed by (Record N): ..." wording, or whose bill id
    starts with neither ``H`` nor ``S``, are skipped.

    The yielded votes have no chamber or date set and are always marked as
    passed; ``session[0:2]`` is stored as the vote's session.
    """
    description = re.compile(
        r'(?P<bill_id>\w+\W+\d+)(,?\W+as\W+amended,?)?\W+was\W+'
        r'(?P<type>adopted|passed'
        r'(\W+to\W+(?P<to>engrossment|third\W+reading))?)\W+'
        r'by\W+\(Record\W+(?P<record>\d+)\):\W+'
        r'(?P<yeas>\d+)\W+Yeas,\W+(?P<nays>\d+)\W+Nays,\W+'
        r'(?P<present>\d+)\W+Present')

    for el in root.xpath(u'//div[starts-with(., "Yeas \u2014")]'):
        text = ''.join(el.getprevious().getprevious().itertext())
        # str.replace returns a new string, so the result must be kept.
        text = text.replace('\n', ' ')
        m = description.search(text)
        if not m:
            continue

        yes_count = int(m.group('yeas'))
        no_count = int(m.group('nays'))
        other_count = int(m.group('present'))

        bill_id = m.group('bill_id')
        bill_id = bill_id.replace(u'\xa0', ' ')
        # Drop the "CS" prefix so the id is a plain HB/SB id.
        bill_id = re.sub(r'CS(SB|HB)', r'\1', bill_id)

        if bill_id.startswith(('H', 'CSHB')):
            bill_chamber = 'lower'
        elif bill_id.startswith(('S', 'CSSB')):
            bill_chamber = 'upper'
        else:
            continue

        motion = get_motion(m)

        vote = Vote(None, None, motion, True, yes_count, no_count,
                    other_count)
        vote['bill_id'] = bill_id
        vote['bill_chamber'] = bill_chamber
        vote['session'] = session[0:2]
        vote['method'] = 'record'
        vote['record'] = m.group('record')
        vote['type'] = get_type(motion)

        for name in names(el):
            vote.yes(name)

        # An optional "Nays" element follows, then any number of
        # "Present"/"Absent" elements.
        el = next_tag(el)
        if el.text and el.text.startswith('Nays'):
            for name in names(el):
                vote.no(name)
            el = next_tag(el)

        while el.text and re.match(r'Present|Absent', el.text):
            for name in names(el):
                vote.other(name)
            el = next_tag(el)

        # Use the number of names actually collected rather than the
        # "Present" figure from the description.
        vote['other_count'] = len(vote['other_votes'])
        yield vote
Exemplo n.º 29
0
    def parse_vote(self, bill, vote_date, vote_chamber, vote_status, vote_url):
        """Download a vote document, convert it to text and parse the vote.

        The document at ``vote_url`` is converted with the ``abiword``
        command into ``/tmp/ksvote.txt``. A vote is created from the line
        carrying the totals, and names are added from the ``Yeas:``,
        ``Nays:``, ``Present ...`` and ``Absent or ...`` lines. The vote is
        marked as failed when the text contains "the motion did not
        prevail". Nothing is added to ``bill`` if no totals line is found.

        ``vote_date`` is a string in ``'%a %d %b %Y'`` format; a
        ``vote_chamber`` of ``'Senate'`` maps to ``'upper'`` and anything
        else to ``'lower'``; ``vote_status`` is used as the motion.
        """
        vote_chamber = 'upper' if vote_chamber == 'Senate' else 'lower'
        vote_date = datetime.datetime.strptime(vote_date, '%a %d %b %Y')

        vote_doc, _resp = self.urlretrieve(vote_url)

        try:
            # Argument list, no shell: the path is passed as one argument
            # and is never interpreted by a shell.
            subprocess.check_call(['abiword', '--to=ksvote.txt', vote_doc],
                                  cwd='/tmp/')
            with open('/tmp/ksvote.txt') as vote_file:
                vote_lines = vote_file.readlines()
        finally:
            # Remove the download even when the conversion fails.
            os.remove(vote_doc)

        totals_regex = re.compile(
            r'Yeas (\d+)[;,] Nays (\d+)[;,] '
            r'(?:Present but not voting:|Present and Passing) (\d+)[;,] '
            r'(?:Absent or not voting:|Absent or Not Voting) (\d+)')

        def members_of(roll_line):
            """Return the names after the first colon, minus 'None.'."""
            listed = roll_line.split(':', 1)[1].strip()
            return [member for member in listed.split(', ')
                    if member != 'None.']

        vote = None
        passed = True
        for line in vote_lines:
            line = line.strip()
            totals = totals_regex.findall(line)
            if totals:
                yeas, nays, nv, absent = (int(count) for count in totals[0])
                # default passed to true
                vote = Vote(vote_chamber, vote_date, vote_status, True, yeas,
                            nays, nv + absent)
            elif line.startswith('Yeas:'):
                for member in members_of(line):
                    vote.yes(member)
            elif line.startswith('Nays:'):
                for member in members_of(line):
                    vote.no(member)
            elif line.startswith('Present ') or line.startswith('Absent or'):
                for member in members_of(line):
                    vote.other(member)
            elif 'the motion did not prevail' in line:
                passed = False

        if vote:
            vote['passed'] = passed
            vote.add_source(vote_url)
            bill.add_vote(vote)
Exemplo n.º 30
0
    def scrape_vote(self, bill, vote_url, chamber, date):
        """Parse the vote summary page at ``vote_url`` and add it to ``bill``.

        Nothing is added when the page has no motion text, when the motion
        mentions "withdrawn", or when the page has neither a FINAL ACTION
        entry nor a "passed without objection" motion.
        """
        page = self.lxmlize(vote_url)

        try:
            motion = page.xpath(
                '//td/b/font[text()="MOTION:"]/../../'
                'following-sibling::td/font/text()')[0]
        except IndexError:
            # No motion cell on the page.
            self.warning("Vote Summary Page Broken ")
            return

        if 'withdrawn' in motion:
            return

        # Every table row after the one with VOTE in a td/div/b/font
        rolls = page.xpath('//tr[preceding-sibling::tr/td/div/b/font/text()="VOTE"]')

        # The last row carries the totals and the final action.
        count_row = rolls[-1]

        def labelled_value(label):
            """Return the text of the first <font> after the bold ``label``."""
            return count_row.xpath(
                './/b/font[normalize-space(text())="%s"]'
                '/../following-sibling::font[1]/text()' % label)[0]

        yes_count = labelled_value("YES:")
        no_count = labelled_value("NO:")
        exc_count = labelled_value("EXC:")
        nv_count = labelled_value("ABS:")

        final_action = count_row.xpath(
            './/b/font[normalize-space(text())="FINAL ACTION:"]'
            '/../following-sibling::b[1]/font/text()')
        if final_action:
            final = final_action[0]
            passed = 'pass' in final.lower() or int(yes_count) > int(no_count)
        elif 'passed without objection' in motion.lower():
            passed = True
            # No recorded tally: every listed member counts as a yes.
            yes_count = len(rolls[:-2])
        else:
            self.warning("No vote breakdown found for %s" % vote_url)
            return

        other_count = int(exc_count) + int(nv_count)

        vote = Vote(chamber, date, motion, passed,
                    int(yes_count), int(no_count), other_count)

        # The last two rows are not member rows.
        for roll in rolls[:-2]:
            voter = roll.xpath('td[2]/div/font')[0].text_content()
            voted = roll.xpath('td[3]/div/font')[0].text_content().strip()
            if voted:
                if 'Yes' in voted:
                    vote.yes(voter)
                elif 'No' in voted:
                    vote.no(voter)
                else:
                    vote.other(voter)
            elif 'passed without objection' in motion.lower() and voter:
                vote.yes(voter)

        bill.add_vote(vote)
Exemplo n.º 31
0
    def scrape_votes_old(self, bill, billname, session):
        """Scrape votes for ``billname`` from the archive site into ``bill``.

        One vote is added per ``JournalText`` link on the archive page. The
        link text is the date, the second cell of the link's row gives
        "<chamber> - <motion>", and the following row holds the yea and nay
        name tables. Counts are the number of names found.

        Raises:
            ScrapeError: if the chamber text is neither "House" nor "Senate".
        """
        vote_url = ('http://archives.legislature.state.oh.us/bills.cfm?ID='
                    + session + '_' + billname)

        page = lxml.html.fromstring(self.get(vote_url).text)

        chamber_codes = {'House': 'lower', 'Senate': 'upper'}

        for jlink in page.xpath("//a[contains(@href, 'JournalText')]"):
            date = datetime.datetime.strptime(jlink.text, "%m/%d/%Y").date()

            details = jlink.xpath("string(../../../td[2])")
            detail_parts = details.split(" - ")

            chamber_label = detail_parts[0]
            chamber = chamber_codes.get(chamber_label)
            if chamber is None:
                raise ScrapeError("Bad chamber: %s" % chamber_label)

            # Motion is the first line of the text after the chamber.
            motion = detail_parts[1].split("\n")[0].strip()

            vote_row = jlink.xpath("../../..")[0].getnext()

            # Collect the non-empty cell texts from each name table.
            yea_div = vote_row.xpath("td/font/div[contains(@id, 'Yea')]")[0]
            yea_cells = (td.xpath("string()")
                         for td in yea_div.xpath("table/tr/td"))
            yeas = [cell for cell in yea_cells if cell]

            no_div = vote_row.xpath("td/font/div[contains(@id, 'Nay')]")[0]
            nay_cells = (td.xpath("string()")
                         for td in no_div.xpath("table/tr/td"))
            nays = [cell for cell in nay_cells if cell]

            yes_count, no_count = len(yeas), len(nays)

            vote = Vote(chamber, date, motion, yes_count > no_count,
                        yes_count, no_count, 0)

            for voter in yeas:
                vote.yes(voter)
            for voter in nays:
                vote.no(voter)

            vote.add_source(vote_url)

            bill.add_vote(vote)
Exemplo n.º 32
0
def record_votes(root, session):
    """Yield a ``Vote`` for each recorded vote described under ``root``.

    Every ``div`` whose text starts with ``"Yeas \u2014"`` is taken as the
    start of a name listing. The text of the element two siblings before it
    is searched for a sentence of the form
    ``<bill> ... was adopted|passed ... by (Record N): X Yeas, Y Nays,
    Z Present``; listings without such a sentence are skipped, as are bills
    whose id does not start with ``H`` or ``S``.

    The yielded vote has no chamber or date set (both are passed as
    ``None``), is always marked as passed, and carries ``bill_id``,
    ``bill_chamber``, ``session`` (first two characters of ``session``),
    ``method``, ``record`` and ``type`` as extra keys. Its ``other_count``
    is overwritten with the number of "other" names actually collected.
    """
    # All fragments are raw strings so that \W, \d and \( reach the regex
    # engine verbatim instead of relying on unrecognised string escapes.
    vote_re = re.compile(
        r'(?P<bill_id>\w+\W+\d+)(,?\W+as\W+amended,?)?\W+was\W+'
        r'(?P<type>adopted|passed'
        r'(\W+to\W+(?P<to>engrossment|third\W+reading))?)\W+'
        r'by\W+\(Record\W+(?P<record>\d+)\):\W+'
        r'(?P<yeas>\d+)\W+Yeas,\W+(?P<nays>\d+)\W+Nays,\W+'
        r'(?P<present>\d+)\W+Present')

    for el in root.xpath(u'//div[starts-with(., "Yeas \u2014")]'):
        text = ''.join(el.getprevious().getprevious().itertext())
        # str.replace returns a new string; the result has to be kept for the
        # newline normalisation to have any effect on the captured groups.
        text = text.replace('\n', ' ')

        m = vote_re.search(text)
        if not m:
            continue

        yes_count = int(m.group('yeas'))
        no_count = int(m.group('nays'))
        other_count = int(m.group('present'))

        bill_id = m.group('bill_id')
        bill_id = bill_id.replace(u'\xa0', ' ')
        # Drop the "CS" prefix so CSHB/CSSB are reported as HB/SB.
        bill_id = re.sub(r'CS(SB|HB)', r'\1', bill_id)

        if bill_id.startswith(('H', 'CSHB')):
            bill_chamber = 'lower'
        elif bill_id.startswith(('S', 'CSSB')):
            bill_chamber = 'upper'
        else:
            continue

        motion = get_motion(m)

        vote = Vote(None, None, motion, True,
                    yes_count, no_count, other_count)
        vote['bill_id'] = bill_id
        vote['bill_chamber'] = bill_chamber
        vote['session'] = session[0:2]
        vote['method'] = 'record'
        vote['record'] = m.group('record')
        vote['type'] = get_type(motion)

        # The matched div lists the yea names.
        for name in names(el):
            vote.yes(name)

        # An optional "Nays" element follows, then any number of
        # "Present"/"Absent" elements.
        el = next_tag(el)
        if el.text and el.text.startswith('Nays'):
            for name in names(el):
                vote.no(name)
            el = next_tag(el)

        while el.text and re.match(r'Present|Absent', el.text):
            for name in names(el):
                vote.other(name)
            el = next_tag(el)

        vote['other_count'] = len(vote['other_votes'])
        yield vote
Exemplo n.º 33
0
    def scrape_votes(self, bill, bill_prefix, number, session):
        """Collect the votes listed on a bill's ``votes.cfm`` page.

        The page is looked up by ``session``, ``bill_prefix`` and ``number``.
        Each link whose href contains ``JournalText`` describes one vote: the
        link text is the date, the second cell of its row holds
        ``"<chamber> - <motion>"`` and the following row holds the yea and
        nay name tables. One ``Vote`` per link is added to ``bill``; its
        totals are the number of names found and its "other" total is 0.

        Raises ``ScrapeError`` for a chamber label other than ``House`` or
        ``Senate``.
        """
        vote_url = ('http://www.legislature.state.oh.us/votes.cfm?ID=' +
                    session + '_' + bill_prefix + '_' + str(number))

        page = lxml.html.fromstring(self.urlopen(vote_url))

        def listed_names(row, marker):
            # Non-empty cell texts of the first div whose id contains marker.
            div = row.xpath(
                "td/font/div[contains(@id, '%s')]" % marker)[0]
            found = []
            for cell in div.xpath("table/tr/td"):
                text = cell.xpath("string()")
                if text:
                    found.append(text)
            return found

        for jlink in page.xpath("//a[contains(@href, 'JournalText')]"):
            date = datetime.datetime.strptime(jlink.text,
                                              "%m/%d/%Y").date()

            details = jlink.xpath("string(../../../td[2])")

            chamber = details.split(" - ")[0]
            if chamber == 'House':
                chamber = 'lower'
            elif chamber == 'Senate':
                chamber = 'upper'
            else:
                raise ScrapeError("Bad chamber: %s" % chamber)

            # Only the first line after the separator is the motion.
            motion = details.split(" - ")[1].split("\n")[0].strip()

            vote_row = jlink.xpath("../../..")[0].getnext()
            yeas = listed_names(vote_row, 'Yea')
            nays = listed_names(vote_row, 'Nay')

            yes_count, no_count = len(yeas), len(nays)
            passed = yes_count > no_count

            vote = Vote(chamber, date, motion, passed,
                        yes_count, no_count, 0)
            for member in yeas:
                vote.yes(member)
            for member in nays:
                vote.no(member)

            vote.add_source(vote_url)
            bill.add_vote(vote)
Exemplo n.º 34
0
    def scrape_votes(self, bill, votes_url):
        """Scrape the roll-call votes linked from ``votes_url`` onto ``bill``.

        Every link whose href contains ``votehistory`` is treated as one vote.
        The link text is split on ``' - '``: the last piece is the date and,
        when there are exactly three pieces, the middle one is the motion
        (otherwise the motion defaults to ``'Third Reading'``). The linked
        file is downloaded, converted to text with ``convert_pdf`` and
        scanned for ``<code>  <name>`` pairs to fill in the individual votes.

        The totals are not read from the document: they are derived from the
        number of names collected, and the vote is marked as passed when
        there are more yes than no names.
        """
        html = self.urlopen(votes_url)
        doc = lxml.html.fromstring(html)
        doc.make_links_absolute(votes_url)

        # Vote indicator, a few spaces, a name, then a newline, two
        # whitespace characters, or the end of the line.  The lines scanned
        # below come from splitlines() and never contain a newline, so
        # without the ``$`` alternative a name ending a line is never matched.
        VOTE_RE = re.compile(r'(Y|N|E|NV|A|P|-)\s{2,5}(\w.+?)(?:\n|\s{2}|$)')

        for link in doc.xpath('//a[contains(@href, "votehistory")]'):

            pieces = link.text.split(' - ')
            date = pieces[-1]
            if len(pieces) == 3:
                motion = pieces[1]
            else:
                motion = 'Third Reading'

            chamber = link.xpath('../following-sibling::td/text()')[0]
            if chamber == 'HOUSE':
                chamber = 'lower'
            elif chamber == 'SENATE':
                chamber = 'upper'
            else:
                # Best effort: the vote is still recorded with the raw label.
                self.warning('unknown chamber %s' % chamber)

            date = datetime.datetime.strptime(date, "%A, %B %d, %Y")

            # Download the file; remove the temporary copy even when the
            # conversion fails.
            fname, resp = self.urlretrieve(link.get('href'))
            try:
                pdflines = convert_pdf(fname, 'text').splitlines()
            finally:
                os.remove(fname)

            vote = Vote(chamber, date, motion.strip(), False, 0, 0, 0)

            for line in pdflines:
                for vcode, name in VOTE_RE.findall(line):
                    # A name matched up to end-of-line may carry one
                    # trailing blank.
                    name = name.rstrip()
                    if vcode == 'Y':
                        vote.yes(name)
                    elif vcode == 'N':
                        vote.no(name)
                    else:
                        vote.other(name)

            # Fake the counts from the names that were found.
            vote['yes_count'] = len(vote['yes_votes'])
            vote['no_count'] = len(vote['no_votes'])
            vote['other_count'] = len(vote['other_votes'])
            vote['passed'] = vote['yes_count'] > vote['no_count']
            vote.add_source(link.get('href'))

            bill.add_vote(vote)
Exemplo n.º 35
0
    def parse_vote(self, bill, vote_date, vote_chamber, vote_status, vote_url):
        """Download a vote document, convert it to text and record the vote.

        ``vote_chamber`` is mapped to ``'upper'`` when it equals ``'Senate'``
        and to ``'lower'`` otherwise; ``vote_date`` is parsed with the format
        ``'%a %d %b %Y'``. The document at ``vote_url`` is converted with the
        external ``abiword`` program into ``/tmp/ksvote.txt`` and read line by
        line:

        * a line with the four totals creates the ``Vote`` (motion text is
          ``vote_status``; "present" and "absent" are summed into the other
          count);
        * ``Yeas:``, ``Nays:``, ``Present ...:`` and ``Absent or ...:`` lines
          add the comma-separated member names (``'None.'`` is skipped);
        * a line containing ``'the motion did not prevail'`` marks the vote
          as failed; otherwise it is recorded as passed.

        Nothing is added to ``bill`` when no totals line is found.

        Raises ``subprocess.CalledProcessError`` if ``abiword`` exits with a
        non-zero status.
        """
        vote_chamber = 'upper' if vote_chamber == 'Senate' else 'lower'
        vote_date = datetime.datetime.strptime(vote_date, '%a %d %b %Y')

        vote_doc, resp = self.urlretrieve(vote_url)
        try:
            # Pass an argument list instead of a shell string so the
            # downloaded file's path is never interpreted by a shell.
            subprocess.check_call(['abiword', '--to=ksvote.txt', vote_doc],
                                  cwd='/tmp/')
            with open('/tmp/ksvote.txt') as converted:
                vote_lines = converted.readlines()
        finally:
            # Remove the download even when the conversion fails.
            os.remove(vote_doc)

        totals_re = re.compile(
            r'Yeas (\d+)[;,] Nays (\d+)[;,] '
            r'(?:Present but not voting:|Present and Passing) (\d+)[;,] '
            r'(?:Absent or not voting:|Absent or Not Voting) (\d+)')

        def listed_members(text):
            # Names after the first colon, without the 'None.' placeholder.
            listing = text.split(':', 1)[1].strip()
            return [m for m in listing.split(', ') if m != 'None.']

        vote = None
        passed = True
        for line in vote_lines:
            line = line.strip()
            totals = totals_re.findall(line)
            if totals:
                yeas, nays, nv, absent = [int(n) for n in totals[0]]
                # default passed to true
                vote = Vote(vote_chamber, vote_date, vote_status,
                            True, yeas, nays, nv + absent)
            elif line.startswith('Yeas:'):
                for member in listed_members(line):
                    vote.yes(member)
            elif line.startswith('Nays:'):
                for member in listed_members(line):
                    vote.no(member)
            elif line.startswith('Present ') or line.startswith('Absent or'):
                for member in listed_members(line):
                    vote.other(member)
            elif 'the motion did not prevail' in line:
                passed = False

        if vote:
            vote['passed'] = passed
            vote.add_source(vote_url)
            bill.add_vote(vote)
Exemplo n.º 36
0
    def parse_roll_call(self, url, chamber, date):
        """Fetch a roll-call page and return it as a ``Vote``.

        The motion is read from the fourth ``div`` with class ``font8text``
        (falling back to the tail of the following element, then to the fifth
        such ``div``). ``'FP'`` is expanded to ``'FINAL PASSAGE'`` and an
        empty motion becomes ``'Unknown'``. The vote type is ``'passage'``,
        ``'amendment'`` or ``'other'`` depending on the motion text.

        Totals come from the elements following the ``YEAS``, ``NAYS``,
        ``LVE`` and ``N/V`` labels; the vote passes only when the yeas
        outnumber nays plus the other two totals combined.
        """
        with self.urlopen(url) as page:
            doc = lxml.html.fromstring(page)

            motion_divs = doc.xpath("//div[@class='font8text']")
            motion = motion_divs[3].text.strip()
            if not motion:
                try:
                    motion = motion_divs[3].getnext().tail.strip()
                except AttributeError:
                    motion = motion_divs[4].text.strip()

            if motion == 'FP':
                motion = 'FINAL PASSAGE'

            if motion == 'FINAL PASSAGE':
                vote_type = 'passage'
            elif re.match(r'CONCUR(RENCE)? IN \w+ AMENDMENTS', motion):
                vote_type = 'amendment'
            else:
                vote_type = 'other'

            motion = motion or 'Unknown'

            def total_for(label):
                # The number sits in the element right after the label div.
                label_div = doc.xpath("//div[text() = '%s']" % label)[0]
                return int(label_div.getnext().text)

            yeas = total_for('YEAS')
            nays = total_for('NAYS')
            other = total_for('LVE') + total_for('N/V')

            vote = Vote(chamber, date, motion, yeas > (nays + other),
                        yeas, nays, other, type=vote_type)

            recorders = {'Y': vote.yes, 'N': vote.no}
            for span in doc.xpath("//span[text() = 'Y' or text() = 'N'"
                                  "or text() = 'X' or text() = 'E']"):
                member = span.getnext().text.strip()
                # Anything that is not Y or N is recorded as "other".
                recorders.get(span.text, vote.other)(member)

            return vote
Exemplo n.º 37
0
    def scrape_chamber_votes(self, chamber, session, url):
        """Save a ``Vote`` for each ``<vote>`` element of an XML feed.

        Only votes on legislation belonging to ``chamber`` are kept: the
        prefix of the ``legislation`` attribute decides the bill chamber
        (``SB``/``SR`` upper, ``HB``/``HR`` lower). Entries with an empty,
        ``EX`` or ``ELECTION`` prefix are skipped, and any other prefix
        raises ``Exception``.

        The unknown, excused and not-voting totals are combined into the
        other count; a vote passes when yeas exceed nays. Each vote is handed
        to ``self.save_vote``.
        """
        doc = lxml.etree.fromstring(self.urlopen(url))

        chamber_by_prefix = {'SB': 'upper', 'SR': 'upper',
                             'HB': 'lower', 'HR': 'lower'}
        ignored_prefixes = ('', 'EX', 'ELECTION')

        for vxml in doc.xpath('//vote'):
            legislation = vxml.get('legislation')
            motion = vxml.get('caption')
            timestamp = datetime.datetime.strptime(vxml.get('dateTime'),
                                                   '%Y-%m-%dT%H:%M:%S')

            leg_prefix = legislation.split(' ')[0]
            if leg_prefix in chamber_by_prefix:
                bill_chamber = chamber_by_prefix[leg_prefix]
            elif leg_prefix in ignored_prefixes:
                continue
            else:
                raise Exception('unknown legislation prefix: ' + legislation)

            # skip bills from other chamber
            if bill_chamber != chamber:
                continue

            def total(attribute):
                return int(vxml.xpath('totals/@' + attribute)[0])

            other_count = (total('unknown') + total('excused') +
                           total('not-voting'))
            no_count = total('nays')
            yes_count = total('yeas')

            vote = Vote(chamber,
                        timestamp,
                        motion,
                        passed=yes_count > no_count,
                        yes_count=yes_count,
                        no_count=no_count,
                        other_count=other_count,
                        session=session,
                        bill_id=legislation,
                        bill_chamber=bill_chamber)
            vote.add_source(url)

            recorders = {'Y': vote.yes, 'N': vote.no}
            for member_el in vxml.xpath('member'):
                letter = member_el.get('vote')
                member = member_el.get('name')
                recorders.get(letter, vote.other)(member)

            self.save_vote(vote)
Exemplo n.º 38
0
    def get_lower_votes(self):
        """Scrape the votes page for ``self.bill_id`` onto ``self.bill``.

        Returns early, adding nothing, when the page cannot be loaded
        (``self.url2lxml`` returns ``None``) or when its first ``<pre>``
        contains only the "no votes" notice.

        Each ``<table>`` on the page becomes one ``'lower'`` chamber
        ``Vote`` with the motion ``'Floor Vote'``. The date and the
        ``yeas/nays`` totals are read from the elements following the
        ``DATE:`` and ``YEA/NAY:`` labels in the caption. The table's cell
        texts are consumed in (name, value) pairs: ``Y`` is a yes, ``N`` or
        ``NO`` is a no, and anything else is recorded as "other" and also
        grouped by its raw value under the vote's ``actual_vote`` key.
        """
        url = ('http://assembly.state.ny.us/leg/?'
               'default_fld=&bn=%s&term=%s&Votes=Y')
        url = url % (self.bill_id, self.term_start_year)
        doc = self.url2lxml(url)
        if doc is None:
            return

        pre = doc.xpath('//pre')[0].text_content()
        no_votes = ('There are no votes for this bill in this '
                    'legislative session.')
        if pre == no_votes:
            return

        for table in doc.xpath('//table'):
            # One mapping per vote: sharing a single dict across tables would
            # make every vote report the other tables' entries as well.
            actual_vote = collections.defaultdict(list)

            date = table.xpath('caption/label[contains(., "DATE:")]')
            # next() works on both Python 2 and 3, unlike the .next() method.
            date = next(date[0].itersiblings()).text
            date = datetime.datetime.strptime(date, '%m/%d/%Y')

            votes = table.xpath('caption/span/label[contains(., "YEA/NAY:")]')
            votes = next(votes[0].itersiblings()).text
            yes_count, no_count = map(int, votes.split('/'))

            passed = yes_count > no_count
            vote = Vote('lower', date, 'Floor Vote', passed, yes_count,
                        no_count, other_count=0)

            # Cells alternate name, value; zipping the iterator with itself
            # yields consecutive pairs and drops an unpaired trailing cell.
            cells = iter(table.xpath('tr/td/text()'))
            for name, vote_val in zip(cells, cells):
                name = self._scrub_name(name)

                if vote_val.strip() == 'Y':
                    vote.yes(name)
                elif vote_val.strip() in ('N', 'NO'):
                    vote.no(name)
                else:
                    vote.other(name)
                    actual_vote[vote_val].append(name)

            # The page doesn't provide an other_count.
            vote['other_count'] = len(vote['other_votes'])
            vote['actual_vote'] = actual_vote
            self.bill.add_vote(vote)
Exemplo n.º 39
0
    def scrape_vote(self, bill, motion, url):
        """Read one roll-call page and attach the resulting vote to ``bill``.

        The yea, nay, absent and excused totals, the date and the outcome are
        each taken from the cell that follows the matching label cell. Absent
        and excused are summed into the other count, and the vote is marked
        as passed only when the outcome text equals ``'PREVAILS'``.

        The chamber is derived from the URL and is only assigned when it
        contains ``chamber=House`` or ``chamber=Senate``. Member rows with
        a code other than ``Y``, ``N``, ``X`` or ``E`` are ignored.
        """
        response = self.get(url, retry_on_404=True)
        page = lxml.html.fromstring(response.text)

        def value_after(label_query):
            # Text of the cell(s) following the first cell matching the query.
            label_cell = page.xpath(label_query)[0]
            return label_cell.xpath("string(following-sibling::td)")

        yes_count = int(value_after("//td[text() = 'Yeas (Y):']"))
        no_count = int(value_after("//td[text() = 'Nays (N):']"))
        abs_count = int(value_after("//td[text() = 'Absent (X):']"))
        ex_count = int(value_after("//td[text() = 'Excused (E):']"))
        other_count = abs_count + ex_count

        if 'chamber=House' in url:
            chamber = 'lower'
        elif 'chamber=Senate' in url:
            chamber = 'upper'

        date_text = value_after("//td[text() = 'Date:']")
        try:
            date = datetime.datetime.strptime(date_text, "%B %d, %Y")
        except ValueError:
            # Some pages abbreviate the month, e.g. "Jan. 5, 2012".
            date = datetime.datetime.strptime(date_text, "%b. %d, %Y")

        outcome = value_after("//td[text()='Outcome:']")
        passed = outcome == 'PREVAILS'

        vote = Vote(chamber, date, motion, passed,
                    yes_count, no_count, other_count)
        vote.add_source(url)

        header_cell = page.xpath("//td[text() = 'Member']")[0]
        # The first row of the member table is the header row.
        for row in header_cell.xpath("../../tr")[1:]:
            name = row.xpath("string(td[2])")
            code = row.xpath("string(td[4])")
            if code == 'Y':
                vote.yes(name)
            elif code == 'N':
                vote.no(name)
            elif code in ('X', 'E'):
                vote.other(name)

        bill.add_vote(vote)
Exemplo n.º 40
0
    def _parse_senate_votes(self, vote_data):
        """Build an ``'upper'`` chamber ``Vote`` from a vote record dict.

        ``vote_data`` must provide ``voteDate`` (``YYYY-MM-DD``),
        ``voteType`` and ``memberVotes['items']``; committee votes also need
        ``committee['name']``. ``AYE`` and ``AYEWR`` entries count as yes,
        ``NAY`` as no, and ``EXC``, ``ABS`` and ``ABD`` as other. The counts
        are tallied from the listed members and the vote passes when yes
        outnumbers no.

        Raises ``ValueError`` for a vote type other than ``FLOOR`` or
        ``COMMITTEE``.
        """
        vote_datetime = datetime.datetime.strptime(
            vote_data['voteDate'], '%Y-%m-%d')

        vote = Vote(
            chamber='upper',
            date=vote_datetime.date(),
            motion='[No motion available.]',
            passed=False,
            yes_votes=[],
            no_votes=[],
            other_votes=[],
            yes_count=0,
            no_count=0,
            other_count=0)

        kind = vote_data['voteType']
        if kind == 'FLOOR':
            vote['motion'] = 'Floor Vote'
        elif kind == 'COMMITTEE':
            vote['motion'] = '{} Vote'.format(vote_data['committee']['name'])
        else:
            raise ValueError('Unknown vote type encountered.')

        vote_rolls = vote_data['memberVotes']['items']

        # Both plain ayes and "AYEWR" entries are tallied as yes votes.
        for code in ('AYE', 'AYEWR'):
            if 'items' in vote_rolls.get(code, {}):
                for legislator in vote_rolls[code]['items']:
                    vote.yes(legislator['fullName'])
                    vote['yes_count'] += 1

        if 'items' in vote_rolls.get('NAY', {}):
            for legislator in vote_rolls['NAY']['items']:
                vote.no(legislator['fullName'])
                vote['no_count'] += 1

        # Every remaining category is lumped together as "other".
        for code in ('EXC', 'ABS', 'ABD'):
            if vote_rolls.get(code, []):
                for legislator in vote_rolls[code]['items']:
                    vote.other(legislator['fullName'])
                    vote['other_count'] += 1

        vote['passed'] = vote['yes_count'] > vote['no_count']

        return vote
Exemplo n.º 41
0
    def parse_vote(self, bill, actor, date, motion, url, uniqid):
        """Parse a plain-text roll call at ``url`` and add it to ``bill``.

        The page text is searched for ``YEAS n ... NAYS n ... ABSENT n ...``;
        the three numbers become the yes, no and other counts and the text
        after each number is split on runs of two or more whitespace
        characters to obtain the member names. The vote passes when the yes
        count is greater than the no count.

        When ``actor`` is ``"upper"`` or ``"lower"`` it is used as the vote's
        chamber; any other value is stored as the vote's ``location`` and the
        chamber is left as an empty string. ``url`` is added as a source to
        both ``bill`` and the vote.
        """
        page = self.get(url).text
        bill.add_source(url)
        vote_re = re.compile(
            "YEAS -?\s?(\d+)(.*)NAYS -?\s?(\d+)" "(.*)ABSENT( OR NOT VOTING)? -?\s?" "(\d+)(.*)",
            re.MULTILINE | re.DOTALL,
        )
        match = vote_re.search(page)
        yes_count = int(match.group(1))
        no_count = int(match.group(3))
        other_count = int(match.group(6))

        passed = yes_count > no_count

        if actor in ("upper", "lower"):
            vote_chamber, vote_location = actor, ""
        else:
            vote_chamber, vote_location = "", actor

        vote = Vote(
            vote_chamber,
            date,
            motion,
            passed,
            yes_count,
            no_count,
            other_count,
            location=vote_location,
            _vote_id=uniqid,
        )
        vote.add_source(url)

        # Regex groups 2, 4 and 7 hold the text following each total.
        name_groups = [
            re.split("\s{2,}", match.group(index).strip())
            for index in (2, 4, 7)
        ]
        for record, names in zip((vote.yes, vote.no, vote.other), name_groups):
            for name in names:
                if name:
                    record(name)

        bill.add_vote(vote)
Exemplo n.º 42
0
    def scrape_vote(self, bill, motion, url):
        """Scrape a single roll-call page and add its vote to ``bill``.

        Totals for yeas, nays, absent and excused are read from the cells
        next to their labels; absent plus excused forms the other count. The
        date is parsed as ``"%B %d, %Y"`` with ``"%b. %d, %Y"`` as a fallback,
        and the vote passes only if the outcome cell reads ``'PREVAILS'``.

        ``chamber`` is set from the URL (``chamber=House`` or
        ``chamber=Senate``) and is not assigned for any other URL. Only
        member rows coded ``Y``, ``N``, ``X`` or ``E`` are recorded.
        """
        page = lxml.html.fromstring(self.urlopen(url, retry_on_404=True))

        totals = {}
        for key, label in (('yes', 'Yeas (Y):'),
                           ('no', 'Nays (N):'),
                           ('absent', 'Absent (X):'),
                           ('excused', 'Excused (E):')):
            label_cell = page.xpath("//td[text() = '%s']" % label)[0]
            totals[key] = int(
                label_cell.xpath("string(following-sibling::td)"))

        yes_count = totals['yes']
        no_count = totals['no']
        other_count = totals['absent'] + totals['excused']

        if 'chamber=House' in url:
            chamber = 'lower'
        elif 'chamber=Senate' in url:
            chamber = 'upper'

        date_cell = page.xpath("//td[text() = 'Date:']")[0]
        raw_date = date_cell.xpath("string(following-sibling::td)")
        try:
            date = datetime.datetime.strptime(raw_date, "%B %d, %Y")
        except ValueError:
            date = datetime.datetime.strptime(raw_date, "%b. %d, %Y")

        outcome_cell = page.xpath("//td[text()='Outcome:']")[0]
        outcome = outcome_cell.xpath("string(following-sibling::td)")

        vote = Vote(chamber, date, motion,
                    outcome == 'PREVAILS',
                    yes_count, no_count, other_count)
        vote.add_source(url)

        member_cell = page.xpath("//td[text() = 'Member']")[0]
        member_rows = member_cell.xpath("../../tr")[1:]  # skip header row
        for row in member_rows:
            name = row.xpath("string(td[2])")
            vtype = row.xpath("string(td[4])")

            if vtype == 'Y':
                vote.yes(name)
            elif vtype == 'N':
                vote.no(name)
            elif vtype == 'X' or vtype == 'E':
                vote.other(name)

        bill.add_vote(vote)
Exemplo n.º 43
0
    def scrape_votes(self, bill, sponsor, link):
        """Scrape the votes on the page at ``link`` onto ``bill``.

        The text of the ``lblVoteData`` span is split on
        ``"<bill_id> by <sponsor> - "``; every piece after the first is one
        vote. Each piece is further split on a run of ten non-breaking
        spaces: the first part is the motion (which may contain an
        ``m/d/Y`` date) and the remaining parts may hold ``Ayes...N`` /
        ``Noes...N`` totals and the "voting aye were" / "voting no were"
        name lists.

        When both totals are found the vote passes if ayes exceed noes;
        otherwise both totals are reported as 0 and the result is taken from
        the text (``'Passed'`` in the motion or ``'Adopted'`` in the part
        after it). The other count is always 0.

        Returns ``bill``.
        """
        # Compiled once; none of these depend on the vote being parsed.
        date_regex = re.compile(r'(\d+/\d+/\d+)')
        vote_regex = re.compile(r'\d+$')
        aye_regex = re.compile(r'^.+voting aye were: (.+) -')
        no_regex = re.compile(r'^.+voting no were: (.+) -')

        with self.urlopen(link) as page:
            page = lxml.html.fromstring(page)
            raw_vote_data = page.xpath(
                "//span[@id='lblVoteData']")[0].text_content()
            raw_vote_data = raw_vote_data.strip().split(
                '%s by %s - ' % (bill['bill_id'], sponsor))[1:]
            for raw_vote in raw_vote_data:
                raw_vote = raw_vote.split(
                    u'\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0')
                motion = raw_vote[0]

                # Stays None when the motion text carries no date.
                vote_date = date_regex.search(motion)
                if vote_date:
                    vote_date = datetime.datetime.strptime(
                        vote_date.group(), '%m/%d/%Y')

                passed = ('Passed' in motion) or ('Adopted' in raw_vote[1])
                yes_count = None
                no_count = None
                other_count = 0
                ayes = []
                nos = []

                for v in raw_vote[1:]:
                    total = vote_regex.search(v)
                    if v.startswith('Ayes...') and total:
                        yes_count = int(total.group())
                    elif v.startswith('Noes...') and total:
                        no_count = int(total.group())
                    elif aye_regex.search(v):
                        ayes = aye_regex.search(v).groups()[0].split(', ')
                    elif no_regex.search(v):
                        nos = no_regex.search(v).groups()[0].split(', ')

                # Compare against None: a parsed total of 0 (for example a
                # unanimous vote) is a real value and must not be treated as
                # missing.
                if yes_count is not None and no_count is not None:
                    passed = yes_count > no_count
                else:
                    yes_count = no_count = 0

                vote = Vote(bill['chamber'], vote_date, motion, passed,
                            yes_count, no_count, other_count)
                vote.add_source(link)
                for a in ayes:
                    vote.yes(a)
                for n in nos:
                    vote.no(n)
                bill.add_vote(vote)

        return bill
Exemplo n.º 44
0
    def scrape_vote(self, bill, chamber, url):
        """Scrape the roll call at ``url`` and add it to ``bill``.

        Returns without adding anything when the page says no details are
        available. Otherwise the second row of the first table supplies the
        date, motion, yes/no/other totals and the result (passed only when
        the sixth cell reads ``'Pass'``).

        The vote type is derived from the motion text (``'passage'``,
        ``'reading:3'``, ``'amendment'`` or ``'other'``) and passed to
        ``Vote`` as ``type``.

        Member rows are the two-cell rows from the fourth row of the first
        table onward. Names are cut at ``' of'`` unless they start with one
        of ``DOUBLED_NAMES``. ``Yea`` and ``Nay`` rows are recorded as yes
        and no, ``Not Voting`` rows are not recorded, and everything else is
        recorded as other.
        """
        page = self.urlopen(url)
        if 'There are no details available for this roll call' in page:
            return
        page = page.replace('&nbsp;', ' ')
        page = lxml.html.fromstring(page)

        info_row = page.xpath("//table[1]/tr[2]")[0]

        date = info_row.xpath("string(td[1])")
        date = datetime.datetime.strptime(date, "%m/%d/%Y")

        motion = info_row.xpath("string(td[2])")
        yes_count = int(info_row.xpath("string(td[3])"))
        no_count = int(info_row.xpath("string(td[4])"))
        other_count = int(info_row.xpath("string(td[5])"))
        passed = info_row.xpath("string(td[6])") == 'Pass'

        if motion == 'Shall the bill pass?':
            vote_type = 'passage'
        elif motion == 'Shall the bill be read the third time?':
            vote_type = 'reading:3'
        elif 'be amended as' in motion:
            vote_type = 'amendment'
        else:
            vote_type = 'other'

        # The classification above used to be computed and then dropped (and
        # its variable reused for the per-member value); hand it to the vote.
        vote = Vote(chamber, date, motion, passed,
                    yes_count, no_count, other_count,
                    type=vote_type)
        vote.add_source(url)

        for tr in page.xpath("//table[1]/tr")[3:]:
            if len(tr.xpath("td")) != 2:
                continue

            # avoid splitting duplicate names
            name = tr.xpath("string(td[1])").strip()
            if not name.startswith(DOUBLED_NAMES):
                name = name.split(' of')[0]

            cast = tr.xpath("string(td[2])").strip()
            if cast.startswith('Yea'):
                vote.yes(name)
            elif cast.startswith('Nay'):
                vote.no(name)
            elif cast.startswith('Not Voting'):
                pass
            else:
                vote.other(name)

        bill.add_vote(vote)
Exemplo n.º 45
0
    def scrape_vote(self, bill, vote_id):
        """Fetch roll call ``vote_id`` as JSON and add the vote to ``bill``.

        Posts ``vote_id`` to the roll-call endpoint and does nothing when the
        response body is empty. Otherwise the ``Model`` object of the
        response supplies the chamber (through ``self.chamber_map``), the
        timestamp, the motion, the result (passed only when the status is
        ``'Passed'``) and the totals; the not-voting, vacant, absent and
        conflict totals are summed into the other count.

        Members are resolved through ``self.legislators_by_short``; when the
        lookup fails a warning is logged and the short name is used as is.
        Codes other than ``Y`` and ``N`` are recorded as other.
        """
        vote_url = 'https://legis.delaware.gov/json/RollCall/GetRollCallVoteByRollCallId'
        form = {
            'rollCallId': vote_id,
            'sort': '',
            'group': '',
            'filter': '',
        }
        response = self.post(url=vote_url, data=form, allow_redirects=True)
        if not response.content:
            return

        roll = json.loads(response.content)['Model']
        vote_chamber = self.chamber_map[roll['ChamberName']]
        # Timestamps look like "7/1/16 01:00 AM".
        vote_date = datetime.strptime(roll['TakenAtDateTime'],
                                      '%m/%d/%y %I:%M %p')

        # TODO: What does this code mean?
        vote_motion = roll['RollCallVoteType']

        vote_passed = roll['RollCallStatus'] == 'Passed'
        other_count = sum(
            int(roll[key]) for key in ('NotVotingCount',
                                       'VacantVoteCount',
                                       'AbsentVoteCount',
                                       'ConflictVoteCount'))

        vote = Vote(chamber=vote_chamber,
                    date=vote_date,
                    motion=vote_motion,
                    passed=vote_passed,
                    yes_count=roll['YesVoteCount'],
                    no_count=roll['NoVoteCount'],
                    other_count=other_count)

        recorders = {'Y': vote.yes, 'N': vote.no}
        for row in roll['AssemblyMemberVotes']:
            # AssemblyMemberId looks like it should work here,
            # but for some sessions it's bugged to only return session
            try:
                voter = self.legislators_by_short[str(row['ShortName'])]
                name = voter['DisplayName']
            except KeyError:
                self.warning('could not find legislator short name %s',
                             row['ShortName'])
                name = row['ShortName']
            recorders.get(row['SelectVoteTypeCode'], vote.other)(name)

        bill.add_vote(vote)
Exemplo n.º 46
0
    def scrape_votes(self, bill, page):
        """Add an ``'upper'`` chamber floor vote to ``bill`` per vote header.

        A vote header is a ``<b>`` inside a ``<div>`` whose text starts with
        ``'VOTE: FLOOR VOTE:'``; the date follows the first ``-`` in that
        text. Within the blockquote siblings that follow, each ``<b>``
        starts a category (``Ayes``, ``Nays``, or one of ``Excused`` /
        ``Abstain`` / ``Absent``) and carries its total as ``(N):``, and each
        ``<a>`` is a member name belonging to the current category.

        The vote passes only when the ayes outnumber nays and others
        combined.

        Raises ``ValueError`` for a ``<b>`` with an unrecognised category.
        """
        for header in page.xpath("//div/b[starts-with(., 'VOTE: FLOOR VOTE:')]"):
            date_text = header.text.split('-')[1].strip()
            date = datetime.datetime.strptime(date_text, "%b %d, %Y").date()

            members = {'yes': [], 'no': [], 'other': []}
            totals = {'yes': 0, 'no': 0, 'other': 0}
            current = None

            for tag in header.xpath("following-sibling::blockquote/*"):
                if tag.tag == 'b':
                    text = tag.text
                    if text.startswith('Ayes'):
                        current = 'yes'
                    elif text.startswith('Nays'):
                        current = 'no'
                    elif text.startswith(('Excused', 'Abstain', 'Absent')):
                        current = 'other'
                    else:
                        raise ValueError('bad vote type: %s' % tag.text)

                    listed = int(re.search(r'\((\d+)\):', text).group(1))
                    if current == 'other':
                        # Several categories feed the "other" total.
                        totals['other'] += listed
                    else:
                        totals[current] = listed
                elif tag.tag == 'a':
                    name = tag.text.strip()
                    if current is not None:
                        members[current].append(name)

            passed = totals['yes'] > (totals['no'] + totals['other'])

            vote = Vote('upper', date, 'Floor Vote', passed, totals['yes'],
                        totals['no'], totals['other'])

            for name in members['yes']:
                vote.yes(name)
            for name in members['no']:
                vote.no(name)
            for name in members['other']:
                vote.other(name)

            bill.add_vote(vote)
Exemplo n.º 47
0
    def _parse_senate_votes(self, vote_data):
        """Turn one vote record into an ``'upper'`` chamber ``Vote``.

        Reads ``voteDate`` (``%Y-%m-%d``), ``voteType`` and the per-category
        member lists under ``memberVotes['items']``. The motion is
        ``'Floor Vote'`` for ``FLOOR`` votes and ``'<committee name> Vote'``
        for ``COMMITTEE`` votes. Members under ``AYE``/``AYEWR`` are counted
        as yes, ``NAY`` as no and ``EXC``/``ABS``/``ABD`` as other; the vote
        passes when the yes count is greater than the no count.

        Raises ``ValueError`` when ``voteType`` is neither ``FLOOR`` nor
        ``COMMITTEE``.
        """
        parsed_date = datetime.datetime.strptime(vote_data['voteDate'],
                                                 '%Y-%m-%d')

        vote = Vote(chamber='upper',
                    date=parsed_date.date(),
                    motion='[No motion available.]',
                    passed=False,
                    yes_votes=[],
                    no_votes=[],
                    other_votes=[],
                    yes_count=0,
                    no_count=0,
                    other_count=0)

        if vote_data['voteType'] == 'FLOOR':
            motion = 'Floor Vote'
        elif vote_data['voteType'] == 'COMMITTEE':
            motion = '{} Vote'.format(vote_data['committee']['name'])
        else:
            raise ValueError('Unknown vote type encountered.')
        vote['motion'] = motion

        vote_rolls = vote_data['memberVotes']['items']

        def tally(legislators, record, count_key):
            # Record every listed legislator and bump the matching total.
            for legislator in legislators:
                record(legislator['fullName'])
                vote[count_key] += 1

        # Count all yea votes.
        if 'items' in vote_rolls.get('AYE', {}):
            tally(vote_rolls['AYE']['items'], vote.yes, 'yes_count')
        if 'items' in vote_rolls.get('AYEWR', {}):
            tally(vote_rolls['AYEWR']['items'], vote.yes, 'yes_count')

        # Count all nay votes.
        if 'items' in vote_rolls.get('NAY', {}):
            tally(vote_rolls['NAY']['items'], vote.no, 'no_count')

        # Count all other types of votes.
        for vote_type in ('EXC', 'ABS', 'ABD'):
            if vote_rolls.get(vote_type, []):
                tally(vote_rolls[vote_type]['items'], vote.other,
                      'other_count')

        vote['passed'] = vote['yes_count'] > vote['no_count']

        return vote
Exemplo n.º 48
0
    def scrape_votes(self, bill_page, bill, insert, year):
        """Attach every "Passage" roll call linked from a bill page to ``bill``.

        :param bill_page: HTML text of the bill page; links whose text
            contains "Passage" are followed.
        :param bill: object that receives ``add_source`` and ``add_vote``.
        :param insert: session path segment interpolated into the report URL.
        :param year: not used by this method; kept for caller compatibility.
        """
        root = lxml.html.fromstring(bill_page)
        for link in root.xpath('//a[contains(text(), "Passage")]'):
            motion = link.text
            chamber = 'lower' if 'Assembly' in motion else 'upper'
            vote_url = 'http://www.leg.state.nv.us/Session/%s/Reports/%s' % (
                insert, link.get('href'))
            bill.add_source(vote_url)
            page = self.urlopen(vote_url)
            page = page.replace(u"\xa0", " ")
            # Separate name so the bill-page tree is not rebound mid-loop.
            vote_root = lxml.html.fromstring(page)

            headings = vote_root.xpath('//h1/text()')
            date = headings[-1].strip()
            if not date:
                # Last <h1> text node is blank; fall back to the one before.
                date = headings[-2].strip()
            # %I (12-hour clock) is required for %p to influence the parsed
            # hour; with %H the AM/PM marker is read but ignored.
            date = datetime.strptime(date, "%B %d, %Y at %I:%M %p")
            top_block_text = vote_root.xpath(
                '//div[@align="center"]')[0].text_content()
            yes_count = int(re.findall(r"(\d+) Yea", top_block_text)[0])
            no_count = int(re.findall(r"(\d+) Nay", top_block_text)[0])
            excused = int(re.findall(r"(\d+) Excused", top_block_text)[0])
            not_voting = int(
                re.findall(r"(\d+) Not Voting", top_block_text)[0])
            absent = int(re.findall(r"(\d+) Absent", top_block_text)[0])
            other_count = excused + not_voting + absent
            passed = yes_count > no_count

            vote = Vote(chamber,
                        date,
                        motion,
                        passed,
                        yes_count,
                        no_count,
                        other_count,
                        not_voting=not_voting,
                        absent=absent)

            for el in vote_root.xpath('//table[2]/tr'):
                tds = el.xpath('td')
                name = tds[1].text_content().strip()
                vote_result = tds[2].text_content().strip()

                if vote_result == 'Yea':
                    vote.yes(name)
                elif vote_result == 'Nay':
                    vote.no(name)
                else:
                    vote.other(name)
            bill.add_vote(vote)
Exemplo n.º 49
0
    def scrape_votes(self, bill, page):
        """Record each floor vote found on ``page`` as an upper-chamber vote.

        A floor vote starts at a bold header beginning with
        'VOTE: FLOOR VOTE:'. Inside the following blockquote(s), bold tags
        announce a category together with its count in the form "(N):" and
        anchor tags carry the names belonging to the most recent category.

        :param bill: object that receives one ``add_vote`` call per header.
        :param page: parsed document supporting ``xpath``.
        :raises ValueError: if a bold tag names an unrecognised category.
        """
        count_pattern = r'\((\d+)\):'

        for header in page.xpath(
                "//div/b[starts-with(., 'VOTE: FLOOR VOTE:')]"):
            date_text = header.text.split('-')[1].strip()
            vote_date = datetime.datetime.strptime(
                date_text, "%b %d, %Y").date()

            names = {'yes': [], 'no': [], 'other': []}
            counts = {'yes': 0, 'no': 0, 'other': 0}
            current = None

            for node in header.xpath("following-sibling::blockquote/*"):
                if node.tag == 'b':
                    label = node.text
                    if label.startswith('Ayes'):
                        current = 'yes'
                        counts['yes'] = int(
                            re.search(count_pattern, label).group(1))
                    elif label.startswith('Nays'):
                        current = 'no'
                        counts['no'] = int(
                            re.search(count_pattern, label).group(1))
                    elif label.startswith(('Excused', 'Abstains', 'Absent')):
                        current = 'other'
                        # Several categories feed the same "other" total.
                        counts['other'] += int(
                            re.search(count_pattern, label).group(1))
                    else:
                        raise ValueError('bad vote type: %s' % node.text)
                elif node.tag == 'a':
                    member = node.text.strip()
                    # Names seen before any category header are dropped.
                    if current is not None:
                        names[current].append(member)

            passed = counts['yes'] > (counts['no'] + counts['other'])

            vote = Vote('upper', vote_date, 'Floor Vote', passed,
                        counts['yes'], counts['no'], counts['other'])

            for member in names['yes']:
                vote.yes(member)
            for member in names['no']:
                vote.no(member)
            for member in names['other']:
                vote.other(member)

            bill.add_vote(vote)
Exemplo n.º 50
0
    def scrape_vote(self, bill, chamber, url):
        """Scrape one roll-call page and attach the resulting vote to ``bill``.

        Returns without adding anything when the page says no details are
        available. Row 2 of the first table supplies date, motion, the three
        counts and the result; rows from the fourth onward that have exactly
        two cells supply one member and how they voted.

        :param bill: object that receives ``add_vote``.
        :param chamber: chamber value passed through to ``Vote``.
        :param url: roll-call page URL; also recorded as the vote's source.
        """
        page = self.urlopen(url)
        if 'There are no details available for this roll call' in page:
            return
        page = page.replace('&nbsp;', ' ')
        page = lxml.html.fromstring(page)

        info_row = page.xpath("//table[1]/tr[2]")[0]

        date = info_row.xpath("string(td[1])")
        date = datetime.datetime.strptime(date, "%m/%d/%Y")

        motion = info_row.xpath("string(td[2])")
        yes_count = int(info_row.xpath("string(td[3])"))
        no_count = int(info_row.xpath("string(td[4])"))
        other_count = int(info_row.xpath("string(td[5])"))
        passed = info_row.xpath("string(td[6])") == 'Pass'

        if motion == 'Shall the bill pass?':
            vote_type = 'passage'
        elif motion == 'Shall the bill be read the third time?':
            vote_type = 'reading:3'
        elif 'be amended as' in motion:
            vote_type = 'amendment'
        else:
            vote_type = 'other'

        # The classification used to be computed and then discarded (and later
        # clobbered by the per-row loop variable); hand it to the Vote.
        vote = Vote(chamber, date, motion, passed, yes_count, no_count,
                    other_count, type=vote_type)
        vote.add_source(url)

        for tr in page.xpath("//table[1]/tr")[3:]:
            if len(tr.xpath("td")) != 2:
                continue

            # avoid splitting duplicate names
            name = tr.xpath("string(td[1])").strip()
            if not name.startswith(DOUBLED_NAMES):
                name = name.split(' of')[0]

            cast = tr.xpath("string(td[2])").strip()
            if cast.startswith('Yea'):
                vote.yes(name)
            elif cast.startswith('Nay'):
                vote.no(name)
            elif cast.startswith('Not Voting'):
                # Members marked "Not Voting" are not recorded individually.
                pass
            else:
                vote.other(name)

        bill.add_vote(vote)
Exemplo n.º 51
0
    def parse_vote(self, bill, actor, date, motion, url, uniqid):
        """Fetch a plain-text roll call and attach the parsed vote to ``bill``.

        The page is expected to contain YEAS, NAYS and ABSENT (optionally
        "ABSENT OR NOT VOTING") sections, each a count followed by names
        separated by runs of two or more whitespace characters.

        :param bill: object that receives ``add_source`` and ``add_vote``.
        :param actor: 'upper' or 'lower' is used as the vote's chamber; any
            other value is stored as the vote's ``location`` with an empty
            chamber.
        :param date: passed through to ``Vote``.
        :param motion: passed through to ``Vote``.
        :param url: page to fetch; recorded as a source on bill and vote.
        :param uniqid: stored on the vote as ``_vote_id``.
        :raises AttributeError: if the page does not match the expected
            layout (the regex search returns ``None``).
        """
        page = self.get(url).text
        bill.add_source(url)
        # Raw strings keep the regex escapes from being treated as (invalid)
        # Python string escapes.
        vote_re = re.compile(
            r'YEAS -?\s?(\d+)(.*)NAYS -?\s?(\d+)'
            r'(.*)ABSENT( OR NOT VOTING)? -?\s?'
            r'(\d+)(.*)', re.MULTILINE | re.DOTALL)
        match = vote_re.search(page)
        yes_count = int(match.group(1))
        no_count = int(match.group(3))
        other_count = int(match.group(6))

        passed = yes_count > no_count

        if actor in ('upper', 'lower'):
            vote_chamber = actor
            vote_location = ''
        else:
            vote_chamber = ''
            vote_location = actor

        vote = Vote(vote_chamber,
                    date,
                    motion,
                    passed,
                    yes_count,
                    no_count,
                    other_count,
                    location=vote_location,
                    _vote_id=uniqid)
        vote.add_source(url)

        name_sep = re.compile(r'\s{2,}')
        yes_votes = name_sep.split(match.group(2).strip())
        no_votes = name_sep.split(match.group(4).strip())
        other_votes = name_sep.split(match.group(7).strip())

        # Splitting an empty section yields [''], so blanks are skipped.
        for yes in yes_votes:
            if yes:
                vote.yes(yes)
        for no in no_votes:
            if no:
                vote.no(no)
        for other in other_votes:
            if other:
                vote.other(other)

        bill.add_vote(vote)
    def process_vote(self, data):
        """Convert one vote record into a ``Vote`` and save it.

        A chamber (for the vote or for its bill) reported as 'legislature'
        is stored as 'upper'. Counts whose option is neither 'yes' nor 'no'
        are summed into the "other" total, and voters are recorded the same
        way. Sources and the record's ``extras`` are copied onto the vote
        before ``self.save_vote`` is called.

        :param data: mapping with the keys read below (``organization``,
            ``bill``, ``counts``, ``start_date``, ``motion_text``,
            ``result``, ``bill_action``, ``legislative_session``, ``votes``,
            ``sources``, ``extras``).
        """
        chamber = parse_psuedo_id(data['organization'])['classification']
        bill_chamber, bill_id = self.get_bill_details(data['bill'])
        chamber = 'upper' if chamber == 'legislature' else chamber
        bill_chamber = (
            'upper' if bill_chamber == 'legislature' else bill_chamber)

        yes_total = None
        no_total = None
        other_total = 0
        for tally in data['counts']:
            option = tally['option']
            if option == 'yes':
                yes_total = tally['value']
            elif option == 'no':
                no_total = tally['value']
            else:
                other_total += tally['value']

        vote_kwargs = {
            'chamber': chamber,
            'date': parse_date(data['start_date']),
            'motion': data['motion_text'],
            'passed': data['result'] == 'pass',
            'yes_count': yes_total,
            'no_count': no_total,
            'other_count': other_total,
            'action': data['bill_action'],
            # TODO: was data['motion_classification'],
            'type': 'other',
            'session': data['legislative_session'],
            'bill_chamber': bill_chamber,
            'bill_id': bill_id,
        }
        vote = Vote(**vote_kwargs)

        for ballot in data['votes']:
            choice = ballot['option']
            if choice == 'yes':
                vote.yes(ballot['voter_name'])
            elif choice == 'no':
                vote.no(ballot['voter_name'])
            else:
                vote.other(ballot['voter_name'])

        for src in data['sources']:
            vote.add_source(src['url'])

        vote.update(**data['extras'])

        self.save_vote(vote)
Exemplo n.º 53
0
    def scrape_votes(self, bill_page, bill, insert, year):
        """Attach every "Passage" roll call linked from a bill page to ``bill``.

        :param bill_page: HTML text of the bill page; links whose text
            contains "Passage" are followed.
        :param bill: object that receives ``add_source`` and ``add_vote``.
        :param insert: session path segment interpolated into the report URL.
        :param year: not used by this method; kept for caller compatibility.
        """
        root = lxml.html.fromstring(bill_page)
        for link in root.xpath('//a[contains(text(), "Passage")]'):
            motion = link.text
            chamber = "lower" if "Assembly" in motion else "upper"
            vote_url = "http://www.leg.state.nv.us/Session/%s/Reports/%s" % (
                insert, link.get("href"))
            bill.add_source(vote_url)
            with self.urlopen(vote_url) as page:
                page = page.decode("utf8").replace(u"\xa0", " ")
                # Separate name so the bill-page tree is not rebound mid-loop.
                vote_root = lxml.html.fromstring(page)

                date = vote_root.xpath("//h1/text()")[-1].strip()
                # %I (12-hour clock) is required for %p to influence the
                # parsed hour; with %H the AM/PM marker is read but ignored.
                date = datetime.strptime(date, "%B %d, %Y at %I:%M %p")
                top_block_text = vote_root.xpath(
                    '//div[@align="center"]')[0].text_content()
                yes_count = int(re.findall(r"(\d+) Yea", top_block_text)[0])
                no_count = int(re.findall(r"(\d+) Nay", top_block_text)[0])
                excused = int(
                    re.findall(r"(\d+) Excused", top_block_text)[0])
                not_voting = int(
                    re.findall(r"(\d+) Not Voting", top_block_text)[0])
                absent = int(re.findall(r"(\d+) Absent", top_block_text)[0])
                other_count = excused + not_voting + absent
                passed = yes_count > no_count

                vote = Vote(
                    chamber,
                    date,
                    motion,
                    passed,
                    yes_count,
                    no_count,
                    other_count,
                    not_voting=not_voting,
                    absent=absent,
                )

                for el in vote_root.xpath("//table[2]/tr"):
                    tds = el.xpath("td")
                    name = tds[1].text_content().strip()
                    vote_result = tds[2].text_content().strip()

                    if vote_result == "Yea":
                        vote.yes(name)
                    elif vote_result == "Nay":
                        vote.no(name)
                    else:
                        vote.other(name)
                bill.add_vote(vote)
Exemplo n.º 54
0
    def parse_roll_call(self, url, chamber, date):
        """Fetch a roll-call page and return it as a ``Vote``.

        The motion is read from the fourth 'font8text' div, with two
        fallbacks when that div's text is empty. 'FP' is expanded to
        'FINAL PASSAGE', and the motion decides the vote type. The vote
        passes only when yeas exceed nays plus leave and non-voting members.

        :param url: roll-call page to open.
        :param chamber: passed through to ``Vote``.
        :param date: passed through to ``Vote``.
        :returns: the populated ``Vote``.
        """
        with self.urlopen(url) as page:
            doc = lxml.html.fromstring(page)

            divs = doc.xpath("//div[@class='font8text']")
            motion = divs[3].text.strip()
            if not motion:
                try:
                    motion = divs[3].getnext().tail.strip()
                except AttributeError:
                    motion = divs[4].text.strip()

            if motion == 'FP':
                motion = 'FINAL PASSAGE'

            vote_type = 'other'
            if motion == 'FINAL PASSAGE':
                vote_type = 'passage'
            elif re.match(r'CONCUR(RENCE)? IN \w+ AMENDMENTS', motion):
                vote_type = 'amendment'

            if not motion:
                motion = 'Unknown'

            # Each total sits in the element right after its label div.
            yeas, nays, lve, nv = [
                int(doc.xpath(query)[0].getnext().text)
                for query in ("//div[text() = 'YEAS']",
                              "//div[text() = 'NAYS']",
                              "//div[text() = 'LVE']",
                              "//div[text() = 'N/V']")
            ]
            other = lve + nv

            passed = yeas > (nays + other)

            vote = Vote(chamber, date, motion, passed, yeas, nays, other,
                        type=vote_type)

            markers = doc.xpath("//span[text() = 'Y' or text() = 'N'"
                                "or text() = 'X' or text() = 'E']")
            for marker in markers:
                member = marker.getnext().text.strip()
                code = marker.text

                if code == 'Y':
                    vote.yes(member)
                elif code == 'N':
                    vote.no(member)
                else:
                    vote.other(member)

            return vote
Exemplo n.º 55
0
    def scrape_vote(self, bill, vote_type_id, vote_type):
        """Scrape one voting page for ``bill`` and attach the vote to it.

        :param bill: mapping with a ``bill_id`` entry; receives ``add_vote``.
        :param vote_type_id: value for the ``VoteTypeID`` query parameter.
        :param vote_type: used as the vote's motion text.
        """
        base_url = 'http://dcclims1.dccouncil.us/lims/voting.aspx?VoteTypeID=%s&LegID=%s'
        url = base_url % (vote_type_id, bill['bill_id'])

        with self.urlopen(url) as html:
            doc = lxml.html.fromstring(html)

            def bold_text(span_id):
                # First bold text node inside the span with this id.
                return doc.xpath(
                    '//span[@id="%s"]/b/text()' % span_id)[0]

            vote_date = convert_date(doc.get_element_by_id('VoteDate').text)

            # check if voice vote / approved boxes have an 'x'
            voice = bold_text('VoteTypeVoice') == 'x'
            passed = bold_text('VoteResultApproved') == 'x'

            yes_count = extract_int(bold_text('VoteCount1'))
            no_count = extract_int(bold_text('VoteCount2'))

            # VoteCount3 .. VoteCount8 are all folded into "other".
            # range() instead of xrange() so this also runs on Python 3.
            other_count = 0
            for n in range(3, 9):
                other_count += extract_int(bold_text('VoteCount%s' % n))

            vote = Vote('upper',
                        vote_date,
                        vote_type,
                        passed,
                        yes_count,
                        no_count,
                        other_count,
                        voice_vote=voice)

            vote.add_source(url)

            # members are only text on page in a <u> tag
            for member_u in doc.xpath('//u'):
                member = member_u.text
                # normalize case
                vote_text = member_u.xpath('../../i/text()')[0].upper()
                # NOTE(review): substring tests -- any label containing 'NO'
                # (e.g. one starting with 'NOT') lands in the "no" branch;
                # confirm against the labels the page actually uses.
                if 'YES' in vote_text:
                    vote.yes(member)
                elif 'NO' in vote_text:
                    vote.no(member)
                else:
                    vote.other(member)
        bill.add_vote(vote)
Exemplo n.º 56
0
 def parse_vote(self, bill, action, chamber, date):
     """Parse the roll call embedded in an action string and add it to ``bill``.

     Does nothing unless ``action`` contains 'as follows'. The motion is the
     action text up to its first period, and the vote counts as passed when
     'PASSED' appears in the action. Members who voted aye with reservations
     are recorded as yes votes.
     """
     if 'as follows' not in action:
         return

     pattern = r"were as follows: (?P<n_yes>\d+) Aye\(?s\)?:\s+(?P<yes>.*?);\s+Aye\(?s\)? with reservations:\s+(?P<yes_resv>.*?);\s+(?P<n_no>\d*) No\(?es\)?:\s+(?P<no>.*?);\s+and (?P<n_excused>\d*) Excused: (?P<excused>.*)"
     tally = re.search(pattern, action).groupdict()
     motion = action.split('.')[0] + '.'

     # Counts captured with \d* may be empty strings; treat those as zero.
     vote = Vote(chamber, date, motion, 'PASSED' in action,
                 int(tally['n_yes'] or 0), int(tally['n_no'] or 0),
                 int(tally['n_excused'] or 0))

     recorders = (
         ('yes', vote.yes),
         ('yes_resv', vote.yes),
         ('no', vote.no),
         ('excused', vote.other),
     )
     for group, record in recorders:
         for voter in split_specific_votes(tally[group]):
             record(voter)

     bill.add_vote(vote)
Exemplo n.º 57
0
    def parse_vote(self, actor, date, row):
        """
        takes the actor, date and row element and returns a Vote object

        The first span's text is split on '-' into the result text and the
        yes / no / other counts. The text trailing the second and third spans
        lists the yes and no voters; the fourth span's trailing text is used
        for other voters only when that span's text starts with 'Absent'.
        Names are comma separated, stripped of surrounding whitespace, and
        blank or 'None' entries are dropped.
        """
        def names_after(span):
            # Drop the non-breaking-space/dash separator, split on commas and
            # strip, so padded names and a padded 'None' are handled.
            cleaned = span.tail.replace(u'\xa0--\xa0', '')
            stripped = (piece.strip() for piece in cleaned.split(','))
            return [name for name in stripped if name and name != 'None']

        spans = row.xpath('.//span')
        motion = row.text
        passed, yes_count, no_count, other_count = spans[0].text_content(
        ).split('-')

        yes_votes = names_after(spans[1])
        no_votes = names_after(spans[2])
        other_votes = []
        if spans[3].text.startswith('Absent'):
            other_votes = names_after(spans[3])

        # Ordered pairs make the lookup deterministic. If no keyword is
        # present, ``passed`` keeps the raw result text, as before.
        outcome = passed.lower()
        for keyword, result in (('adopted', True),
                                ('passed', True),
                                ('failed', False)):
            if keyword in outcome:
                passed = result
                break

        vote = Vote(actor, date, motion, passed, int(yes_count), int(no_count),
                    int(other_count))
        for name in yes_votes:
            vote.yes(name)
        for name in no_votes:
            vote.no(name)
        for name in other_votes:
            vote.other(name)
        return vote