def scrape_votes(self, chamber, url, bill, date, **kwargs):
    """
    Scrape a vote detail page (the one listing legislators' names), build
    a Vote from it and attach that Vote to ``bill``.

    Required keyword arguments:
        type      -- vote type, forwarded to ``Vote(type=...)``
        motion    -- text of the motion being voted on

    Optional keyword arguments:
        passed    -- 'PASSED' or 'FAILED'; when absent the outcome is
                     derived from the counts found on the page
        committee -- committee abbreviation; its full name is stored on
                     the vote and the chamber thresholds are skipped
        AMEND, EMER, '2/3 VOTE'
                  -- page elements; their stripped ``text_content()`` is
                     stored on the vote ('AMEND' under the key 'amended',
                     the other two only when non-empty)
    Any other keyword argument is ignored.

    an Arizona Vote object will have the following additional fields:
    additional vote counts:
        +not_voting, +excused, +absent, +present
    additional vote lists
        +NV, +EX, +AB, +P
    this depends on the chamber and the committee

    Raises KeyError when ``type`` or ``motion`` is missing, or when
    ``passed`` is something other than 'PASSED' / 'FAILED'.
    """
    o_args = {}
    # Both are mandatory; popping them up front fails before any network
    # access instead of with a NameError once the page has been parsed.
    v_type = kwargs.pop('type')
    motion = kwargs.pop('motion')

    # None means "outcome unknown, work it out from the counts later".
    passed = None
    if 'passed' in kwargs:
        passed = {'PASSED': True, 'FAILED': False}[kwargs.pop('passed')]
    if 'AMEND' in kwargs:
        o_args['amended'] = kwargs.pop('AMEND').text_content().strip()
    for flag in ('EMER', '2/3 VOTE'):
        if flag in kwargs:
            flag_text = kwargs.pop(flag).text_content().strip()
            if flag_text:
                o_args[flag] = flag_text
    if 'committee' in kwargs:
        o_args['committee'] = utils.get_committee_name(
            kwargs.pop('committee'), chamber)

    with self.urlopen(url) as vote_page:
        root = html.fromstring(vote_page)
        vote_table = root.xpath('/html/body/div/table/tr[3]/td[4]/table/tr/td'
                                '/table/tr/td/table')[0]
        vote_count = vote_table.xpath(
            'following-sibling::p/following-sibling::text()')
        vote_string = vote_count[0].replace(u'\xa0', '').strip()

        # "<LABEL>: <digits>" pairs. [A-Za-z] rather than [A-z] (which also
        # matches punctuation), and \d+ so that a label without a number
        # is skipped instead of reaching int('').
        count_re = re.compile(r'\b[A-Z]*\s*[A-Za-z]*:\s\d+')

        yes_count = no_count = o_count = 0
        for pair in count_re.findall(vote_string):
            label, number = pair.split(':')
            # make NOT VOTING not_voting
            key = label.strip().replace(' ', '_').lower()
            count = int(number.strip())
            if key == 'ayes':
                yes_count = count
            elif key == 'nays':
                no_count = count
            else:
                o_args[str(key)] = count
                o_count += count

        if passed is None:
            passed = yes_count > no_count
            if 'committee' not in o_args:
                needs_two_thirds = 'EMER' in o_args or '2/3 VOTE' in o_args
                # NOTE(review): these are strict comparisons (e.g. more
                # than 16 ayes in the upper chamber); confirm whether
                # ">=" was intended before changing them.
                if chamber == 'upper' and passed:
                    passed = yes_count > (20 if needs_two_thirds else 16)
                elif chamber == 'lower' and passed:
                    passed = yes_count > (40 if needs_two_thirds else 31)

        vote = Vote(chamber, date, motion, passed, yes_count, no_count,
                    o_count, type=v_type, **o_args)
        vote.add_source(url)

        # The table's cells alternate name, vote, name, vote, ...; zip
        # drops a trailing unpaired cell instead of failing to unpack it.
        cells = vote_table.xpath('descendant::td')
        for name_cell, value_cell in zip(cells[::2], cells[1::2]):
            value = value_cell.text_content().strip()
            name = name_cell.text_content().strip()
            if name == 'Member Name':
                # header row
                continue
            if value == 'Y':
                vote.yes(name)
            elif value == 'N':
                vote.no(name)
            else:
                # keep a per-code list (NV, EX, ...) on the vote as well
                # as recording the member as an "other" vote
                if value in vote:
                    vote[value].append(name)
                else:
                    vote[value] = [name]
                vote.other(name)
        bill.add_vote(vote)
def scrape_actions(self, chamber, session, bill):
    """
    Scrape the actions for a given bill

    Fetches the bill's overview page, then, in order: adds every sponsor
    and alternate title found on it, walks each sub-table of the content
    area and records the matching actions (and votes, where a vote link
    is present) on ``bill``, and finally sorts the bill's actions and
    saves the bill.

    chamber -- chamber the bill is being scraped for; used to decide
               whether a first reading also counts as the introduction
               and passed on to utils.get_actor
    session -- session identifier, converted with
               utils.legislature_to_number to build the page URL
    bill    -- bill object that is updated in place and then saved
    """
    ses_num = utils.legislature_to_number(session)
    bill_id = bill['bill_id'].replace(' ', '')
    action_url = BASE_URL + 'FormatDocument.asp?inDoc=/legtext/%s/bills/%so.asp' % (
        ses_num, bill_id.lower())
    with self.urlopen(action_url) as action_page:
        bill.add_source(action_url)
        root = html.fromstring(action_page)
        base_table = root.xpath('//table[@class="ContentAreaBackground"]')[0]
        # common xpaths
        table_path = '//table[contains(tr/td/b/text(), "%s")]'

        # sponsors
        sponsors = base_table.xpath('//sponsor')
        for sponsor in sponsors:
            name = sponsor.text.strip()
            # alternative that was tried for the sponsor type:
            # sponsor.xpath('string(ancestor::td[1]/following-sibling::td[1]/text())').strip()
            s_type = sponsor.getparent().getparent().getnext().text_content().strip()
            bill.add_sponsor(s_type, name)

        # titles: add every listed title that differs from the current one
        table = base_table.xpath(table_path % 'TITLE')
        if table:
            for row in table[0].iterchildren('tr'):
                title = row[1].text_content().strip()
                if title != bill['title']:
                    bill.add_title(title)

        # Each sub-table is one kind of action; the text of its first
        # cell (or, if that is empty, its first row) selects the branch.
        # NOTE: actor, date and a_type deliberately survive from one
        # table to the next -- some branches below reuse the values left
        # behind by an earlier table.
        for table in base_table.xpath('tr/td/table'):
            action = table.xpath('string(tr[1]/td[1])').strip()
            if action == '':
                action = table.xpath('string(tr[1])').strip()
            if (action.endswith('FIRST READ:') or
                    action.endswith('SECOND READ:') or
                    'WAIVED' in action):
                rows = table.xpath('tr')
                for row in rows:
                    # drop the trailing character (the ':' of the label)
                    action = row[0].text_content().strip()[:-1]
                    actor = 'lower' if action.startswith('H') else 'upper'
                    date = utils.get_date(row[1])
                    # bill:introduced
                    if (action.endswith('FIRST READ') or
                            action.endswith('FIRST WAIVED')):
                        # a first reading in the chamber being scraped
                        # is also recorded as the introduction
                        if actor == chamber:
                            a_type = ['bill:introduced', 'bill:reading:1']
                        else:
                            a_type = 'bill:reading:1'
                        bill.add_action(actor, action, date, type=a_type)
                    else:
                        a_type = 'bill:reading:2'
                        bill.add_action(actor, action, date, type=a_type)
                continue
            elif action == 'COMMITTEES:':
                # committee assignments
                rows = table.xpath('tr')[1:]
                for row in rows:
                    # First add the committee assigned action
                    meta_tag = row.cssselect('meta')[0]
                    h_or_s = meta_tag.get('name')[0]  # @name is HCOMMITTEE OR SCOMMITTEE
                    committee = meta_tag.get('content')  # @content is committee abbrv
                    # actor is house or senate referring the bill to committee
                    actor = 'lower' if h_or_s.lower() == 'h' else 'upper'
                    act = 'assigned to committee: ' + utils.get_committee_name(
                        committee, actor)
                    date = utils.get_date(row[1])
                    bill.add_action(actor, act, date, type='committee:referred')
                    # now lets see if there is a vote
                    vote_url = row[0].xpath('string(a/@href)')
                    if vote_url:
                        date = utils.get_date(row[3])
                        act = row[5].text_content().strip()
                        a_type = get_action_type(act, 'COMMITTEES:')
                        act = get_verbose_action(act)
                        bill.add_action(actor,
                                        utils.get_committee_name(committee, actor) + ":" + act,
                                        date, type=a_type, abbrv=committee)
                        self.scrape_votes(actor, vote_url, bill, date,
                                          motion='committee: ' + act,
                                          committee=committee,
                                          type='other')
                    elif len(row) == 5:
                        # probably senate rules committee
                        date = utils.get_date(row[3])
                        # fall back to the date in the second cell
                        if date == '':
                            date = utils.get_date(row[1])
                        act = row[4].text_content().strip()
                        a_type = get_action_type(act, 'COMMITTEES:')
                        act = get_verbose_action(act)
                        bill.add_action(actor,
                                        utils.get_committee_name(committee, actor) + ":" + act,
                                        date, type=a_type, abbrv=committee)
                continue
            elif 'CAUCUS' in action:
                # only the first two rows are read
                rows = table.xpath('tr')[0:2]
                for row in rows:
                    actor = utils.get_actor(row, chamber)
                    action = row[0].text_content().strip()
                    if action.endswith(':'):
                        action = action[:-1]
                    result = row[2].text_content().strip()
                    # majority caucus Y|N
                    action = action + " recommends to concur: " + result
                    date = utils.get_date(row[1])
                    bill.add_action(actor, action, date, concur=result, type='other')
                continue
            # transmit to house or senate
            elif 'TRANSMIT TO' in action:
                rows = table.xpath('tr')
                for row in rows:
                    action = row[0].text_content().strip()[:-1]
                    # a transmittal to the HOUSE is recorded with actor
                    # 'upper', anything else with 'lower'
                    actor = 'upper' if action.endswith('HOUSE') else 'lower'
                    date = utils.get_date(row[1])
                    bill.add_action(actor, action, date, type='other')
                continue
            # Committee of the whole actions
            elif 'COW ACTION' in action:
                rows = table.xpath('tr')
                actor = utils.get_actor(rows[0], chamber)
                if 'SIT COW ACTION' in action:
                    act = rows[0][-1].text_content().strip()
                    date = utils.get_date(rows[0][1])
                else:
                    act = rows[1][2].text_content().strip()
                    date = utils.get_date(rows[1][1])
                action = action + " " + get_verbose_action(act)  # COW ACTION 1 DPA
                bill.add_action(actor, action, date, type='other')
                # NOTE(review): rows[1] is read here even in the
                # 'SIT COW ACTION' case, which only used rows[0] above --
                # confirm such tables always have a second row.
                if rows[1][0].text_content().strip() == 'Vote Detail':
                    vote_url = rows[1][0].xpath('string(a/@href)')
                    self.scrape_votes(actor, vote_url, bill, date,
                                      motion=action, type='other', extra=act)
                continue
            # AMENDMENTS
            elif 'AMENDMENTS' in action:
                rows = table.xpath('tr')[1:]
                for row in rows:
                    act = row.text_content().strip()
                    if act == '':
                        continue
                    if 'passed' in act or 'adopted' in act:
                        a_type = 'amendment:passed'
                    elif 'failed' in act:
                        a_type = 'amendment:failed'
                    elif 'withdrawn' in act:
                        a_type = 'amendment:withdrawn'
                    else:
                        a_type = 'other'
                    # actor and date will be the same as the previous
                    # action: neither is assigned in this branch, so the
                    # values left by an earlier table are reused
                    bill.add_action(actor, act, date, type=a_type)
                continue
            # CONFERENCE COMMITTEE
            # http://www.azleg.gov/FormatDocument.asp?inDoc=/legtext/49Leg/2r/bills/hb2083o.asp
            # MISCELLANEOUS MOTION
            # MOTION TO RECONSIDER
            elif action == 'MOTION TO RECONSIDER:':
                date = utils.get_date(table[1][1])
                if date:
                    if table[1][0].text_content().strip() == 'Vote Detail':
                        vote_url = table[1][0].xpath('string(a/@href)')
                        # NOTE(review): actor and a_type are not assigned
                        # in this branch; they hold whatever an earlier
                        # table left behind. Confirm that reusing a_type
                        # here (rather than 'other') is intended.
                        bill.add_action(actor, action, date, type=a_type)
                        self.scrape_votes(actor, vote_url, bill, date,
                                          motion='motion to reconsider',
                                          type='other')
                    else:
                        action = table[-1][1].text_content().strip()
                        bill.add_action(actor, action, date, type='other')
                continue
            elif (action.endswith('FINAL READ:') or
                    action.endswith('THIRD READ:')):
                # house|senate final and third read
                rows = table.xpath('tr')
                # need to find out if third read took place in house or senate
                # if an ancestor table contains 'TRANSMIT TO' then the action
                # is taking place in that chamber, else it is in chamber
                actor = utils.get_actor(rows[0], chamber)
                # get a dict of keys from the header and values from the row
                k_rows = utils.get_rows(rows[1:], rows[0])
                action = rows[0][0].text_content().strip()
                for row in k_rows:
                    a_type = [get_action_type(action, 'Generic')]
                    if row[action].text_content().strip() == 'Vote Detail':
                        vote_url = row.pop(action).xpath('string(a/@href)')
                        vote_date = utils.get_date(row.pop('DATE'))
                        passed = row.pop('RESULT').text_content().strip()
                        # leaves vote counts, amended, emergency, two-thirds
                        # and possibly rfe left in the row. get the vote
                        # counts from scrape_votes and pass amended and
                        # emergency as kwargs to sort them in scrape_votes
                        # (a RESULT other than PASSED/FAILED raises
                        # KeyError on the lookup below)
                        pass_fail = {'PASSED': 'bill:passed',
                                     'FAILED': 'bill:failed'}[passed]
                        a_type.append(pass_fail)
                        bill.add_action(actor, action, vote_date, type=a_type)
                        row['type'] = 'passage'
                        self.scrape_votes(actor, vote_url, bill, vote_date,
                                          passed=passed, motion=action, **row)
                    else:
                        date = utils.get_date(row.pop('DATE'))
                        if date:
                            bill.add_action(actor, action, date, type=a_type)
                continue
            elif 'TRANSMITTED TO' in action:
                # transmitted to Governor or secretary of the state
                # SoS if it goes to voters as a proposition and memorials, etc
                rows = table.xpath('tr')
                actor = utils.get_actor(rows[0], chamber)
                # actor is the actor from the previous statement because it is
                # never transmitted to G or S without third or final read
                sent_to = rows[0][1].text_content().strip()
                date = utils.get_date(rows[0][2])
                # only the first letter is checked ('G' for the governor)
                a_type = 'governor:received' if sent_to[0] == 'G' else 'other'
                bill.add_action(actor, "TRANSMITTED TO " + sent_to, date, type=a_type)
                # See if the actor is the governor and whether he signed
                # the bill or vetoed it
                act, date, chapter, version = '', '', '', ''
                for row in rows[1:]:
                    if row[0].text_content().strip() == 'ACTION:':
                        act = row[1].text_content().strip()
                        date = utils.get_date(row[2])
                    elif row[0].text_content().strip() == 'CHAPTER:':
                        chapter = row[1].text_content().strip()
                    elif row[0].text_content().strip() == 'CHAPTERED VERSION:':
                        version = row[1].text_content().strip()
                    elif row[0].text_content().strip() == 'TRANSMITTED VERSION:':
                        version = row[1].text_content().strip()
                if act and sent_to == 'GOVERNOR':
                    # any action text other than 'SIGNED' is recorded as
                    # a veto
                    a_type = 'governor:signed' if act == 'SIGNED' else 'governor:vetoed'
                    if chapter:
                        bill.add_action(sent_to.lower(), act, date,
                                        type=a_type, chapter=chapter,
                                        chaptered_version=version)
                    else:
                        bill.add_action(sent_to.lower(), act, date,
                                        type=a_type)
                continue
            # this is probably only important for historical legislation
            elif 'FINAL DISPOSITION' in action:
                rows = table.xpath('tr')
                if rows:
                    disposition = rows[0][1].text_content().strip()
                    bill['final_disposition'] = disposition
        bill = self.sort_bill_actions(bill)
        self.save_bill(bill)