import re from billy.scrape.actions import Rule, BaseCategorizer # These are regex patterns that map to action categories. _categorizer_rules = ( Rule([u'Amendment #\\S+ \\((?P<legislator>.+?)\\) bundle YES adopted'], [u'amendment:passed']), Rule([u'(?i)Signed by (the )Governor'], [u'committee:referred']), Rule([u'Accompanied (by )?(?P<bill_id>[SH]\\S+)'], []), Rule([u'Discharged to the committee on (?P<committees>.+)'], [u'committee:referred']), Rule([u'(?i)Amendment #\\d+ adopted'], [u'amendment:passed']), Rule([ u'Amendment #\\d+ \\((?P<legislator>.+?)\\) rejected', u'(?i)amendment.+?rejected' ], [u'amendment:failed']), Rule([u'(?is)Amendment \\S+ withdrawn'], [u'amendment:withdrawn']), Rule([u'Amendment #\\S+ \\((?P<legislator>.+?)\\) Pending'], [u'amendment:introduced']), Rule([u'(?P<bill>[HS]\\d+)'], []), Rule([u'(?i)Amendment \\(#\\d+\\) adopted'], [u'amendment:passed']), Rule([u'(?i)with veto'], [u'governor:vetoed']), Rule([u'reported favorably by committee'], [u'committee:passed:favorable']), Rule([u'Accompan\\S+ .+?(?P<bill_id>[SH]\\S+)'], []), Rule([u'(?i)Amendment \\d+ pending'], [u'amendment:tabled']), Rule([u'Read,'], [u'bill:reading:1']), Rule([ u'(?i)Amendment #\\S+ \\((?P<legislator>.+?)\\)\\s+-\\s+rejected', u'(?i)Amendment \\d+ rejected',
# u'NRMW': '', u'PSEP': u'Public Safety & Emergency Preparedness', u'SGTA': u'State Government & Tribal Affairs', u'TEC': u'Technology, Energy & Communications', u'TR': u'Transportation', u'TRAN': u'Transportation', u'WAYS': u'Ways & Means' } committee_names = committees_abbrs.values() committees_rgx = '(%s)' % '|'.join( sorted(committee_names, key=len, reverse=True)) # These are regex patterns that map to action categories. _categorizer_rules = ( Rule(r'yeas, (?P<yes_votes>\d+); nays, (?P<no_votes>\d+); ' r'absent, (?P<absent_voters>\d+); excused, (?P<excused_voters>\d+)'), Rule(r'Committee on (?P<committees>.+?) at \d'), Rule(r'(?P<committees>.+?) relieved of further'), Rule(r'Passed to (?P<committees>.+?) for \S+ reading'), Rule(r'by (?P<committees>.+?) Committee'), Rule(r'^Adopted', 'bill:passed'), Rule(r'^Introduced', 'bill:introduced'), Rule(r'^Introduced', 'bill:introduced'), Rule(r'Third reading, adopted', ['bill:reading:3', 'bill:passed']), Rule(r'amendment adopted', 'amendment:passed'), Rule(r'amendment not adopted', 'amendment:failed'), Rule(r"(?i)third reading, (?P<pass_fail>(passed|failed))", 'bill:reading:3'), Rule(r'Read first time', 'bill:reading:1'), Rule(r"(?i)first reading, referred to (?P<committees>.*)\.", 'bill:reading:1'),
from billy.scrape.actions import Rule, BaseCategorizer


# Table of rules mapping regex patterns (first argument) to action
# categories (second argument). The original ordering is kept as-is.
_categorizer_rules = (
    # Floor amendments.
    Rule(r'Amendment proposed on floor', 'amendment:introduced'),
    Rule(r'Amendment failed', 'amendment:failed'),
    # NOTE(review): the category given here is the empty string --
    # presumably meant as "no category"; confirm how Rule handles ''.
    Rule(r'Amendment adopted, placed on calendar', ''),

    # Filing / introduction (both anchored at the start of the text).
    Rule(r'^Filed with ', 'bill:introduced'),
    Rule(r'^Introduced', 'bill:introduced'),

    # Readings and passage. Any text matching 'passed as amended' also
    # matches the plain 'passed' pattern; both carry the same category.
    Rule(r'^Second reading', 'bill:reading:2'),
    Rule(r'passed as amended', 'bill:passed'),
    Rule(r'passed', 'bill:passed'),

    Rule(r'Sent to Governor', 'governor:received'),

    # Committee reports: a generic pattern followed by the
    # "do pass" / "do not pass" refinements.
    Rule(r'Reported back', 'committee:passed'),
    Rule(r'Reported back.*do pass', 'committee:passed:favorable'),
    Rule(r'Reported back.*do not pass', 'committee:passed:unfavorable'),

    Rule(r'^Signed by Governor', 'governor:signed'),
)


class NDCategorizer(BaseCategorizer):
    """Categorizer whose rule set is the module-level rule table."""

    rules = _categorizer_rules
import re from billy.scrape.actions import Rule, BaseCategorizer # These are regex patterns that map to action categories. _categorizer_rules = ( # Parse vote counts--possibly useful in future. Rule('Roll\s+Call\s+\d+:\s+yeas\s+(?P<yes_votes>\d+),' '\s+nays\s+(?P<no_votes>\d+)'), Rule(r'(?i)voice vote', voice_vote=True), Rule(r'Effective (?P<effective_date>.+)'), # Same for member names. Rule(('(?i)(co)?authored by (representative|senator)s?\s+' '(?P<legislators>.+)')), Rule((r'(?P<version>Amendment \d+)\s+\(\s*(?P<legislators>.+?)\)' r'.+?failed'), 'amendment:failed'), Rule((r'(?P<version>Amendment \d+)\s+\(\s*(?P<legislators>.+?)\)' r'.+?withdrawn'), 'amendment:withdrawn'), Rule((r'(?P<version>Amendment \d+)\s+\(\s*(?P<legislators>.+?)\)' r'.+ruled out of order'), 'amendment:failed'), Rule(r'(?i)^(senator|representative)s?(?P<legislators>.+?)\s+added'), Rule(r'(?i)^Senate\s+(advisor|sponsor|conferee)s?.*?:\s+' r'(?P<legislators>.+)'), Rule(r'(House|Senate) sponsors?:\s+Senators\s+(?P<legislators>.+)'), Rule(r'Senators (?P<legislators>.+?) added as (?:co)?sponsors'), Rule('(?i)(co)?sponsors: (?P<legislators>.+)'), # Amendments. Rule((r'(?P<version>Amendment \d+)\s+\(\s*(?P<legislators>.+?)\)' r'.+?prevailed'), 'amendment:passed'),
import re from billy.scrape.actions import Rule, BaseCategorizer # These are regex patterns that map to action categories. _categorizer_rules = ( # Capture some groups. Rule(r'Senators (?P<legislators>.+) a committee of conference'), Rule(r"(?P<version>Printer's No. \d+)"), Rule(r'(?i)introduced', 'bill:introduced'), # Committee referred, reported. Rule(r"Referred to (?P<committees>.+)", 'committee:referred'), Rule(r"Re-(referred|committed) to (?P<committees>.+)", 'committee:referred'), Rule(r'(?i)(re-)?reported', 'committee:passed'), Rule(r'Reported with request to re-refer to (?P<committees>.+)', ['committee:referred', 'committee:passed']), Rule([r'^Amended on', r'as amended'], 'amendment:passed'), # Governor. Rule(r'^Approved by the Governor', 'governor:signed'), Rule(r'^Presented to the Governor', 'governor:received'), # Passage. Rule([r'^Final passage', '^Third consideration and final passage'], 'bill:passed'), Rule(r'(?i)adopted', 'bill:passed'), Rule(r'^First consideration', 'bill:reading:1'), Rule(r'Second consideration', 'bill:reading:2'), Rule(r'Third consideration', 'bill:reading:3'),
u'Regional Jail and Correctional Facility Authority', u'Roads and Transportation', u'Rule-Making Review Committee', u'Senior Citizen Issues', u'Special Investigations', u'Technology', u'Veterans Affairs', u'Veterans Affairs/ Homeland Security', u'Water Resources', u'Workforce Investment for Economic Development', ] committees_rgx = '(%s)' % '|'.join(sorted(committees, key=len, reverse=True)) rules = (Rule( ['Communicated to Senate', 'Senate received', 'Ordered to Senate'], actor='upper'), Rule(['Communicated to House', 'House received', 'Ordered to House'], actor='lower'), Rule('Read 1st time', 'bill:reading:1'), Rule('Read 2nd time', 'bill:reading:2'), Rule('Read 3rd time', 'bill:reading:3'), Rule('Filed for introduction', 'bill:filed'), Rule('^Introduced in', 'bill:introduced'), Rule(['Passed Senate', 'Passed House'], 'bill:passed'), Rule(['Reported do pass', 'With amendment, do pass'], 'committee:passed'), Rule([ u', but first to .+?; then (?P<committees>[^;]+)', u'To (?P<committees>.+?) then' ]), Rule(u'(?i)voice vote', voice_vote=True), Rule([u'Amendment rejected'], [u'amendment:failed']),
from billy.scrape.actions import Rule, BaseCategorizer # These are regex patterns that map to action categories. _categorizer_rules = ( Rule((r'\(Ayes (?P<yes_votes>\d+)\.\s+Noes\s+' r'(?P<no_votes>\d+)\.( Page \S+\.)?\)')), Rule(r'^Introduced', 'bill:introduced'), Rule(r'(?i)Referred to (?P<committees>.+)', 'committee:referred'), Rule(r'(?i)Referred to (?P<committees>.+?)(\.\s+suspense)', 'committee:referred'), Rule(r're-refer to Standing (?P<committees>[^.]+)\.', 'committee:referred'), Rule(r'Read first time\.', 'bill:reading:1'), Rule(r'Read second time and amended', ['bill:reading:2']), Rule(r'Read third time', 'bill:reading:3'), Rule(r'Read third time. Refused passage\.', 'bill:failed'), Rule([r'(?i)read third time.{,5}passed', r'(?i)Read third time.+?Passed'], ['bill:passed', 'bill:reading:3']), Rule(r'Approved by the Governor', 'governor:signed'), Rule(r'Approved by the Governor with item veto', 'governor:vetoed:line-item'), Rule('Vetoed by Governor', 'governor:vetoed'),
import re from nose.tools import eq_, assert_true from billy.scrape.actions import Rule, BaseCategorizer test_actions = ( 'Referred to Committee on Bats; also referred to Fake Committee', 'Goat action: walk around, eat grass.', ) rules = ( # For testing key clobbering. Rule(r'Referred to (?P<committees>Committee on .+?);'), Rule(r'also referred to (?P<committees>.+)'), # For testing stop. Rule(r'walk', ['action:walk'], stop=True), Rule(r'eat', ['action:eatgrass']), # Flexible whitespace. Rule(r'Moo moo', ['action:moo']), # Test attrs Rule(r'Test attrs', species='goat'), # Test multiple types. Rule(r'1', ['1', '2']), Rule(r'3', ['3']),
import re from billy.scrape.actions import Rule, BaseCategorizer rules = ( Rule(r'(?P<yes_votes>\d+)\s+YES\s+(?P<no_votes>\d+)' r'\s+NO\s+(?P<not_voting>.+?)\s+NOT VOTING\s+(?P<absent>.+?)\s+' r'ABSENT\s+(?P<vacant>.+?) VACANT'), Rule([u'Amendment (?P<bills>.+?) -\s+Laid On Table'], ['amendment:tabled']), Rule([u'Favorable'], ['committee:passed:favorable']), Rule([u'(?i)Amendment (?P<bills>.+?) defeated'], ['amendment:failed']), Rule([u'(?i)introduced and adopted in lieu of (?P<bills>.+)'], ['bill:introduced']), Rule([u'(?i)assigned to (?P<committees>.+?) Committee in'], ['committee:referred', 'bill:introduced']), Rule([u'Signed by Governor'], ['governor:signed']), Rule([u'(?i)Amendment (?P<bills>[\w\s]+?) Introduced'], ['amendment:introduced']), Rule([u'Amendment (?P<bills>.+?) - Passed'], ['amendment:passed']), Rule([u'^Passed by'], ['bill:passed']), Rule([u'^Defeated'], ['bill:failed' ]), Rule([u'(?i)unfavorable'], ['committee:passed:unfavorable']), Rule([u'Reported Out of Committee \((?P<committees>.+?)\)'], ['committee:passed']), Rule([u'Vetoed by Governor'], ['governor:vetoed']), Rule([u'(?i)Amendment (?P<bills>.+?)\s+-\s+Introduced'], ['amendment:introduced']), Rule([u'(?i)Amendment (?P<bills>[\w\s]+?) Passed'], ['amendment:passed']), Rule([
from billy.scrape.actions import Rule, BaseCategorizer


# Table of rules mapping regex patterns (first argument) to action
# categories (second argument). The original ordering is kept as-is.
_categorizer_rules = (
    # Filing / introduction (both anchored at the start of the text).
    Rule(r'^Filed with ', 'bill:introduced'),
    Rule(r'^Introduced', 'bill:introduced'),

    # Readings and passage. Any text matching 'passed as amended' also
    # matches the plain 'passed' pattern; both carry the same category.
    Rule(r'^Second reading', 'bill:reading:2'),
    Rule(r'passed as amended', 'bill:passed'),
    Rule(r'passed', 'bill:passed'),

    Rule(r'^Signed by Governor', 'governor:signed'),
)


class NDCategorizer(BaseCategorizer):
    """Categorizer whose rule set is the module-level rule table."""

    rules = _categorizer_rules
import re

from billy.scrape.actions import Rule, BaseCategorizer


# Ordered rule table. Each Rule takes one pattern or a list of patterns,
# optionally followed by the action category (or categories) it maps to.
# Rules given no category only supply named capture groups.
rules = (
    # Vote tallies (yes / no / excused / absent, optionally vacant) and
    # the committee named in "suggested and ordered printed" actions.
    Rule([
        (u'(?P<yes_votes>\d+) Yeas - (?P<no_votes>\d+) '
         u'Nays- (?P<excused>\d+) Excused - (?P<absent>\d+) Absent'),
        (u'(?P<yes_votes>\d+) -Yeas, (?P<no_votes>\d+) -Nays, '
         u'(?P<excused>\d+) -Excused, (?P<absent>\d+) -Absent'),
        u'(?P<committees>Committee on .+?) suggested and ordered printed',
        (u'\(Yeas (?P<yes_votes>\d+) - Nays (?P<no_votes>\d+) - Absent '
         u'(?P<absent>\d+) - Excused (?P<excused>\d+)\)( \(Vacancy '
         u'(?P<vacant>\d+)\))?'),
    ]),

    # Legislator names following "Representative" / "Senator"; the final
    # pattern has no capture group.
    Rule([
        u'Representative (?P<legislators>.+?) of \S+',
        u'Senator (?P<legislators>.+?of \S+)',
        'Representative (?P<legislators>[A-Z]+?( of [A-Za-z]+))',
        u'Senator (?P<legislators>\S+ of \S+)',
        u'Representative [A-Z ]+? of \S+',
    ]),

    # Committee referral; the negative lookahead ends the captured
    # upper-case name where it is not followed by a lower-case letter.
    Rule(u'REFERRED to the (?P<committees>Committee on [A-Z ]+(?![a-z]))',
         'committee:referred'),

    # Readings.
    Rule(['READ A SECOND TIME'], ['bill:reading:2']),
    Rule(['(?i)read once'], ['bill:reading:1']),

    # Passage.
    Rule('(?i)finally passed', 'bill:passed'),
    Rule('(?i)passed to be enacted', 'bill:passed'),

    Rule('COMMITTED to the (?P<committees>Committee on .+?)\.',
         'committee:referred'),

    # Veto override outcomes.
    Rule(r'VETO was NOT SUSTAINED', 'bill:veto_override:passed'),
    Rule(r'VETO was SUSTAINED', 'bill:veto_override:failed'),

    # "READ and PASSED/ADOPTED" at the end of the text, unless the text
    # immediately before "READ" is "Amendment"/"amendment".
    Rule(r'(?<![Aa]mendment)READ and (PASSED|ADOPTED)(, in concurrence)?\.$',
         'bill:passed'),
)
u'Executive Committee of Legislative Council', u'Transportation', u'Health (?:and|&) Human Services', u'Education', u'Legislative Council', u'Legal Services', u'Capital Development', u'Transportation (?:and|&) Energy', u'Joint Budget Committee', u'Business, Labor, (?:and|&) Technology', u'State, Veterans, (?:and|&) Military Affairs' ] rules = ( Rule('^House', actor='lower'), Rule('^Senate', actor='upper'), Rule('^Introduced in Senate', actor='upper'), Rule('^Introduced in House', actor='lower'), Rule('^Governor', actor='governor'), Rule('Governor Action - Partial Veto', 'governor:vetoed:line-item'), Rule('Sent to the Governor', 'governor:received'), Rule('Governor Action - Signed', 'governor:signed'), Rule('Governor Signed', 'governor:signed'), Rule('Governor Action - Vetoed', 'governor:vetoed'), Rule(r'^Introduced', 'bill:introduced'), Rule(r'Assigned to (?P<committees>.+)'), Rule(u'(?i)refer (un)?amended to (?P<committees>.+)',
Rule([ # Add a bajilion links to entities. r'SCs (changed|removed) (?P<legislators>.+)', r'Conference Committee on (?P<committees>.+)', r'Conference granted, naming:?\s+Conference Committee on (?P<committees>.+)', r'vote by Representative(?P<legislators>.+)', r'amended (?P<committees>.+?) committee', r'coauthored by (?P<legislators>.+)', (r'Remove Senator .+? as principal Senate author ' r'and substitute with Senator (?P<legislators>.+?)'), r'(?i)committee substitute (?P<committees>.+)', r'(?i)remove\s{,10}as\s{,10}author\s{,10}(?P<legislators>.+);', r'(?i)SCs\s{,10}named\s{,10}(?P<legislators>.+)', (r'Pending removal author Senator (?P<legislators>.+?) ' r'and replace with Senator'), r'(?i)Representative\(s\)\s{,10}(?P<legislators>.+)', r'Withdrawn from Calendar; (?P<committees>.+)', (r'Pending removal author Senator .+? and replace ' r'with Senator (?P<legislators>.+)'), r'Ayes:\s+(?P<yes_votes>\d+)\s+Nays:\s+(?P<no_votes>\d+)', (r'remove as principal author Representative .+? and substitute ' r'with Representative (?P<legislators>.+?)'), r'Pending coauthorship Senator\(s\) (?P<legislators>.+)', (r'Remove Representative (?P<legislators>.+?) as principal ' r'House author and substitute with Representative'), r'Pending removal principal author Representative .+? and ' r'replace with Representative (?P<legislators>.+)', r'(?i)(co)?authored\s{,10}by\s{,10}(?P<legislators>.+)', r'Second Reading referred to (?P<committees>.+? Committee)', r'Notice served to reconsider vote on measure (?P<legislators>.+)', (r'Pending removal principal author Representative (?P<legislators>.+) ' r'and replace with Representative .+'), (r'remove as principal author Representative (?P<legislators>.+?) ' r'and substitute with Representative'), r'CR; Do Pass(, as amended,|, amended by)? (?P<committees>.+)', r'coauthor (Senator|Representative) (?P<legislators>.+)', r'Ayes:\s+(?P<yes_votes>\d+)\s+Nays:\s+(?P<no_votes>\d+)']),