def test_bill_sponsor_by_identifier():
    """
    Import a bill whose sponsor is referenced by (scheme, identifier) and
    check that the resulting sponsorship points at the person carrying that
    identifier.
    """
    create_jurisdiction()
    organization = create_org()

    scraped_bill = ScrapeBill(
        'HB 1',
        '1900',
        'Axe & Tack Tax Act',
        classification='tax bill',
        chamber='lower',
    )
    # The sponsor is named only by surname; the identifier is what links it
    # to the person imported below.
    scraped_bill.add_sponsorship_by_identifier(
        name="SNODGRASS",
        classification='sponsor',
        entity_type='person',
        primary=True,
        identifier="TOTALLY_REAL_ID",
        scheme="TOTALLY_REAL_SCHEME",
    )

    org_importer = OrganizationImporter('jid')
    person_importer = PersonImporter('jid')

    scraped_person = ScrapePerson(name='Zadock Snodgrass')
    scraped_person.add_identifier(
        identifier='TOTALLY_REAL_ID',
        scheme='TOTALLY_REAL_SCHEME',
    )
    person_importer.import_data([scraped_person.as_dict()])

    # Attach the imported person to the organization.
    stored_person = Person.objects.get()
    Membership.objects.create(
        person_id=stored_person.id,
        organization_id=organization.id,
    )

    bill_importer = BillImporter('jid', org_importer, person_importer)
    bill_importer.import_data([scraped_bill.as_dict()])

    stored_bill = Bill.objects.get()
    # Exactly one sponsorship is expected; unpacking fails otherwise.
    [sponsorship] = stored_bill.sponsorships.all()
    assert sponsorship.person.name == "Zadock Snodgrass"
def test_bill_sponsor_by_identifier():
    """
    Import a bill whose sponsor is referenced by (scheme, identifier) and
    check that the resulting sponsorship points at the person carrying that
    identifier.
    """
    create_jurisdiction()
    organization = create_org()

    scraped_bill = ScrapeBill(
        'HB 1',
        '1900',
        'Axe & Tack Tax Act',
        classification='tax bill',
        chamber='lower',
    )
    # The sponsor is named only by surname; the identifier is what links it
    # to the person imported below.
    scraped_bill.add_sponsorship_by_identifier(
        name="SNODGRASS",
        classification='sponsor',
        entity_type='person',
        primary=True,
        identifier="TOTALLY_REAL_ID",
        scheme="TOTALLY_REAL_SCHEME",
    )

    org_importer = OrganizationImporter('jid')
    person_importer = PersonImporter('jid')

    scraped_person = ScrapePerson(name='Zadock Snodgrass')
    scraped_person.add_identifier(
        identifier='TOTALLY_REAL_ID',
        scheme='TOTALLY_REAL_SCHEME',
    )
    person_importer.import_data([scraped_person.as_dict()])

    # Attach the imported person to the organization.
    stored_person = Person.objects.get()
    Membership.objects.create(
        person_id=stored_person.id,
        organization_id=organization.id,
    )

    bill_importer = BillImporter('jid', org_importer, person_importer)
    bill_importer.import_data([scraped_bill.as_dict()])

    stored_bill = Bill.objects.get()
    # Exactly one sponsorship is expected; unpacking fails otherwise.
    [sponsorship] = stored_bill.sponsorships.all()
    assert sponsorship.person.name == "Zadock Snodgrass"
def test_bill_sponsor_limit_lookup():
    """
    Check that resolving a sponsor by identifier stays inside the bill's
    jurisdiction.

    Two people share the same name and the same (scheme, identifier) pair but
    have different birth dates. The one born in 1800 is imported under 'jid'
    and given a membership in the organization; the one born in 1900 is
    imported under 'another-jurisdiction'. The bill is imported under 'jid',
    so its sponsorship must resolve to the person born in 1800.
    """
    create_jurisdiction()
    org = create_org()

    bill = ScrapeBill('HB 1', '1900', 'Axe & Tack Tax Act',
                      classification='tax bill', chamber='lower')
    bill.add_sponsorship_by_identifier(name="SNODGRASS",
                                       classification='sponsor',
                                       entity_type='person',
                                       primary=True,
                                       identifier="TOTALLY_REAL_ID",
                                       scheme="TOTALLY_REAL_SCHEME")

    oi = OrganizationImporter('jid')
    pi = PersonImporter('jid')

    zs = ScrapePerson(name='Zadock Snodgrass', birth_date="1800-01-01")
    zs.add_identifier(identifier='TOTALLY_REAL_ID',
                      scheme='TOTALLY_REAL_SCHEME')
    pi.import_data([zs.as_dict()])

    za_db = Person.objects.get()
    Membership.objects.create(person_id=za_db.id,
                              organization_id=org.id)

    zs2 = ScrapePerson(name='Zadock Snodgrass', birth_date="1900-01-01")
    zs2.add_identifier(identifier='TOTALLY_REAL_ID',
                       scheme='TOTALLY_REAL_SCHEME')

    # This is contrived and perhaps broken, but we're going to check this.
    # We *really* don't want to *ever* cross jurisdiction bounds.
    # Import the 1900-born namesake (zs2), not zs again: otherwise both
    # records carry the same birth date and the final assertion could not
    # tell which person the sponsorship resolved to.
    PersonImporter('another-jurisdiction').import_data([zs2.as_dict()])

    BillImporter('jid', oi, pi).import_data([bill.as_dict()])

    obj = Bill.objects.get()
    (entry,) = obj.sponsorships.all()
    assert entry.person.name == "Zadock Snodgrass"
    assert entry.person.birth_date == "1800-01-01"
def test_bill_sponsor_limit_lookup():
    """
    Check that resolving a sponsor by identifier stays inside the bill's
    jurisdiction.

    Two people share the same name and the same (scheme, identifier) pair but
    have different birth dates. The one born in 1800 is imported under 'jid'
    and given a membership in the organization; the one born in 1900 is
    imported under 'another-jurisdiction'. The bill is imported under 'jid',
    so its sponsorship must resolve to the person born in 1800.
    """
    create_jurisdiction()
    org = create_org()

    bill = ScrapeBill('HB 1', '1900', 'Axe & Tack Tax Act',
                      classification='tax bill', chamber='lower')
    bill.add_sponsorship_by_identifier(name="SNODGRASS",
                                       classification='sponsor',
                                       entity_type='person',
                                       primary=True,
                                       identifier="TOTALLY_REAL_ID",
                                       scheme="TOTALLY_REAL_SCHEME")

    oi = OrganizationImporter('jid')
    pi = PersonImporter('jid')

    zs = ScrapePerson(name='Zadock Snodgrass', birth_date="1800-01-01")
    zs.add_identifier(identifier='TOTALLY_REAL_ID',
                      scheme='TOTALLY_REAL_SCHEME')
    pi.import_data([zs.as_dict()])

    za_db = Person.objects.get()
    Membership.objects.create(person_id=za_db.id,
                              organization_id=org.id)

    zs2 = ScrapePerson(name='Zadock Snodgrass', birth_date="1900-01-01")
    zs2.add_identifier(identifier='TOTALLY_REAL_ID',
                       scheme='TOTALLY_REAL_SCHEME')

    # This is contrived and perhaps broken, but we're going to check this.
    # We *really* don't want to *ever* cross jurisdiction bounds.
    # Import the 1900-born namesake (zs2), not zs again: otherwise both
    # records carry the same birth date and the final assertion could not
    # tell which person the sponsorship resolved to.
    PersonImporter('another-jurisdiction').import_data([zs2.as_dict()])

    BillImporter('jid', oi, pi).import_data([bill.as_dict()])

    obj = Bill.objects.get()
    (entry,) = obj.sponsorships.all()
    assert entry.person.name == "Zadock Snodgrass"
    assert entry.person.birth_date == "1800-01-01"
def scrape_bills(self):
    """
    Does the following
      1) Scrapes bill data from unitedstates project and saves the data to
         path specified in UnitedStates module
      2) Iterates over bill data and converts each one to an OCD-compliant
         bill model.
      3) Yields the OCD-compliant bill model instance

    A bill file (or a text-version file) that cannot be opened or parsed is
    reported with print() and skipped; iteration continues with the next one.

    @return: yield Bill instance
    """
    # run scraper first to pull in all the bill data
    self.run_unitedstates_bill_scraper()

    # iterate over all the files and build and yield Bill objects
    # (raw string: same pattern text, without invalid-escape warnings)
    for filename in find_files(settings.SCRAPED_DATA_DIR,
                               r'.*[a-z]*\/[a-z]*[0-9]*\/data\.json'):
        try:
            with open(filename) as json_file:
                json_data = json.load(json_file)

            bill_type = json_data['bill_type']
            type_info = self.TYPE_MAP[bill_type]
            # the chamber is reused for the bill, its sponsors and its actions
            chamber = type_info['chamber']

            # Initialize Object
            bill = Bill(type_info['canonical'] + ' ' + json_data['number'],
                        json_data['congress'],
                        json_data['official_title'],
                        chamber=chamber)

            # Basics
            bill.type = [bill_type]
            bill.subject = json_data['subjects']
            # the summary may be absent or null; only add it when present
            summary = json_data.get('summary')
            if summary is not None:
                bill.add_summary(summary['as'],
                                 summary['text'],
                                 summary['date'])

            # Common Fields
            bill.sources = [{'url': json_data['url'], 'note': 'all'}]

            # Other/Related Bills
            bill.other_titles = [{'note': t['type'], 'title': t['title']}
                                 for t in json_data['titles']]
            # change value of relationship_type to 'type' field from
            # json_data when permitted by schema
            bill.related_bills = [{'session': b['session'],
                                   'name': b['name'],
                                   'relationship_type': 'companion'}
                                  for b in json_data['related_bills']]

            # add primary sponsor
            sponsor = json_data['sponsor']
            bill.add_sponsorship_by_identifier(sponsor['name'],
                                               'person', 'person', True,
                                               scheme='thomas_id',
                                               identifier=sponsor['thomas_id'],
                                               chamber=chamber)

            # add cosponsors
            for cs in json_data['cosponsors']:
                bill.add_sponsorship_by_identifier(cs['name'],
                                                   'person', 'person', False,
                                                   scheme='thomas_id',
                                                   identifier=cs['thomas_id'],
                                                   chamber=chamber)

            # add introduced_at and actions
            # NOTE(review): 'type' is a plain string here but a list for the
            # actions below -- confirm which form the schema expects.
            bill.actions.append({'date': json_data['introduced_at'],
                                 'type': 'introduced',
                                 'description': 'date of introduction',
                                 'actor': chamber,
                                 'related_entities': []})
            for action in json_data['actions']:
                bill.actions.append({'date': action['acted_at'],
                                     'type': [action['type']],
                                     'description': action['text'],
                                     'actor': chamber,
                                     'related_entities': []})

            # add bill versions
            # NOTE(review): if find_files treats the pattern as a regular
            # expression, the leading '*' has nothing to repeat -- confirm.
            versions_dir = os.path.join(settings.SCRAPED_DATA_DIR,
                                        'data',
                                        bill.session,
                                        'bills',
                                        bill_type,
                                        bill_type + json_data['number'],
                                        'text-versions')
            for version_path in find_files(versions_dir, r'*\.json'):
                try:
                    with open(version_path) as version_file:
                        version_json_data = json.load(version_file)
                    # .items() works on both Python 2 and 3, unlike .iteritems()
                    for k, v in version_json_data['urls'].items():
                        version_code = version_json_data['version_code']
                        bill.versions.append({
                            'date': version_json_data['issued_on'],
                            'type': version_code,
                            'name': self.VERSION_MAP[version_code],
                            'links': [{'mimetype': k, 'url': v}],
                        })
                except (IOError, ValueError):
                    # json.load signals malformed JSON with ValueError
                    print("Unable to open or parse file with path " + version_path)
                    continue

            yield bill

        except (IOError, ValueError):
            # json.load signals malformed JSON with ValueError
            print("Unable to open or parse file with path " + filename)
            continue
def scrape_bills(self):
    """
    Does the following
      1) Scrapes bill data from unitedstates project and saves the data to
         path specified in UnitedStates module
      2) Iterates over bill data and converts each one to an OCD-compliant
         bill model.
      3) Yields the OCD-compliant bill model instance

    A bill file (or a text-version file) that cannot be opened or parsed is
    reported with print() and skipped; iteration continues with the next one.

    @return: yield Bill instance
    """
    # run scraper first to pull in all the bill data
    self.run_unitedstates_bill_scraper()

    # iterate over all the files and build and yield Bill objects
    # (raw string: same pattern text, without invalid-escape warnings)
    for filename in find_files(settings.SCRAPED_DATA_DIR,
                               r'.*[a-z]*\/[a-z]*[0-9]*\/data\.json'):
        try:
            with open(filename) as json_file:
                json_data = json.load(json_file)

            bill_type = json_data['bill_type']
            type_info = self.TYPE_MAP[bill_type]
            # the chamber is reused for the bill, its sponsors and its actions
            chamber = type_info['chamber']

            # Initialize Object
            bill = Bill(type_info['canonical'] + ' ' + json_data['number'],
                        json_data['congress'],
                        json_data['official_title'],
                        chamber=chamber)

            # Basics
            bill.type = [bill_type]
            bill.subject = json_data['subjects']
            # the summary may be absent or null; only add it when present
            summary = json_data.get('summary')
            if summary is not None:
                bill.add_summary(summary['as'],
                                 summary['text'],
                                 summary['date'])

            # Common Fields
            bill.sources = [{'url': json_data['url'], 'note': 'all'}]

            # Other/Related Bills
            bill.other_titles = [{'note': t['type'], 'title': t['title']}
                                 for t in json_data['titles']]
            # change value of relationship_type to 'type' field from
            # json_data when permitted by schema
            bill.related_bills = [{'session': b['session'],
                                   'name': b['name'],
                                   'relationship_type': 'companion'}
                                  for b in json_data['related_bills']]

            # add primary sponsor
            sponsor = json_data['sponsor']
            bill.add_sponsorship_by_identifier(sponsor['name'],
                                               'person', 'person', True,
                                               scheme='thomas_id',
                                               identifier=sponsor['thomas_id'],
                                               chamber=chamber)

            # add cosponsors
            for cs in json_data['cosponsors']:
                bill.add_sponsorship_by_identifier(cs['name'],
                                                   'person', 'person', False,
                                                   scheme='thomas_id',
                                                   identifier=cs['thomas_id'],
                                                   chamber=chamber)

            # add introduced_at and actions
            # NOTE(review): 'type' is a plain string here but a list for the
            # actions below -- confirm which form the schema expects.
            bill.actions.append({'date': json_data['introduced_at'],
                                 'type': 'introduced',
                                 'description': 'date of introduction',
                                 'actor': chamber,
                                 'related_entities': []})
            for action in json_data['actions']:
                bill.actions.append({'date': action['acted_at'],
                                     'type': [action['type']],
                                     'description': action['text'],
                                     'actor': chamber,
                                     'related_entities': []})

            # add bill versions
            # NOTE(review): if find_files treats the pattern as a regular
            # expression, the leading '*' has nothing to repeat -- confirm.
            versions_dir = os.path.join(settings.SCRAPED_DATA_DIR,
                                        'data',
                                        bill.session,
                                        'bills',
                                        bill_type,
                                        bill_type + json_data['number'],
                                        'text-versions')
            for version_path in find_files(versions_dir, r'*\.json'):
                try:
                    with open(version_path) as version_file:
                        version_json_data = json.load(version_file)
                    # .items() works on both Python 2 and 3, unlike .iteritems()
                    for k, v in version_json_data['urls'].items():
                        version_code = version_json_data['version_code']
                        bill.versions.append({
                            'date': version_json_data['issued_on'],
                            'type': version_code,
                            'name': self.VERSION_MAP[version_code],
                            'links': [{'mimetype': k, 'url': v}],
                        })
                except (IOError, ValueError):
                    # json.load signals malformed JSON with ValueError
                    print("Unable to open or parse file with path " + version_path)
                    continue

            yield bill

        except (IOError, ValueError):
            # json.load signals malformed JSON with ValueError
            print("Unable to open or parse file with path " + filename)
            continue
def _scrape_bills(self):
    """
    Does the following
      1) Scrapes bill data from unitedstates project and saves the data to
         path specified in UnitedStates module
      2) Iterates over bill data and converts each one to an OCD-compliant
         bill model.
      3) Yields the OCD-compliant bill model instance

    A bill whose data file cannot be opened or processed is reported with
    print() (including the traceback) and skipped. A text-version file that
    cannot be opened or parsed is reported and skipped without dropping the
    bill it belongs to.

    @return: generator for federal US bills in OCD-compliant format
    @rtype: generator
    """
    # run scraper first to pull in all the bill data
    self._run_unitedstates_bill_scraper()

    # iterate over all the files and build and yield Bill objects
    # (raw string: same pattern text, without invalid-escape warnings)
    for filename in find_files(settings.SCRAPED_DATA_DIR,
                               r'.*/data/[0-9]+/bills/[^\/]+/[^\/]+/data.json'):
        try:
            with open(filename) as json_file:
                json_data = json.load(json_file)

            bill_type = json_data['bill_type']
            type_info = constants.TYPE_MAP[bill_type]
            # the chamber is reused for the bill, its sponsors and its actions
            chamber = type_info['chamber']

            # Initialize Object
            bill = Bill(type_info['canonical'] + ' ' + json_data['number'],
                        json_data['congress'],
                        json_data['official_title'],
                        chamber=chamber)

            # add source of data
            bill.add_source(json_data['url'], note='all')

            # add subjects
            for subject in json_data['subjects']:
                bill.add_subject(subject)

            # add summary (may be absent or null)
            summary = json_data.get('summary')
            if summary is not None:
                bill.add_abstract(summary['text'],
                                  summary['as'],
                                  summary['date'])

            # add titles
            for item in json_data['titles']:
                bill.add_title(item['title'], item['type'])

            # add other/related Bills
            for b in json_data['related_bills']:
                if b.get('type') == 'bill':
                    # bill_id is split on '-': first part is matched against
                    # BILL_SPLIT, second part is used as the session
                    split = b['bill_id'].split('-')
                    m = UnitedStatesBillScraper.BILL_SPLIT.match(split[0])
                    bill.add_related_bill(
                        constants.TYPE_MAP[m.group(1)]['canonical'] + ' ' + m.group(2),
                        legislative_session=split[1],
                        relation_type='companion')

            # add sponsor
            sponsor = json_data['sponsor']
            bill.add_sponsorship_by_identifier(sponsor['name'],
                                               'person', 'person', True,
                                               scheme='thomas_id',
                                               identifier=sponsor['thomas_id'],
                                               chamber=chamber)

            # add cosponsors
            for cs in json_data['cosponsors']:
                bill.add_sponsorship_by_identifier(cs['name'],
                                                   'person', 'person', False,
                                                   scheme='thomas_id',
                                                   identifier=cs['thomas_id'],
                                                   chamber=chamber)

            # add introduced_at and actions
            bill.add_action('date of introduction',
                            datetime_to_date(json_data['introduced_at']),
                            chamber=chamber,
                            related_entities=[])

            # add other actions
            for action in json_data['actions']:
                bill.actions.append({'date': datetime_to_date(action['acted_at']),
                                     'type': [action['type']],
                                     'description': action['text'],
                                     'actor': chamber,
                                     'related_entities': []})

            # add bill versions
            versions_dir = os.path.join(settings.SCRAPED_DATA_DIR,
                                        'data',
                                        bill.legislative_session,
                                        'bills',
                                        bill_type,
                                        bill_type + json_data['number'],
                                        'text-versions')
            for version_path in find_files(versions_dir, r'/.*/*\.json'):
                try:
                    with open(version_path) as version_file:
                        version_json_data = json.load(version_file)
                    for k, v in version_json_data['urls'].items():
                        version_code = version_json_data['version_code']
                        bill.versions.append({
                            'date': datetime_to_date(version_json_data['issued_on']),
                            'type': version_code,
                            'name': constants.VERSION_MAP[version_code],
                            'links': [{'mimetype': k, 'url': v}],
                        })
                except (IOError, ValueError):
                    # json.load signals malformed JSON with ValueError
                    print("Unable to open or parse file with path " + version_path)
                    continue

            # finally yield bill object
            yield bill

        except IOError:
            print("Unable to open file with path " + filename)
            print(traceback.format_exc())
            continue
        except KeyError:
            print("Unable to parse file with path " + filename)
            print(traceback.format_exc())
            continue
        except Exception:
            # Not a bare ``except:``: the try body contains the ``yield``, so
            # a bare clause would also trap GeneratorExit (raised when the
            # consumer closes the generator early) as well as
            # KeyboardInterrupt/SystemExit, and then keep iterating.
            print('Unknown error with ' + filename)
            print(traceback.format_exc())
            continue