def do_import(self, juris, args):
    """Import the scraped data for ``args.module`` and return the merged report.

    One importer per object type is built for ``juris.jurisdiction_id``; each
    is then run over the module's scraped-data directory and the dict it
    returns is merged into a single report dict.

    Bill, vote and event importers are not wired in by this version.
    """
    datadir = os.path.join(settings.SCRAPED_DATA_DIR, args.module)
    jurisdiction_id = juris.jurisdiction_id

    # Later importers are handed the earlier ones they depend on.
    jurisdictions = JurisdictionImporter(jurisdiction_id)
    organizations = OrganizationImporter(jurisdiction_id)
    people = PersonImporter(jurisdiction_id)
    posts = PostImporter(jurisdiction_id, organizations)
    memberships = MembershipImporter(jurisdiction_id, people, organizations, posts)

    report = {}

    # TODO: wrap in a transaction
    for importer in (jurisdictions, organizations, people, posts, memberships):
        report.update(importer.import_directory(datadir))

    return report
def test_jurisdiction_merge_related():
    """Re-importing a jurisdiction adds and updates sessions but never deletes them."""
    Division.objects.create(id='ocd-division/country:us', name='USA')

    importer = JurisdictionImporter('jurisdiction-id')
    fake = FakeJurisdiction()

    def reimport():
        # Push the current state of the fake through the importer and report
        # how many legislative sessions the database holds afterwards.
        importer.import_item(fake.as_dict())
        return LegislativeSession.objects.count()

    # The initial import stores two sessions.
    assert reimport() == 2

    # Dropping a session from the scraped data must not delete it from the
    # database, because deleting a session can remove its bills.
    fake.legislative_sessions.pop()
    assert reimport() == 2

    # A new identifier is added alongside the existing sessions.
    fake.legislative_sessions.append({'identifier': '2017', 'name': '2017 Session'})
    assert reimport() == 3

    # An existing identifier is matched, and its non-identifier fields update.
    fake.legislative_sessions.append({'identifier': '2016', 'name': 'updated'})
    assert reimport() == 3
    assert LegislativeSession.objects.get(identifier='2016').name == 'updated'
def do_import(stream, transaction):
    """Import every object in ``stream`` and return the merged import report.

    ``stream`` is materialized once because it is scanned once per object
    type.  The jurisdiction id is taken from ``transaction.jurisdiction.id``.
    """
    items = list(stream)
    jurisdiction_id = transaction.jurisdiction.id

    juris_importer = JurisdictionImporter(jurisdiction_id)
    org_importer = OrganizationImporter(jurisdiction_id)
    person_importer = PersonImporter(jurisdiction_id)
    post_importer = PostImporter(jurisdiction_id, org_importer)
    membership_importer = MembershipImporter(
        jurisdiction_id, person_importer, org_importer, post_importer)

    def dicts_of(otype):
        # Lazily yield the dict form of every item that is an ``otype``.
        return (item.as_dict() for item in items if isinstance(item, otype))

    # This basically relates to Pupa's pupa.clu.commands.update:113
    # (From there - wrap this in a transaction.)
    passes = (
        (juris_importer, Jurisdiction),
        (org_importer, Organization),
        (person_importer, Person),
        (post_importer, Post),
        (membership_importer, Membership),
    )

    report = {}
    for importer, otype in passes:
        report.update(importer.import_data(dicts_of(otype)))
    return report
def do_import(stream, transaction):
    """Run the scraped objects in ``stream`` through the importers.

    Objects are grouped by type and handed, as dicts, to the matching
    importer; the dicts returned by each ``import_data`` call are merged into
    one report, which is returned.  The jurisdiction id comes from
    ``transaction.jurisdiction.id``.
    """
    scraped = list(stream)  # iterated once per object type below
    juris_id = transaction.jurisdiction.id

    jurisdictions = JurisdictionImporter(juris_id)
    organizations = OrganizationImporter(juris_id)
    people = PersonImporter(juris_id)
    posts = PostImporter(juris_id, organizations)
    memberships = MembershipImporter(juris_id, people, organizations, posts)

    def only(otype):
        # Generator over the dict form of each scraped object of type ``otype``.
        return (obj.as_dict() for obj in scraped if isinstance(obj, otype))

    report = {}

    # This basically relates to Pupa's pupa.clu.commands.update:113
    # (From there - wrap this in a transaction.)
    report.update(jurisdictions.import_data(only(Jurisdiction)))
    report.update(organizations.import_data(only(Organization)))
    report.update(people.import_data(only(Person)))
    report.update(posts.import_data(only(Post)))
    report.update(memberships.import_data(only(Membership)))

    return report
def do_import(self, juris, args):
    """Import the scraped data for ``args.module`` and refresh session reports.

    Each enabled object type is imported from the module's scraped-data
    directory inside one database transaction.  Afterwards a data quality
    report is regenerated for every legislative session the bill and vote
    event importers report having seen.  Returns the merged dict of the
    per-importer reports.
    """
    # import inside here to avoid loading Django code unnecessarily
    from pupa.importers import (JurisdictionImporter, OrganizationImporter, PersonImporter,
                                PostImporter, MembershipImporter, BillImporter,
                                VoteEventImporter, EventImporter)
    from pupa.reports import generate_session_report
    from pupa.models import SessionDataQualityReport

    datadir = os.path.join(settings.SCRAPED_DATA_DIR, args.module)

    juris_importer = JurisdictionImporter(juris.jurisdiction_id)
    org_importer = OrganizationImporter(juris.jurisdiction_id)
    person_importer = PersonImporter(juris.jurisdiction_id)
    post_importer = PostImporter(juris.jurisdiction_id, org_importer)
    membership_importer = MembershipImporter(juris.jurisdiction_id, person_importer,
                                             org_importer, post_importer)
    bill_importer = BillImporter(juris.jurisdiction_id, org_importer, person_importer)
    vote_event_importer = VoteEventImporter(juris.jurisdiction_id, person_importer,
                                            org_importer, bill_importer)
    event_importer = EventImporter(juris.jurisdiction_id, org_importer, person_importer,
                                   bill_importer, vote_event_importer)

    report = {}

    def run(banner, importer):
        # Announce the step, then merge that importer's results into the report.
        print(banner)
        report.update(importer.import_directory(datadir))

    # NOTE(review): block nesting was reconstructed from flattened source --
    # confirm that posts and memberships are gated by ENABLE_PEOPLE_AND_ORGS.
    with transaction.atomic():
        run('import jurisdictions...', juris_importer)
        if settings.ENABLE_PEOPLE_AND_ORGS:
            run('import organizations...', org_importer)
            run('import people...', person_importer)
            run('import posts...', post_importer)
            run('import memberships...', membership_importer)
        if settings.ENABLE_BILLS:
            run('import bills...', bill_importer)
        if settings.ENABLE_EVENTS:
            run('import events...', event_importer)
        if settings.ENABLE_VOTES:
            run('import vote events...', vote_event_importer)

    # compile info on all sessions that were updated in this run
    # NOTE(review): assumed to run after the import transaction has exited --
    # confirm against the original indentation.
    seen_sessions = set(bill_importer.get_seen_sessions())
    seen_sessions.update(vote_event_importer.get_seen_sessions())
    for session in seen_sessions:
        new_report = generate_session_report(session)
        # Swap the old report for the new one atomically.
        with transaction.atomic():
            SessionDataQualityReport.objects.filter(legislative_session=session).delete()
            new_report.save()

    return report
def test_jurisdiction_merge_related():
    """Legislative sessions survive re-imports: they can be added or updated, not removed."""
    Division.objects.create(id='ocd-division/country:us', name='USA')

    sessions = LegislativeSession.objects
    juris_importer = JurisdictionImporter('jurisdiction-id')
    fake_juris = FakeJurisdiction()

    # first import: two sessions end up in the database
    juris_importer.import_item(fake_juris.as_dict())
    assert sessions.count() == 2

    # removing a session from the input must not remove it from the database
    # (deleting a legislative session can remove bills)
    fake_juris.legislative_sessions.pop()
    juris_importer.import_item(fake_juris.as_dict())
    assert sessions.count() == 2

    # a brand new identifier brings the total to three
    new_session = {'identifier': '2017', 'name': '2017 Session'}
    fake_juris.legislative_sessions.append(new_session)
    juris_importer.import_item(fake_juris.as_dict())
    assert sessions.count() == 3

    # a known identifier adds nothing, but its non-identifier fields update
    renamed_session = {'identifier': '2016', 'name': 'updated'}
    fake_juris.legislative_sessions.append(renamed_session)
    juris_importer.import_item(fake_juris.as_dict())
    assert sessions.count() == 3
    assert sessions.get(identifier='2016').name == 'updated'
def do_import(self, juris, args):
    """Import all scraped object types for ``args.module`` in one transaction.

    Builds one importer per object type for ``juris.jurisdiction_id``, runs
    each over the module's scraped-data directory, and returns the merged
    dict of the per-importer reports.
    """
    # import inside here to avoid loading Django code unnecessarily
    from pupa.importers import (JurisdictionImporter, OrganizationImporter, PersonImporter,
                                PostImporter, MembershipImporter, BillImporter,
                                VoteImporter, EventImporter, DisclosureImporter)

    datadir = os.path.join(settings.SCRAPED_DATA_DIR, args.module)

    juris_importer = JurisdictionImporter(juris.jurisdiction_id)
    org_importer = OrganizationImporter(juris.jurisdiction_id)
    person_importer = PersonImporter(juris.jurisdiction_id)
    post_importer = PostImporter(juris.jurisdiction_id, org_importer)
    membership_importer = MembershipImporter(juris.jurisdiction_id, person_importer,
                                             org_importer, post_importer)
    bill_importer = BillImporter(juris.jurisdiction_id, org_importer, person_importer)
    vote_importer = VoteImporter(juris.jurisdiction_id, person_importer, org_importer,
                                 bill_importer)
    event_importer = EventImporter(juris.jurisdiction_id, org_importer, person_importer)
    disclosure_importer = DisclosureImporter(juris.jurisdiction_id, org_importer,
                                             person_importer, event_importer)

    # (progress message, importer) pairs, in the order they must run
    steps = (
        ('import jurisdictions...', juris_importer),
        ('import organizations...', org_importer),
        ('import people...', person_importer),
        ('import posts...', post_importer),
        ('import memberships...', membership_importer),
        ('import bills...', bill_importer),
        ('import events...', event_importer),
        ('import disclosures...', disclosure_importer),
        ('import votes...', vote_importer),
    )

    report = {}
    with transaction.atomic():
        for banner, importer in steps:
            print(banner)
            report.update(importer.import_directory(datadir))

    return report
def test_jurisdiction_update():
    """Repeated imports report insert, then noop, then update after a rename."""
    fake = FakeJurisdiction()
    importer = JurisdictionImporter('jurisdiction-id')

    # first import creates the record
    _, action = importer.import_item(fake.as_dict())
    assert action == 'insert'

    # identical data is recognised as unchanged and does not duplicate the row
    _, action = importer.import_item(fake.as_dict())
    assert action == 'noop'
    assert Jurisdiction.objects.count() == 1

    # a changed name updates the existing row in place
    fake.name = 'different name'
    updated, action = importer.import_item(fake.as_dict())
    assert action == 'update'
    assert Jurisdiction.objects.count() == 1
    assert updated.name == 'different name'
def test_jurisdiction_update():
    """Repeated imports report insert, then noop, then update after a rename.

    The final check reads the name back from the database rather than from the
    object returned by the importer.
    """
    fake = FakeJurisdiction()
    importer = JurisdictionImporter('jurisdiction-id')

    # first import creates the record
    _, action = importer.import_item(fake.as_dict())
    assert action == 'insert'

    # identical data is recognised as unchanged and does not duplicate the row
    _, action = importer.import_item(fake.as_dict())
    assert action == 'noop'
    assert Jurisdiction.objects.count() == 1

    # a changed name updates the existing row in place
    fake.name = 'different name'
    _, action = importer.import_item(fake.as_dict())
    assert action == 'update'
    assert Jurisdiction.objects.count() == 1
    assert Jurisdiction.objects.get().name == 'different name'
def test_jurisdiction_update():
    """Repeated imports report insert, then noop, then update after a rename.

    A Division row is created first, and the final check reads the name back
    from the database rather than from the object returned by the importer.
    """
    Division.objects.create(id='ocd-division/country:us', name='USA')

    fake = FakeJurisdiction()
    importer = JurisdictionImporter('jurisdiction-id')

    # first import creates the record
    _, action = importer.import_item(fake.as_dict())
    assert action == 'insert'

    # identical data is recognised as unchanged and does not duplicate the row
    _, action = importer.import_item(fake.as_dict())
    assert action == 'noop'
    assert Jurisdiction.objects.count() == 1

    # a changed name updates the existing row in place
    fake.name = 'different name'
    _, action = importer.import_item(fake.as_dict())
    assert action == 'update'
    assert Jurisdiction.objects.count() == 1
    assert Jurisdiction.objects.get().name == 'different name'
def do_import(self, juris, args):
    """Import all scraped object types for ``args.module`` in one transaction.

    Builds one importer per object type for ``juris.jurisdiction_id``, runs
    each over the module's scraped-data directory, and returns the merged
    dict of the per-importer reports.
    """
    # import inside here to avoid loading Django code unnecessarily
    from pupa.importers import (JurisdictionImporter, OrganizationImporter, PersonImporter,
                                PostImporter, MembershipImporter, BillImporter,
                                VoteEventImporter, EventImporter)

    datadir = os.path.join(settings.SCRAPED_DATA_DIR, args.module)

    juris_importer = JurisdictionImporter(juris.jurisdiction_id)
    org_importer = OrganizationImporter(juris.jurisdiction_id)
    person_importer = PersonImporter(juris.jurisdiction_id)
    post_importer = PostImporter(juris.jurisdiction_id, org_importer)
    membership_importer = MembershipImporter(juris.jurisdiction_id, person_importer,
                                             org_importer, post_importer)
    bill_importer = BillImporter(juris.jurisdiction_id, org_importer, person_importer)
    vote_event_importer = VoteEventImporter(juris.jurisdiction_id, person_importer,
                                            org_importer, bill_importer)
    event_importer = EventImporter(juris.jurisdiction_id, org_importer, person_importer,
                                   bill_importer, vote_event_importer)

    # (progress message, importer) pairs, in the order they must run
    steps = (
        ('import jurisdictions...', juris_importer),
        ('import organizations...', org_importer),
        ('import people...', person_importer),
        ('import posts...', post_importer),
        ('import memberships...', membership_importer),
        ('import bills...', bill_importer),
        ('import events...', event_importer),
        ('import vote events...', vote_event_importer),
    )

    report = {}
    with transaction.atomic():
        for banner, importer in steps:
            print(banner)
            report.update(importer.import_directory(datadir))

    return report
def test_jurisdiction_import():
    """A jurisdiction imported via ``import_data`` is stored with its scraped fields."""
    fake = FakeJurisdiction()
    importer = JurisdictionImporter('jurisdiction-id')
    importer.import_data([fake.as_dict()])

    # .get() with no arguments also asserts that exactly one row exists
    stored = Jurisdiction.objects.get()
    assert stored.id == fake.jurisdiction_id
    assert stored.division_id == fake.division_id
    assert stored.name == fake.name
    assert stored.url == fake.url
def test_jurisdiction_import():
    """A jurisdiction imported via ``import_data`` is stored with its scraped fields."""
    Division.objects.create(id='ocd-division/country:us', name='USA')

    fake = FakeJurisdiction()
    importer = JurisdictionImporter('jurisdiction-id')
    importer.import_data([fake.as_dict()])

    # .get() with no arguments also asserts that exactly one row exists
    stored = Jurisdiction.objects.get()
    assert stored.id == fake.jurisdiction_id
    assert stored.division_id == fake.division_id
    assert stored.name == fake.name
    assert stored.url == fake.url
def do_import(self, juris, args):
    """Import the scraped data for ``args.module`` and refresh session reports.

    Each enabled object type is imported from the module's scraped-data
    directory inside one database transaction.  Afterwards a data quality
    report is regenerated for every legislative session the bill and vote
    event importers report having seen.  Returns the merged dict of the
    per-importer reports.
    """
    # import inside here to avoid loading Django code unnecessarily
    from pupa.importers import (JurisdictionImporter, OrganizationImporter, PersonImporter,
                                PostImporter, MembershipImporter, BillImporter,
                                VoteEventImporter, EventImporter)
    from pupa.reports import generate_session_report
    from pupa.models import SessionDataQualityReport

    datadir = os.path.join(settings.SCRAPED_DATA_DIR, args.module)

    juris_importer = JurisdictionImporter(juris.jurisdiction_id)
    org_importer = OrganizationImporter(juris.jurisdiction_id)
    person_importer = PersonImporter(juris.jurisdiction_id)
    post_importer = PostImporter(juris.jurisdiction_id, org_importer)
    membership_importer = MembershipImporter(juris.jurisdiction_id, person_importer,
                                             org_importer, post_importer)
    bill_importer = BillImporter(juris.jurisdiction_id, org_importer, person_importer)
    vote_event_importer = VoteEventImporter(juris.jurisdiction_id, person_importer,
                                            org_importer, bill_importer)
    event_importer = EventImporter(juris.jurisdiction_id, org_importer, person_importer,
                                   bill_importer, vote_event_importer)

    report = {}

    def import_step(message, importer):
        # Print the progress message, then fold the importer's results into report.
        print(message)
        report.update(importer.import_directory(datadir))

    # NOTE(review): block nesting was reconstructed from flattened source --
    # confirm that posts and memberships are gated by ENABLE_PEOPLE_AND_ORGS.
    with transaction.atomic():
        import_step('import jurisdictions...', juris_importer)
        if settings.ENABLE_PEOPLE_AND_ORGS:
            import_step('import organizations...', org_importer)
            import_step('import people...', person_importer)
            import_step('import posts...', post_importer)
            import_step('import memberships...', membership_importer)
        if settings.ENABLE_BILLS:
            import_step('import bills...', bill_importer)
        if settings.ENABLE_EVENTS:
            import_step('import events...', event_importer)
        if settings.ENABLE_VOTES:
            import_step('import vote events...', vote_event_importer)

    # compile info on all sessions that were updated in this run
    # NOTE(review): assumed to run after the import transaction has exited --
    # confirm against the original indentation.
    seen_sessions = set(bill_importer.get_seen_sessions())
    seen_sessions.update(vote_event_importer.get_seen_sessions())
    for session in seen_sessions:
        new_report = generate_session_report(session)
        # Replace the previous report for this session atomically.
        with transaction.atomic():
            SessionDataQualityReport.objects.filter(
                legislative_session=session).delete()
            new_report.save()

    return report