def test_multiple_orgs_of_same_class():
    """
    Memberships must land on the right organization even when two
    organizations in one jurisdiction share the same classification.
    """
    for org_id, org_name in (("fnd", "Foundation"), ("fdr", "Federation")):
        Organization.objects.create(id=org_id, name=org_name,
                                    classification="foundation",
                                    jurisdiction_id="fnd-jid")

    seldon = ScrapePerson('Hari Seldon', primary_org='foundation',
                          role='founder', primary_org_name='Foundation')
    captain = ScrapePerson('Jean Luc Picard', primary_org='foundation',
                           role='founder', primary_org_name='Federation')

    people_importer = PersonImporter('fnd-jid')
    people_importer.import_data([seldon.as_dict()])
    people_importer.import_data([captain.as_dict()])

    # try to import a membership
    orgs_importer = OrganizationImporter('fnd-jid')
    posts_importer = DumbMockImporter()
    membership_importer = MembershipImporter('fnd-jid', people_importer,
                                             orgs_importer, posts_importer)
    membership_importer.import_data([seldon._related[0].as_dict(),
                                     captain._related[0].as_dict()])

    # each person's single membership must point at the org named in the scrape
    expected = (('Hari Seldon', 'Foundation'),
                ('Jean Luc Picard', 'Federation'))
    for person_name, org_name in expected:
        membership = Person.objects.get(name=person_name).memberships.get()
        assert membership.organization.name == org_name
def test_multiple_orgs_of_same_class():
    """
    Memberships must land on the right organization even when two
    organizations in one jurisdiction share the same classification.
    """
    create_jurisdiction()
    for org_id, org_name in (("fnd", "Foundation"), ("fdr", "Federation")):
        Organization.objects.create(id=org_id, name=org_name,
                                    classification="foundation",
                                    jurisdiction_id="fnd-jid")

    seldon = ScrapePerson('Hari Seldon', primary_org='foundation',
                          role='founder', primary_org_name='Foundation')
    captain = ScrapePerson('Jean Luc Picard', primary_org='foundation',
                           role='founder', primary_org_name='Federation')

    people_importer = PersonImporter('fnd-jid')
    people_importer.import_data([seldon.as_dict()])
    people_importer.import_data([captain.as_dict()])

    # try to import a membership
    orgs_importer = OrganizationImporter('fnd-jid')
    posts_importer = DumbMockImporter()
    membership_importer = MembershipImporter('fnd-jid', people_importer,
                                             orgs_importer, posts_importer)
    membership_importer.import_data([seldon._related[0].as_dict(),
                                     captain._related[0].as_dict()])

    # each person's single membership must point at the org named in the scrape
    expected = (('Hari Seldon', 'Foundation'),
                ('Jean Luc Picard', 'Federation'))
    for person_name, org_name in expected:
        membership = Person.objects.get(name=person_name).memberships.get()
        assert membership.organization.name == org_name
def test_same_name_people_other_name():
    """
    Importing two people in one batch must raise SameNameError when one
    person's other_name equals the other person's primary name.
    """
    # ensure we're taking other_names into account for the name collision code
    # (the organization is only created, never referenced, so it is not bound)
    Organization.objects.create(name='WWE', jurisdiction_id='jurisdiction-id')
    p1 = ScrapePerson('Dwayne Johnson', image='http://example.com/1')
    p2 = ScrapePerson('Rock', image='http://example.com/2')
    p2.add_name('Dwayne Johnson')

    # the people have the same name but are apparently different
    with pytest.raises(SameNameError):
        PersonImporter('jurisdiction-id').import_data([p1.as_dict(), p2.as_dict()])
def test_same_name_people_other_name():
    """
    Importing two people in one batch must raise SameNameError when one
    person's other_name equals the other person's primary name.
    """
    create_jurisdiction()
    # ensure we're taking other_names into account for the name collision code
    Organization.objects.create(name='WWE', jurisdiction_id='jid')

    primary = ScrapePerson('Dwayne Johnson', image='http://example.com/1')
    aliased = ScrapePerson('Rock', image='http://example.com/2')
    aliased.add_name('Dwayne Johnson')

    # the people have the same name but are apparently different
    with pytest.raises(SameNameError):
        PersonImporter('jid').import_data(
            [scraped.as_dict() for scraped in (primary, aliased)])
def test_full_vote_event():
    """
    Import an organization, two people, their memberships, a bill and one
    vote event, then verify the stored VoteEvent, its count and its votes.
    """
    j = Jurisdiction.objects.create(id='jid', division_id='did')
    j.legislative_sessions.create(name='1900', identifier='1900')

    sp1 = ScrapePerson('John Smith', primary_org='lower')
    sp2 = ScrapePerson('Adam Smith', primary_org='lower')
    org = ScrapeOrganization(name='House', classification='lower')
    bill = ScrapeBill('HB 1', '1900', 'Axe & Tack Tax Act',
                      from_organization=org._id)
    vote_event = ScrapeVoteEvent(legislative_session='1900',
                                 motion_text='passage',
                                 start_date='1900-04-01',
                                 classification='passage:bill',
                                 result='pass',
                                 bill_chamber='lower',
                                 bill='HB 1',
                                 organization=org._id)
    vote_event.set_count('yes', 20)
    vote_event.yes('John Smith')
    vote_event.no('Adam Smith')

    oi = OrganizationImporter('jid')
    oi.import_data([org.as_dict()])

    pi = PersonImporter('jid')
    pi.import_data([sp1.as_dict(), sp2.as_dict()])

    mi = MembershipImporter('jid', pi, oi, DumbMockImporter())
    mi.import_data([sp1._related[0].as_dict(), sp2._related[0].as_dict()])

    bi = BillImporter('jid', oi, pi)
    bi.import_data([bill.as_dict()])

    VoteEventImporter('jid', pi, oi, bi).import_data([vote_event.as_dict()])

    assert VoteEvent.objects.count() == 1
    ve = VoteEvent.objects.get()
    assert ve.legislative_session == LegislativeSession.objects.get()
    assert ve.motion_classification == ['passage:bill']
    assert ve.bill == Bill.objects.get()

    count = ve.counts.get()
    assert count.option == 'yes'
    assert count.value == 20

    # Materialize the votes once and reuse the list instead of re-querying.
    votes = list(ve.votes.all())
    assert len(votes) == 2

    # Require exactly the two expected voters so an unexpected voter_name
    # cannot slip through a catch-all branch.
    expected_option = {'John Smith': 'yes', 'Adam Smith': 'no'}
    assert {v.voter_name for v in votes} == set(expected_option)
    for v in votes:
        assert v.option == expected_option[v.voter_name]
        assert v.voter == Person.objects.get(name=v.voter_name)
def test_same_name_people():
    """
    Walk through the same-name import rules: collisions without birth dates
    raise SameNameError, while distinct birth dates allow update and insert.
    """
    wwe = Organization.objects.create(name='WWE', jurisdiction_id='jurisdiction-id')

    def run_import(*scraped_people):
        # a fresh importer per call, exactly one batch per importer
        batch = [scraped.as_dict() for scraped in scraped_people]
        return PersonImporter('jurisdiction-id').import_data(batch)

    def fake_memberships():
        # create fake memberships so that future lookups work on the imported people
        for stored in Person.objects.all():
            Membership.objects.create(person=stored, organization=wwe)

    # importing two people with the same name to a pristine database should error
    p1 = ScrapePerson('Dwayne Johnson', image='http://example.com/1')
    p2 = ScrapePerson('Dwayne Johnson', image='http://example.com/2')
    with pytest.raises(SameNameError):
        run_import(p1, p2)

    # importing one person should pass
    run_import(p1)
    fake_memberships()

    # importing another person with the same name should fail
    with pytest.raises(SameNameError):
        run_import(p1, p2)

    # adding birth dates should pass
    p1.birth_date = '1970'
    p2.birth_date = '1930'
    report = run_import(p1, p2)
    assert report['person']['insert'] == 1
    assert report['person']['noop'] == 0
    assert report['person']['update'] == 1
    assert Person.objects.count() == 2
    fake_memberships()

    # adding a third person with the same name but without a birthday should error
    p3 = ScrapePerson('Dwayne Johnson', image='http://example.com/3')
    with pytest.raises(SameNameError):
        run_import(p3)

    # and now test that an update works and we can insert a new one with the same name
    p1.image = 'http://example.com/1.jpg'
    p2.birth_date = '1931'  # change birth_date, means a new insert
    report = run_import(p1, p2)
    assert Person.objects.count() == 3
    assert report['person']['insert'] == 1
    assert report['person']['noop'] == 0
    assert report['person']['update'] == 1
def test_bill_sponsor_by_identifier():
    """
    A sponsorship added by identifier/scheme must resolve to the imported
    person carrying that identifier.
    """
    create_jurisdiction()
    chamber = create_org()

    scraped_bill = ScrapeBill('HB 1', '1900', 'Axe & Tack Tax Act',
                              classification='tax bill', chamber='lower')
    scraped_bill.add_sponsorship_by_identifier(name="SNODGRASS",
                                               classification='sponsor',
                                               entity_type='person',
                                               primary=True,
                                               identifier="TOTALLY_REAL_ID",
                                               scheme="TOTALLY_REAL_SCHEME")

    org_importer = OrganizationImporter('jid')
    person_importer = PersonImporter('jid')

    sponsor = ScrapePerson(name='Zadock Snodgrass')
    sponsor.add_identifier(identifier='TOTALLY_REAL_ID',
                           scheme='TOTALLY_REAL_SCHEME')
    person_importer.import_data([sponsor.as_dict()])

    # attach the stored person to the org via a membership
    stored_sponsor = Person.objects.get()
    Membership.objects.create(person_id=stored_sponsor.id,
                              organization_id=chamber.id)

    bill_importer = BillImporter('jid', org_importer, person_importer)
    bill_importer.import_data([scraped_bill.as_dict()])

    stored_bill = Bill.objects.get()
    # unpacking fails unless there is exactly one sponsorship
    [sponsorship] = stored_bill.sponsorships.all()
    assert sponsorship.person.name == "Zadock Snodgrass"
def test_deduplication_no_jurisdiction_overlap():
    """
    Import 'Dwayne Johnson' under 'new-jurisdiction-id' and expect the
    Person count to be 2 afterwards.
    """
    create_person()

    # make sure we get a new person if we're in a different org
    payload = [ScrapePerson('Dwayne Johnson').as_dict()]
    importer = PersonImporter('new-jurisdiction-id')
    importer.import_data(payload)

    assert Person.objects.all().count() == 2
def test_bill_sponsor_by_identifier():
    """
    A sponsorship added by identifier/scheme must resolve to the imported
    person carrying that identifier.
    """
    create_jurisdiction()
    chamber = create_org()

    scraped_bill = ScrapeBill('HB 1', '1900', 'Axe & Tack Tax Act',
                              classification='tax bill', chamber='lower')
    scraped_bill.add_sponsorship_by_identifier(name="SNODGRASS",
                                               classification='sponsor',
                                               entity_type='person',
                                               primary=True,
                                               identifier="TOTALLY_REAL_ID",
                                               scheme="TOTALLY_REAL_SCHEME")

    org_importer = OrganizationImporter('jid')
    person_importer = PersonImporter('jid')

    sponsor = ScrapePerson(name='Zadock Snodgrass')
    sponsor.add_identifier(identifier='TOTALLY_REAL_ID',
                           scheme='TOTALLY_REAL_SCHEME')
    person_importer.import_data([sponsor.as_dict()])

    # attach the stored person to the org via a membership
    stored_sponsor = Person.objects.get()
    Membership.objects.create(person_id=stored_sponsor.id,
                              organization_id=chamber.id)

    bill_importer = BillImporter('jid', org_importer, person_importer)
    bill_importer.import_data([scraped_bill.as_dict()])

    stored_bill = Bill.objects.get()
    # unpacking fails unless there is exactly one sponsorship
    [sponsorship] = stored_bill.sponsorships.all()
    assert sponsorship.person.name == "Zadock Snodgrass"
def test_deduplication_other_name_exists():
    """Import a person named 'Rocky' and expect the Person count to stay at 1."""
    create_person()

    # Rocky is already saved in other_names
    payload = [ScrapePerson('Rocky').as_dict()]
    importer = PersonImporter('jurisdiction-id')
    importer.import_data(payload)

    assert Person.objects.all().count() == 1
def test_deduplication_same_name():
    """Import 'Dwayne Johnson' again and expect the Person count to stay at 1."""
    create_person()

    # simplest case- just the same name
    payload = [ScrapePerson('Dwayne Johnson').as_dict()]
    importer = PersonImporter('jurisdiction-id')
    importer.import_data(payload)

    assert Person.objects.all().count() == 1
def test_deduplication_other_name_exists():
    """Import a person named 'Rocky' and expect the Person count to stay at 1."""
    create_person()

    # Rocky is already saved in other_names
    payload = [ScrapePerson('Rocky').as_dict()]
    importer = PersonImporter('jurisdiction-id')
    importer.import_data(payload)

    assert Person.objects.all().count() == 1
def test_full_person():
    """
    Import a person with an identifier, other name, contact detail, link and
    source, then verify each related record on the stored Person.
    """
    scraped = ScrapePerson('Tom Sawyer')
    scraped.add_identifier('1')
    scraped.add_name('Tommy', start_date='1880')
    scraped.add_contact_detail(type='phone', value='555-555-1234',
                               note='this is fake')
    scraped.add_link('http://example.com/link')
    scraped.add_source('http://example.com/source')

    # import person
    payload = scraped.as_dict()
    PersonImporter('jurisdiction-id').import_data([payload])

    # get person from db and assert it imported correctly
    stored = Person.objects.get()
    assert 'ocd-person' in stored.id
    assert stored.name == scraped.name

    identifier = stored.identifiers.all()[0]
    assert identifier.identifier == '1'
    assert identifier.scheme == ''

    other_name = stored.other_names.all()[0]
    assert other_name.name == 'Tommy'
    assert other_name.start_date == '1880'

    contact = stored.contact_details.all()[0]
    assert contact.type == 'phone'
    assert contact.value == '555-555-1234'
    assert contact.note == 'this is fake'

    assert stored.links.all()[0].url == 'http://example.com/link'
    assert stored.sources.all()[0].url == 'http://example.com/source'
def test_deduplication_no_name_overlap():
    """Import 'CM Punk' and expect the Person count to be 2 afterwards."""
    create_person()

    # make sure we're not just being ridiculous and avoiding importing anything in the same org
    payload = [ScrapePerson('CM Punk').as_dict()]
    importer = PersonImporter('jurisdiction-id')
    importer.import_data(payload)

    assert Person.objects.all().count() == 2
def test_deduplication_no_name_overlap():
    """Import 'CM Punk' and expect the Person count to be 2 afterwards."""
    create_person()

    # make sure we're not just being ridiculous and avoiding importing anything in the same org
    payload = [ScrapePerson('CM Punk').as_dict()]
    importer = PersonImporter('jurisdiction-id')
    importer.import_data(payload)

    assert Person.objects.all().count() == 2
def test_deduplication_same_name():
    """Import 'Dwayne Johnson' again and expect the Person count to stay at 1."""
    create_person()

    # simplest case- just the same name
    payload = [ScrapePerson('Dwayne Johnson').as_dict()]
    importer = PersonImporter('jurisdiction-id')
    importer.import_data(payload)

    assert Person.objects.all().count() == 1
def test_full_person():
    """
    Import a person with an identifier, other name, contact detail, link and
    source, then verify each related record on the stored Person.
    """
    scraped = ScrapePerson('Tom Sawyer')
    scraped.add_identifier('1')
    scraped.add_name('Tommy', start_date='1880')
    scraped.add_contact_detail(type='phone', value='555-555-1234',
                               note='this is fake')
    scraped.add_link('http://example.com/link')
    scraped.add_source('http://example.com/source')

    # import person
    payload = scraped.as_dict()
    PersonImporter('jurisdiction-id').import_data([payload])

    # get person from db and assert it imported correctly
    stored = Person.objects.get()
    assert 'ocd-person' in stored.id
    assert stored.name == scraped.name

    identifier = stored.identifiers.all()[0]
    assert identifier.identifier == '1'
    assert identifier.scheme == ''

    other_name = stored.other_names.all()[0]
    assert other_name.name == 'Tommy'
    assert other_name.start_date == '1880'

    contact = stored.contact_details.all()[0]
    assert contact.type == 'phone'
    assert contact.value == '555-555-1234'
    assert contact.note == 'this is fake'

    assert stored.links.all()[0].url == 'http://example.com/link'
    assert stored.sources.all()[0].url == 'http://example.com/source'
def test_deduplication_no_jurisdiction_overlap():
    """
    Import 'Dwayne Johnson' under 'new-jurisdiction-id' and expect the
    Person count to be 2 afterwards.
    """
    create_person()

    # make sure we get a new person if we're in a different org
    payload = [ScrapePerson('Dwayne Johnson').as_dict()]
    importer = PersonImporter('new-jurisdiction-id')
    importer.import_data(payload)

    assert Person.objects.all().count() == 2
def test_no_membership_for_person_including_party():
    """
    A party affiliation alone must still produce NoMembershipsError: it does
    not tie the person to a jurisdiction, which would allow duplication.
    """
    create_jurisdiction()
    Organization.objects.create(id="fnd", name="Foundation",
                                classification="foundation",
                                jurisdiction_id="fnd-jid")
    Organization.objects.create(id="dem", name="Democratic",
                                classification="party")

    # import a person with no memberships
    stateless = ScrapePerson('a man without a country', party='Democratic')
    people_importer = PersonImporter('fnd-jid')
    orgs_importer = OrganizationImporter('fnd-jid')
    people_importer.import_data([stateless.as_dict()])

    # try to import a membership
    posts_importer = DumbMockImporter()
    membership_importer = MembershipImporter('fnd-jid', people_importer,
                                             orgs_importer, posts_importer)

    with pytest.raises(NoMembershipsError):
        membership_importer.import_data([stateless._related[0].as_dict()])
def test_invalid_fields_related_item():
    """An unexpected key on a related link dict must raise DataImportError."""
    scraped = ScrapePerson('Dwayne')
    scraped.add_link('http://example.com')

    # inject a key into the first link of the serialized person
    payload = scraped.as_dict()
    first_link = payload['links'][0]
    first_link['test'] = 3

    with pytest.raises(DataImportError):
        PersonImporter('jid').import_data([payload])
def test_deduplication_other_name_overlaps():
    """
    Import 'The Rock' with other_name 'Dwayne Johnson' and expect the Person
    count to stay at 1.
    """
    create_person()

    # Person has other_name that overlaps w/ existing name
    scraped = ScrapePerson('The Rock')
    scraped.add_name('Dwayne Johnson')
    payload = [scraped.as_dict()]
    PersonImporter('jurisdiction-id').import_data(payload)

    assert Person.objects.all().count() == 1
def test_deduplication_other_name_overlaps():
    """
    Import 'The Rock' with other_name 'Dwayne Johnson' and expect the Person
    count to stay at 1.
    """
    create_person()

    # Person has other_name that overlaps w/ existing name
    scraped = ScrapePerson('The Rock')
    scraped.add_name('Dwayne Johnson')
    payload = [scraped.as_dict()]
    PersonImporter('jurisdiction-id').import_data(payload)

    assert Person.objects.all().count() == 1
def test_invalid_fields_related_item():
    """An unexpected key on a related link dict must raise DataImportError."""
    scraped = ScrapePerson('Dwayne')
    scraped.add_link('http://example.com')

    # inject a key into the first link of the serialized person
    payload = scraped.as_dict()
    first_link = payload['links'][0]
    first_link['test'] = 3

    with pytest.raises(DataImportError):
        PersonImporter('jid').import_data([payload])
def test_full_vote_event():
    """
    Import an organization, two people, their memberships, a bill and one
    vote event, then verify the stored VoteEvent, its count and its votes.
    """
    j = Jurisdiction.objects.create(id='jid', division_id='did')
    j.legislative_sessions.create(name='1900', identifier='1900')

    sp1 = ScrapePerson('John Smith', primary_org='lower')
    sp2 = ScrapePerson('Adam Smith', primary_org='lower')
    org = ScrapeOrganization(name='House', classification='lower')
    bill = ScrapeBill('HB 1', '1900', 'Axe & Tack Tax Act',
                      from_organization=org._id)
    vote_event = ScrapeVoteEvent(legislative_session='1900',
                                 motion_text='passage',
                                 start_date='1900-04-01',
                                 classification='passage:bill',
                                 result='pass',
                                 bill_chamber='lower',
                                 bill='HB 1',
                                 organization=org._id)
    vote_event.set_count('yes', 20)
    vote_event.yes('John Smith')
    vote_event.no('Adam Smith')

    oi = OrganizationImporter('jid')
    oi.import_data([org.as_dict()])

    pi = PersonImporter('jid')
    pi.import_data([sp1.as_dict(), sp2.as_dict()])

    mi = MembershipImporter('jid', pi, oi, DumbMockImporter())
    mi.import_data([sp1._related[0].as_dict(), sp2._related[0].as_dict()])

    bi = BillImporter('jid', oi, pi)
    bi.import_data([bill.as_dict()])

    VoteEventImporter('jid', pi, oi, bi).import_data([vote_event.as_dict()])

    assert VoteEvent.objects.count() == 1
    ve = VoteEvent.objects.get()
    assert ve.legislative_session == LegislativeSession.objects.get()
    assert ve.motion_classification == ['passage:bill']
    assert ve.bill == Bill.objects.get()

    count = ve.counts.get()
    assert count.option == 'yes'
    assert count.value == 20

    # Materialize the votes once and reuse the list instead of re-querying.
    votes = list(ve.votes.all())
    assert len(votes) == 2

    # Require exactly the two expected voters so an unexpected voter_name
    # cannot slip through a catch-all branch.
    expected_option = {'John Smith': 'yes', 'Adam Smith': 'no'}
    assert {v.voter_name for v in votes} == set(expected_option)
    for v in votes:
        assert v.option == expected_option[v.voter_name]
        assert v.voter == Person.objects.get(name=v.voter_name)
def test_same_name_people():
    """
    Walk through the same-name import rules: collisions without birth dates
    raise SameNameError, while distinct birth dates allow update and insert.
    """
    wwe = Organization.objects.create(name='WWE', jurisdiction_id='jurisdiction-id')

    def run_import(*scraped_people):
        # a fresh importer per call, exactly one batch per importer
        batch = [scraped.as_dict() for scraped in scraped_people]
        return PersonImporter('jurisdiction-id').import_data(batch)

    def fake_memberships():
        # create fake memberships so that future lookups work on the imported people
        for stored in Person.objects.all():
            Membership.objects.create(person=stored, organization=wwe)

    # importing two people with the same name to a pristine database should error
    p1 = ScrapePerson('Dwayne Johnson', image='http://example.com/1')
    p2 = ScrapePerson('Dwayne Johnson', image='http://example.com/2')
    with pytest.raises(SameNameError):
        run_import(p1, p2)

    # importing one person should pass
    run_import(p1)
    fake_memberships()

    # importing another person with the same name should fail
    with pytest.raises(SameNameError):
        run_import(p1, p2)

    # adding birth dates should pass
    p1.birth_date = '1970'
    p2.birth_date = '1930'
    report = run_import(p1, p2)
    assert report['person']['insert'] == 1
    assert report['person']['noop'] == 0
    assert report['person']['update'] == 1
    assert Person.objects.count() == 2
    fake_memberships()

    # adding a third person with the same name but without a birthday should error
    p3 = ScrapePerson('Dwayne Johnson', image='http://example.com/3')
    with pytest.raises(SameNameError):
        run_import(p3)

    # and now test that an update works and we can insert a new one with the same name
    p1.image = 'http://example.com/1.jpg'
    p2.birth_date = '1931'  # change birth_date, means a new insert
    report = run_import(p1, p2)
    assert Person.objects.count() == 3
    assert report['person']['insert'] == 1
    assert report['person']['noop'] == 0
    assert report['person']['update'] == 1
def test_same_name_second_import():
    """
    After two same-named people with birth dates are imported, a later
    import of a third same-named person without a birth date must raise
    SameNameError.
    """
    # ensure two people with the same name don't import without birthdays
    o = Organization.objects.create(name='WWE', jurisdiction_id='jurisdiction-id')
    p1 = ScrapePerson('Dwayne Johnson', image='http://example.com/1')
    p2 = ScrapePerson('Dwayne Johnson', image='http://example.com/2')
    p1.birth_date = '1970'
    p2.birth_date = '1930'

    # when we give them birth dates all is well though
    # (the import report is never inspected here, so it is not bound)
    PersonImporter('jurisdiction-id').import_data([p1.as_dict(), p2.as_dict()])

    # fake some memberships so future lookups work on these people
    for p in Person.objects.all():
        Membership.objects.create(person=p, organization=o)

    p3 = ScrapePerson('Dwayne Johnson', image='http://example.com/3')

    with pytest.raises(SameNameError):
        PersonImporter('jurisdiction-id').import_data([p3.as_dict()])
def test_same_name_second_import():
    """
    After two same-named people with birth dates are imported, a later
    import of a third same-named person without a birth date must raise
    SameNameError.
    """
    create_jurisdiction()
    # ensure two people with the same name don't import without birthdays
    wwe = Organization.objects.create(name='WWE', jurisdiction_id='jid')

    elder = ScrapePerson('Dwayne Johnson', image='http://example.com/1')
    younger = ScrapePerson('Dwayne Johnson', image='http://example.com/2')
    elder.birth_date = '1970'
    younger.birth_date = '1930'

    # when we give them birth dates all is well though
    first_batch = [elder.as_dict(), younger.as_dict()]
    PersonImporter('jid').import_data(first_batch)

    # fake some memberships so future lookups work on these people
    for stored in Person.objects.all():
        Membership.objects.create(person=stored, organization=wwe)

    undated = ScrapePerson('Dwayne Johnson', image='http://example.com/3')

    with pytest.raises(SameNameError):
        PersonImporter('jid').import_data([undated.as_dict()])
def test_bill_sponsor_limit_lookup():
    """
    A sponsorship resolved by identifier must pick the person imported for
    'jid' (birth date 1800-01-01) even after a same-identifier person is
    imported under 'another-jurisdiction'.
    """
    create_jurisdiction()
    chamber = create_org()

    scraped_bill = ScrapeBill('HB 1', '1900', 'Axe & Tack Tax Act',
                              classification='tax bill', chamber='lower')
    scraped_bill.add_sponsorship_by_identifier(name="SNODGRASS",
                                               classification='sponsor',
                                               entity_type='person',
                                               primary=True,
                                               identifier="TOTALLY_REAL_ID",
                                               scheme="TOTALLY_REAL_SCHEME")

    org_importer = OrganizationImporter('jid')
    person_importer = PersonImporter('jid')

    sponsor = ScrapePerson(name='Zadock Snodgrass', birth_date="1800-01-01")
    sponsor.add_identifier(identifier='TOTALLY_REAL_ID',
                           scheme='TOTALLY_REAL_SCHEME')
    person_importer.import_data([sponsor.as_dict()])

    stored_sponsor = Person.objects.get()
    Membership.objects.create(person_id=stored_sponsor.id,
                              organization_id=chamber.id)

    # NOTE(review): this second person is built but never imported; the
    # import below sends `sponsor` again. Confirm whether that is intended.
    namesake = ScrapePerson(name='Zadock Snodgrass', birth_date="1900-01-01")
    namesake.add_identifier(identifier='TOTALLY_REAL_ID',
                            scheme='TOTALLY_REAL_SCHEME')

    # This is contrived and perhaps broken, but we're going to check this.
    # We *really* don't want to *ever* cross jurisdiction bounds.
    PersonImporter('another-jurisdiction').import_data([sponsor.as_dict()])

    bill_importer = BillImporter('jid', org_importer, person_importer)
    bill_importer.import_data([scraped_bill.as_dict()])

    stored_bill = Bill.objects.get()
    # unpacking fails unless there is exactly one sponsorship
    [sponsorship] = stored_bill.sponsorships.all()
    assert sponsorship.person.name == "Zadock Snodgrass"
    assert sponsorship.person.birth_date == "1800-01-01"
def test_same_name_people():
    """
    Same-named people without birth dates raise SameNameError; with distinct
    birth dates both are inserted, and a changed birth date inserts again.
    """
    # ensure two people with the same name don't import without birthdays
    wwe = Organization.objects.create(name='WWE', jurisdiction_id='jurisdiction-id')
    p1 = ScrapePerson('Dwayne Johnson', image='http://example.com/1')
    p2 = ScrapePerson('Dwayne Johnson', image='http://example.com/2')

    def run_import():
        # a fresh importer per call, always importing both people
        return PersonImporter('jurisdiction-id').import_data(
            [p1.as_dict(), p2.as_dict()])

    # the people have the same name but are apparently different
    with pytest.raises(SameNameError):
        run_import()

    # when we give them birth dates all is well though
    p1.birth_date = '1970'
    p2.birth_date = '1930'
    report = run_import()
    assert report['person']['insert'] == 2
    assert report['person']['noop'] == 0
    assert report['person']['update'] == 0
    assert Person.objects.count() == 2

    # fake some memberships so future lookups work on these people
    for stored in Person.objects.all():
        Membership.objects.create(person=stored, organization=wwe)

    # and now test that an update works and we can insert a new one with the same name
    p1.image = 'http://example.com/1.jpg'
    p2.birth_date = '1931'  # change birth_date, means a new insert
    report = run_import()
    assert Person.objects.count() == 3
    assert report['person']['insert'] == 1
    assert report['person']['noop'] == 0
    assert report['person']['update'] == 1
def test_same_name_people():
    """
    Same-named people without birth dates raise SameNameError; with distinct
    birth dates both are inserted, and a changed birth date inserts again.
    """
    # ensure two people with the same name don't import without birthdays
    wwe = Organization.objects.create(name='WWE', jurisdiction_id='jurisdiction-id')
    p1 = ScrapePerson('Dwayne Johnson', image='http://example.com/1')
    p2 = ScrapePerson('Dwayne Johnson', image='http://example.com/2')

    def run_import():
        # a fresh importer per call, always importing both people
        return PersonImporter('jurisdiction-id').import_data(
            [p1.as_dict(), p2.as_dict()])

    # the people have the same name but are apparently different
    with pytest.raises(SameNameError):
        run_import()

    # when we give them birth dates all is well though
    p1.birth_date = '1970'
    p2.birth_date = '1930'
    report = run_import()
    assert report['person']['insert'] == 2
    assert report['person']['noop'] == 0
    assert report['person']['update'] == 0
    assert Person.objects.count() == 2

    # fake some memberships so future lookups work on these people
    for stored in Person.objects.all():
        Membership.objects.create(person=stored, organization=wwe)

    # and now test that an update works and we can insert a new one with the same name
    p1.image = 'http://example.com/1.jpg'
    p2.birth_date = '1931'  # change birth_date, means a new insert
    report = run_import()
    assert Person.objects.count() == 3
    assert report['person']['insert'] == 1
    assert report['person']['noop'] == 0
    assert report['person']['update'] == 1
def test_save_related():
    """Saving a person must also dump its related organization, in that order."""
    scraper = Scraper(juris, '/tmp/')

    player = Person('Michael Jordan')
    player.add_source('http://example.com')
    team = Organization('Chicago Bulls', classification='committee')
    team.add_source('http://example.com')
    player._related.append(team)

    # json.dump is patched so the calls can be inspected
    with mock.patch('json.dump') as json_dump:
        scraper.save_object(player)

    expected_calls = [
        mock.call(saved.as_dict(), mock.ANY, cls=mock.ANY)
        for saved in (player, team)
    ]
    assert json_dump.mock_calls == expected_calls
def test_save_object_basics():
    """save_object must record the output filename and dump the object once."""
    # ensure that save object dumps a file
    scraper = Scraper(juris, '/tmp/')
    player = Person('Michael Jordan')
    player.add_source('http://example.com')

    with mock.patch('json.dump') as json_dump:
        scraper.save_object(player)

    # ensure object is saved in right place
    expected_name = ''.join(('person_', player._id, '.json'))
    assert expected_name in scraper.output_names['person']
    json_dump.assert_called_once_with(player.as_dict(), mock.ANY, cls=mock.ANY)
def test_save_related():
    """Saving a person must also dump its related organization, in that order."""
    scraper = Scraper('jurisdiction', '/tmp/')

    player = Person('Michael Jordan')
    player.add_source('http://example.com')
    team = Organization('Chicago Bulls')
    team.add_source('http://example.com')
    player._related.append(team)

    # json.dump is patched so the calls can be inspected
    with mock.patch('json.dump') as json_dump:
        scraper.save_object(player)

    expected_calls = [
        mock.call(saved.as_dict(), mock.ANY, cls=mock.ANY)
        for saved in (player, team)
    ]
    assert json_dump.mock_calls == expected_calls
def test_save_object_basics():
    """save_object must record the output filename and dump the object once."""
    # ensure that save object dumps a file
    scraper = Scraper('jurisdiction', '/tmp/')
    player = Person('Michael Jordan')
    player.add_source('http://example.com')

    with mock.patch('json.dump') as json_dump:
        scraper.save_object(player)

    # ensure object is saved in right place
    expected_name = ''.join(('person_', player._id, '.json'))
    assert expected_name in scraper.output_names['person']
    json_dump.assert_called_once_with(player.as_dict(), mock.ANY, cls=mock.ANY)
def test_bill_sponsor_limit_lookup():
    """
    A sponsorship resolved by identifier must pick the person imported for
    'jid' (birth date 1800-01-01) even after a same-identifier person is
    imported under 'another-jurisdiction'.
    """
    create_jurisdiction()
    chamber = create_org()

    scraped_bill = ScrapeBill('HB 1', '1900', 'Axe & Tack Tax Act',
                              classification='tax bill', chamber='lower')
    scraped_bill.add_sponsorship_by_identifier(name="SNODGRASS",
                                               classification='sponsor',
                                               entity_type='person',
                                               primary=True,
                                               identifier="TOTALLY_REAL_ID",
                                               scheme="TOTALLY_REAL_SCHEME")

    org_importer = OrganizationImporter('jid')
    person_importer = PersonImporter('jid')

    sponsor = ScrapePerson(name='Zadock Snodgrass', birth_date="1800-01-01")
    sponsor.add_identifier(identifier='TOTALLY_REAL_ID',
                           scheme='TOTALLY_REAL_SCHEME')
    person_importer.import_data([sponsor.as_dict()])

    stored_sponsor = Person.objects.get()
    Membership.objects.create(person_id=stored_sponsor.id,
                              organization_id=chamber.id)

    # NOTE(review): this second person is built but never imported; the
    # import below sends `sponsor` again. Confirm whether that is intended.
    namesake = ScrapePerson(name='Zadock Snodgrass', birth_date="1900-01-01")
    namesake.add_identifier(identifier='TOTALLY_REAL_ID',
                            scheme='TOTALLY_REAL_SCHEME')

    # This is contrived and perhaps broken, but we're going to check this.
    # We *really* don't want to *ever* cross jurisdiction bounds.
    PersonImporter('another-jurisdiction').import_data([sponsor.as_dict()])

    bill_importer = BillImporter('jid', org_importer, person_importer)
    bill_importer.import_data([scraped_bill.as_dict()])

    stored_bill = Bill.objects.get()
    # unpacking fails unless there is exactly one sponsorship
    [sponsorship] = stored_bill.sponsorships.all()
    assert sponsorship.person.name == "Zadock Snodgrass"
    assert sponsorship.person.birth_date == "1800-01-01"
def test_no_membership_for_person():
    """
    Running the membership import with no memberships after importing a
    person must raise NoMembershipsError.
    """
    # the organization is only created, never referenced, so it is not bound
    Organization.objects.create(id="fnd", name="Foundation",
                                classification="foundation",
                                jurisdiction_id="fnd-jid")

    # import a person with no memberships
    p = ScrapePerson('a man without a country')
    person_imp = PersonImporter('fnd-jid')
    person_imp.import_data([p.as_dict()])

    # try to import a membership
    dumb_imp = DumbMockImporter()
    memimp = MembershipImporter('fnd-jid', person_imp, dumb_imp, dumb_imp)

    with pytest.raises(NoMembershipsError):
        memimp.import_data([])
def test_multiple_memberships():
    """
    A person holding two memberships in the same jurisdiction must still be
    deduplicated on import.
    """
    # there was a bug where two or more memberships to the same jurisdiction
    # would cause an ORM error, this test ensures that it is fixed
    wrestler = Person.objects.create(name='Dwayne Johnson')
    for org_name in ('WWE', 'WWF'):
        promotion = Organization.objects.create(name=org_name,
                                                jurisdiction_id='jurisdiction-id')
        Membership.objects.create(person=wrestler, organization=promotion)

    payload = [ScrapePerson('Dwayne Johnson').as_dict()]
    PersonImporter('jurisdiction-id').import_data(payload)

    # deduplication should still work
    assert Person.objects.all().count() == 1
def test_multiple_memberships():
    """
    A person holding two memberships in the same jurisdiction must still be
    deduplicated on import.
    """
    create_jurisdiction()
    # there was a bug where two or more memberships to the same jurisdiction
    # would cause an ORM error, this test ensures that it is fixed
    wrestler = Person.objects.create(name='Dwayne Johnson')
    for org_name in ('WWE', 'WWF'):
        promotion = Organization.objects.create(name=org_name,
                                                jurisdiction_id='jid')
        Membership.objects.create(person=wrestler, organization=promotion)

    payload = [ScrapePerson('Dwayne Johnson').as_dict()]
    PersonImporter('jid').import_data(payload)

    # deduplication should still work
    assert Person.objects.all().count() == 1
def test_no_membership_for_person():
    """
    Running the membership import with no memberships after importing a
    person must raise NoMembershipsError.
    """
    create_jurisdiction()
    Organization.objects.create(id="fnd", name="Foundation",
                                classification="foundation",
                                jurisdiction_id="fnd-jid")

    # import a person with no memberships
    stateless = ScrapePerson('a man without a country')
    people_importer = PersonImporter('fnd-jid')
    people_importer.import_data([stateless.as_dict()])

    # try to import a membership
    stub_importer = DumbMockImporter()
    membership_importer = MembershipImporter('fnd-jid', people_importer,
                                             stub_importer, stub_importer)

    with pytest.raises(NoMembershipsError):
        membership_importer.import_data([])
def test_no_membership_for_person_including_party():
    """
    even though party is specified we should still get a no memberships error
    because it doesn't bind the person to a jurisdiction, thus causing
    duplication
    """
    # both organizations are only created, never referenced, so the results
    # are not bound to a name
    Organization.objects.create(id="fnd", name="Foundation",
                                classification="foundation",
                                jurisdiction_id="fnd-jid")
    Organization.objects.create(id="dem", name="Democratic",
                                classification="party")

    # import a person with no memberships
    p = ScrapePerson('a man without a country', party='Democratic')
    person_imp = PersonImporter('fnd-jid')
    person_imp.import_data([p.as_dict()])

    # try to import a membership
    dumb_imp = DumbMockImporter()
    memimp = MembershipImporter('fnd-jid', person_imp, dumb_imp, dumb_imp)

    with pytest.raises(NoMembershipsError):
        memimp.import_data([p._related[0].as_dict()])
def test_full_bill():
    """Import a fully populated scraped bill and check the stored bill and its related records."""
    create_jurisdiction()

    # scraped entities the bill refers to
    adam = ScrapePerson('Adam Smith')
    house = ScrapeOrganization(name='House', classification='lower')
    committee = ScrapeOrganization(name='Arbitrary Committee', classification='committee',
                                   parent_id=house._id)

    prior_bill = ScrapeBill('HB 99', '1899', 'Axe & Tack Tax Act',
                            classification='tax bill', from_organization=house._id)

    scraped = ScrapeBill('HB 1', '1900', 'Axe & Tack Tax Act',
                         classification='tax bill', from_organization=house._id)
    scraped.subject = ['taxes', 'axes']
    scraped.add_identifier('SB 9')
    scraped.add_title('Tack & Axe Tax Act')
    scraped.add_action('introduced in house', '1900-04-01', chamber='lower')
    referral = scraped.add_action('sent to arbitrary committee', '1900-04-04',
                                  chamber='lower')
    referral.add_related_entity('arbitrary committee', 'organization', committee._id)
    scraped.add_related_bill("HB 99", legislative_session="1899",
                             relation_type="prior-session")
    scraped.add_sponsorship('Adam Smith', classification='extra sponsor',
                            entity_type='person', primary=False, entity_id=adam._id)
    scraped.add_sponsorship('Jane Smith', classification='lead sponsor',
                            entity_type='person', primary=True)
    scraped.add_abstract('This is an act about axes and taxes and tacks.',
                         note="official", date='1969-10-20')
    scraped.add_document_link('Fiscal Note', 'http://example.com/fn.pdf',
                              media_type='application/pdf')
    scraped.add_document_link('Fiscal Note', 'http://example.com/fn.html',
                              media_type='text/html')
    scraped.add_version_link('Fiscal Note', 'http://example.com/v/1',
                             media_type='text/html')
    scraped.add_source('http://example.com/source')

    # import bill
    org_importer = OrganizationImporter('jid')
    org_importer.import_data([house.as_dict(), committee.as_dict()])

    person_importer = PersonImporter('jid')
    person_importer.import_data([adam.as_dict()])

    bill_importer = BillImporter('jid', org_importer, person_importer)
    bill_importer.import_data([prior_bill.as_dict(), scraped.as_dict()])

    # get bill from db and assert it imported correctly
    stored = Bill.objects.get(identifier='HB 1')
    assert stored.from_organization.classification == 'lower'
    assert stored.identifier == scraped.identifier
    assert stored.title == scraped.title
    assert stored.classification == scraped.classification
    assert stored.subject == ['taxes', 'axes']
    assert stored.abstracts.get().note == 'official'
    assert stored.abstracts.get().date == '1969-10-20'

    # other_title, other_identifier added
    assert stored.other_titles.get().title == 'Tack & Axe Tax Act'
    assert stored.other_identifiers.get().identifier == 'SB 9'

    # actions
    actions = list(stored.actions.all())
    assert len(actions) == 2
    # ensure order was preserved (if this breaks it'll be intermittent)
    assert actions[0].organization == Organization.objects.get(classification='lower')
    assert actions[0].description == "introduced in house"
    assert actions[1].description == "sent to arbitrary committee"
    related_org = actions[1].related_entities.get().organization
    assert related_org == Organization.objects.get(classification='committee')

    # related_bills were added
    related = stored.related_bills.get()
    assert related.identifier == 'HB 99'

    # and bill got resolved
    assert related.related_bill.identifier == 'HB 99'

    # sponsors added, linked & unlinked
    sponsorships = stored.sponsorships.all()
    assert len(sponsorships) == 2
    adam_in_db = Person.objects.get(name='Adam Smith')
    for sponsorship in sponsorships:
        if not sponsorship.primary:
            # the non-primary sponsor carried an entity_id, so it is linked
            assert sponsorship.person == adam_in_db
            continue
        # the primary sponsor had no entity_id, so nothing is linked
        assert sponsorship.person is None
        assert sponsorship.organization is None

    # versions & documents with their links
    versions = stored.versions.all()
    assert len(versions) == 1
    assert versions[0].links.count() == 1
    documents = stored.documents.all()
    assert len(documents) == 1
    assert documents[0].links.count() == 2

    # sources
    assert stored.sources.count() == 1
def test_full_bill():
    """Round-trip a scraped bill with all optional pieces through BillImporter.

    Builds a person, two organizations, a prior-session bill and the bill
    under test, imports them, then checks the stored bill field by field.
    """
    create_jurisdiction()

    sponsor = ScrapePerson('Adam Smith')
    chamber = ScrapeOrganization(name='House', classification='lower')
    panel = ScrapeOrganization(
        name='Arbitrary Committee',
        classification='committee',
        parent_id=chamber._id,
    )

    # Both bills share the same classification and originating organization.
    shared_kwargs = dict(classification='tax bill', from_organization=chamber._id)
    earlier = ScrapeBill('HB 99', '1899', 'Axe & Tack Tax Act', **shared_kwargs)
    current = ScrapeBill('HB 1', '1900', 'Axe & Tack Tax Act', **shared_kwargs)

    current.subject = ['taxes', 'axes']
    current.add_identifier('SB 9')
    current.add_title('Tack & Axe Tax Act')
    current.add_action('introduced in house', '1900-04-01', chamber='lower')
    committee_action = current.add_action(
        'sent to arbitrary committee', '1900-04-04', chamber='lower')
    committee_action.add_related_entity('arbitrary committee', 'organization', panel._id)
    current.add_related_bill(
        "HB 99", legislative_session="1899", relation_type="prior-session")
    current.add_sponsorship(
        'Adam Smith', classification='extra sponsor', entity_type='person',
        primary=False, entity_id=sponsor._id)
    current.add_sponsorship(
        'Jane Smith', classification='lead sponsor', entity_type='person',
        primary=True)
    current.add_abstract(
        'This is an act about axes and taxes and tacks.',
        note="official", date='1969-10-20')
    current.add_document_link(
        'Fiscal Note', 'http://example.com/fn.pdf', media_type='application/pdf')
    current.add_document_link(
        'Fiscal Note', 'http://example.com/fn.html', media_type='text/html')
    current.add_version_link(
        'Fiscal Note', 'http://example.com/v/1', media_type='text/html')
    current.add_source('http://example.com/source')

    # import bill
    organizations = OrganizationImporter('jid')
    organizations.import_data([chamber.as_dict(), panel.as_dict()])
    people = PersonImporter('jid')
    people.import_data([sponsor.as_dict()])
    BillImporter('jid', organizations, people).import_data(
        [earlier.as_dict(), current.as_dict()])

    # get bill from db and assert it imported correctly
    saved = Bill.objects.get(identifier='HB 1')
    assert saved.from_organization.classification == 'lower'
    assert saved.identifier == current.identifier
    assert saved.title == current.title
    assert saved.classification == current.classification
    assert saved.subject == ['taxes', 'axes']
    assert saved.abstracts.get().note == 'official'
    assert saved.abstracts.get().date == '1969-10-20'

    # other_title, other_identifier added
    assert saved.other_titles.get().title == 'Tack & Axe Tax Act'
    assert saved.other_identifiers.get().identifier == 'SB 9'

    # actions
    saved_actions = list(saved.actions.all())
    assert len(saved_actions) == 2
    # ensure order was preserved (if this breaks it'll be intermittent)
    introduced, referred = saved_actions
    assert introduced.organization == Organization.objects.get(classification='lower')
    assert introduced.description == "introduced in house"
    assert referred.description == "sent to arbitrary committee"
    assert (referred.related_entities.get().organization ==
            Organization.objects.get(classification='committee'))

    # related_bills were added
    prior_link = saved.related_bills.get()
    assert prior_link.identifier == 'HB 99'

    # and bill got resolved
    assert prior_link.related_bill.identifier == 'HB 99'

    # sponsors added, linked & unlinked
    saved_sponsorships = saved.sponsorships.all()
    assert len(saved_sponsorships) == 2
    stored_person = Person.objects.get(name='Adam Smith')
    for entry in saved_sponsorships:
        if entry.primary:
            # primary sponsor was added without an entity_id, so it stays unlinked
            assert entry.person is None
            assert entry.organization is None
        else:
            assert entry.person == stored_person

    # versions & documents with their links
    saved_versions = saved.versions.all()
    assert len(saved_versions) == 1
    assert saved_versions[0].links.count() == 1
    saved_documents = saved.documents.all()
    assert len(saved_documents) == 1
    assert saved_documents[0].links.count() == 2

    # sources
    assert saved.sources.count() == 1