def test_full_vote_event():
    """Import a fully populated vote event and check what was stored.

    Sets up a jurisdiction with a "1900" session, two scraped people, a
    "lower" organization and a bill, runs the importers in dependency
    order, and then verifies the saved VoteEvent's session, motion
    classification, bill, count and the two individual votes.
    """
    jurisdiction = create_jurisdiction()
    jurisdiction.legislative_sessions.create(name="1900", identifier="1900")

    # scraped inputs
    john = ScrapePerson("John Smith", primary_org="lower")
    adam = ScrapePerson("Adam Smith", primary_org="lower")
    house = ScrapeOrganization(name="House", classification="lower")
    bill = ScrapeBill(
        "HB 1", "1900", "Axe & Tack Tax Act", from_organization=house._id
    )
    scraped_vote = ScrapeVoteEvent(
        legislative_session="1900",
        motion_text="passage",
        start_date="1900-04-01",
        classification="passage:bill",
        result="pass",
        bill_chamber="lower",
        bill="HB 1",
        organization=house._id,
    )
    scraped_vote.set_count("yes", 20)
    scraped_vote.yes("John Smith")
    scraped_vote.no("Adam Smith")

    # organizations -> people -> memberships -> bills -> vote events
    org_importer = OrganizationImporter("jid")
    org_importer.import_data([house.as_dict()])
    person_importer = PersonImporter("jid")
    person_importer.import_data([john.as_dict(), adam.as_dict()])
    membership_importer = MembershipImporter(
        "jid", person_importer, org_importer, DumbMockImporter()
    )
    membership_importer.import_data(
        [scraped._related[0].as_dict() for scraped in (john, adam)]
    )
    bill_importer = BillImporter("jid", org_importer, person_importer)
    bill_importer.import_data([bill.as_dict()])
    vote_importer = VoteEventImporter(
        "jid", person_importer, org_importer, bill_importer
    )
    vote_importer.import_data([scraped_vote.as_dict()])

    assert VoteEvent.objects.count() == 1
    saved = VoteEvent.objects.get()
    assert saved.legislative_session == LegislativeSession.objects.get()
    assert saved.motion_classification == ["passage:bill"]
    assert saved.bill == Bill.objects.get()

    # exactly one count row was recorded: 20 "yes"
    tally = saved.counts.get()
    assert tally.option == "yes"
    assert tally.value == 20

    assert len(list(saved.votes.all())) == 2
    for vote in saved.votes.all():
        # John voted yes; the only other voter, Adam, voted no
        is_john = vote.voter_name == "John Smith"
        expected_option = "yes" if is_john else "no"
        expected_name = "John Smith" if is_john else "Adam Smith"
        assert vote.option == expected_option
        assert vote.voter == Person.objects.get(name=expected_name)
def test_same_name_people_other_name():
    """Names added via add_name() take part in same-name collision detection."""
    create_jurisdiction()
    Organization.objects.create(name="WWE", jurisdiction_id="jid")

    named = ScrapePerson("Dwayne Johnson", image="http://example.com/1")
    aliased = ScrapePerson("Rock", image="http://example.com/2")
    aliased.add_name("Dwayne Johnson")

    batch = [named.as_dict(), aliased.as_dict()]

    # both records carry the name "Dwayne Johnson" yet look like different
    # people, so the importer is expected to refuse the batch
    with pytest.raises(SameNameError):
        PersonImporter("jid").import_data(batch)
def test_multiple_orgs_of_same_class():
    """
    Memberships can target organizations that share a classification inside
    one jurisdiction; primary_org_name selects which organization is meant.
    """
    create_jurisdiction()
    for org_id, org_name in (("fnd", "Foundation"), ("fdr", "Federation")):
        Organization.objects.create(
            id=org_id,
            name=org_name,
            classification="foundation",
            jurisdiction_id="fnd-jid",
        )

    hari = ScrapePerson(
        "Hari Seldon",
        primary_org="foundation",
        role="founder",
        primary_org_name="Foundation",
    )
    picard = ScrapePerson(
        "Jean Luc Picard",
        primary_org="foundation",
        role="founder",
        primary_org_name="Federation",
    )

    # each person goes through the importer in a separate call
    person_importer = PersonImporter("fnd-jid")
    for scraped in (hari, picard):
        person_importer.import_data([scraped.as_dict()])

    # now import the membership each scraped person carries in _related
    org_importer = OrganizationImporter("fnd-jid")
    stub_importer = DumbMockImporter()
    membership_importer = MembershipImporter(
        "fnd-jid", person_importer, org_importer, stub_importer
    )
    membership_importer.import_data(
        [hari._related[0].as_dict(), picard._related[0].as_dict()]
    )

    expected_org_by_person = (
        ("Hari Seldon", "Foundation"),
        ("Jean Luc Picard", "Federation"),
    )
    for person_name, org_name in expected_org_by_person:
        member = Person.objects.get(name=person_name)
        assert member.memberships.get().organization.name == org_name
def test_full_person():
    """Import a person with every kind of related item and verify each one."""
    scraped = ScrapePerson("Tom Sawyer")
    scraped.add_identifier("1")
    scraped.add_name("Tommy", start_date="1880")
    scraped.add_contact_detail(
        type="phone", value="555-555-1234", note="this is fake"
    )
    scraped.add_link("http://example.com/link")
    scraped.add_source("http://example.com/source")

    # run the import
    PersonImporter("jid").import_data([scraped.as_dict()])

    # fetch the single stored person and compare field by field
    stored = Person.objects.get()
    assert "ocd-person" in stored.id
    assert stored.name == scraped.name

    identifier = stored.identifiers.all()[0]
    assert identifier.identifier == "1"
    assert identifier.scheme == ""

    other_name = stored.other_names.all()[0]
    assert other_name.name == "Tommy"
    assert other_name.start_date == "1880"

    contact = stored.contact_details.all()[0]
    assert contact.type == "phone"
    assert contact.value == "555-555-1234"
    assert contact.note == "this is fake"

    assert stored.links.all()[0].url == "http://example.com/link"
    assert stored.sources.all()[0].url == "http://example.com/source"
def test_bill_sponsor_by_identifier():
    """A sponsorship added by identifier/scheme resolves to the matching person."""
    create_jurisdiction()
    org = create_org()

    scraped_bill = ScrapeBill(
        "HB 1",
        "1900",
        "Axe & Tack Tax Act",
        classification="tax bill",
        chamber="lower",
    )
    scraped_bill.add_sponsorship_by_identifier(
        name="SNODGRASS",
        classification="sponsor",
        entity_type="person",
        primary=True,
        identifier="TOTALLY_REAL_ID",
        scheme="TOTALLY_REAL_SCHEME",
    )

    org_importer = OrganizationImporter("jid")
    person_importer = PersonImporter("jid")

    # the person carries the same identifier/scheme pair as the sponsorship
    snodgrass = ScrapePerson(name="Zadock Snodgrass")
    snodgrass.add_identifier(
        identifier="TOTALLY_REAL_ID", scheme="TOTALLY_REAL_SCHEME"
    )
    person_importer.import_data([snodgrass.as_dict()])

    # give the stored person a membership in the organization from create_org()
    stored_person = Person.objects.get()
    Membership.objects.create(
        person_id=stored_person.id, organization_id=org.id
    )

    bill_importer = BillImporter("jid", org_importer, person_importer)
    bill_importer.import_data([scraped_bill.as_dict()])

    stored_bill = Bill.objects.get()
    # unpacking doubles as a check that there is exactly one sponsorship
    [sponsorship] = stored_bill.sponsorships.all()
    assert sponsorship.person.name == "Zadock Snodgrass"
def test_no_membership_for_person_including_party():
    """
    Specifying a party is not enough: a party membership does not tie the
    person to a jurisdiction (which would lead to duplication), so the
    membership import must still raise NoMembershipsError.
    """
    create_jurisdiction()
    Organization.objects.create(
        id="fnd",
        name="Foundation",
        classification="foundation",
        jurisdiction_id="fnd-jid",
    )
    # the party organization is created without a jurisdiction
    Organization.objects.create(
        id="dem", name="Democratic", classification="party"
    )

    # a person whose only affiliation is the party
    wanderer = ScrapePerson("a man without a country", party="Democratic")
    person_importer = PersonImporter("fnd-jid")
    org_importer = OrganizationImporter("fnd-jid")
    person_importer.import_data([wanderer.as_dict()])

    # importing the related (party) membership is expected to fail
    stub_importer = DumbMockImporter()
    membership_importer = MembershipImporter(
        "fnd-jid", person_importer, org_importer, stub_importer
    )
    with pytest.raises(NoMembershipsError):
        membership_importer.import_data([wanderer._related[0].as_dict()])
def test_same_name_people():
    """Walk through the same-name rules of PersonImporter step by step.

    Same-named people without birth dates raise SameNameError; adding
    distinct birth dates lets them coexist, and changing a birth date
    afterwards results in a fresh insert.
    """
    create_jurisdiction()
    wwe = Organization.objects.create(name="WWE", jurisdiction_id="jid")

    def link_everyone_to_org():
        # fake memberships so that future lookups work on the imported people
        for existing in Person.objects.all():
            Membership.objects.create(person=existing, organization=wwe)

    first = ScrapePerson("Dwayne Johnson", image="http://example.com/1")
    second = ScrapePerson("Dwayne Johnson", image="http://example.com/2")

    # two same-named people into a pristine database should error
    with pytest.raises(SameNameError):
        PersonImporter("jid").import_data([first.as_dict(), second.as_dict()])

    # a single person on their own should pass
    PersonImporter("jid").import_data([first.as_dict()])
    link_everyone_to_org()

    # bringing in the second same-named person should still fail
    with pytest.raises(SameNameError):
        PersonImporter("jid").import_data([first.as_dict(), second.as_dict()])

    # distinct birth dates make the two distinguishable
    first.birth_date = "1970"
    second.birth_date = "1930"
    summary = PersonImporter("jid").import_data(
        [first.as_dict(), second.as_dict()]
    )
    person_counts = summary["person"]
    assert person_counts["insert"] == 1
    assert person_counts["noop"] == 0
    assert person_counts["update"] == 1
    assert Person.objects.count() == 2
    link_everyone_to_org()

    # a third same-named person without a birthday should error
    third = ScrapePerson("Dwayne Johnson", image="http://example.com/3")
    with pytest.raises(SameNameError):
        PersonImporter("jid").import_data([third.as_dict()])

    # an update still works, and a changed birth_date means a new insert
    first.image = "http://example.com/1.jpg"
    second.birth_date = "1931"
    summary = PersonImporter("jid").import_data(
        [first.as_dict(), second.as_dict()]
    )
    assert Person.objects.count() == 3
    person_counts = summary["person"]
    assert person_counts["insert"] == 1
    assert person_counts["noop"] == 0
    assert person_counts["update"] == 1
def test_deduplication_no_jurisdiction_overlap():
    """Importing under a different jurisdiction id adds a second person."""
    create_jurisdiction()
    # presumably stores one "Dwayne Johnson" tied to "jid" -- see create_person()
    create_person()

    # the importer is bound to a different jurisdiction id, so a new
    # Person row is expected rather than a merge
    scraped = ScrapePerson("Dwayne Johnson")
    importer = PersonImporter("new-jurisdiction-id")
    importer.import_data([scraped.as_dict()])

    assert Person.objects.all().count() == 2
def test_deduplication_same_name():
    """Simplest deduplication case: an identical name must not add a person."""
    create_jurisdiction()
    # presumably stores one "Dwayne Johnson" tied to "jid" -- see create_person()
    create_person()

    scraped = ScrapePerson("Dwayne Johnson")
    importer = PersonImporter("jid")
    importer.import_data([scraped.as_dict()])

    # still just the one person created by create_person()
    assert Person.objects.all().count() == 1
def test_deduplication_other_name_exists():
    """A scraped name that matches a stored other_name must not add a person."""
    create_jurisdiction()
    # create_person() is expected to have saved "Rocky" among other_names
    create_person()

    scraped = ScrapePerson("Rocky")
    importer = PersonImporter("jid")
    importer.import_data([scraped.as_dict()])

    assert Person.objects.all().count() == 1
def test_invalid_fields_related_item():
    """An unexpected key on a related item (a link) raises DataImportError."""
    scraped = ScrapePerson("Dwayne")
    scraped.add_link("http://example.com")

    payload = scraped.as_dict()
    # inject a "test" key into the first link before importing
    payload["links"][0]["test"] = 3

    with pytest.raises(DataImportError):
        PersonImporter("jid").import_data([payload])
def test_deduplication_no_name_overlap():
    """A differently named person in the same jurisdiction is still imported."""
    create_jurisdiction()
    create_person()

    # sanity check: deduplication must not swallow every import that
    # happens to share the jurisdiction
    scraped = ScrapePerson("CM Punk")
    importer = PersonImporter("jid")
    importer.import_data([scraped.as_dict()])

    assert Person.objects.all().count() == 2
def test_deduplication_other_name_overlaps():
    """A scraped other_name that matches an existing name must not add a person."""
    create_jurisdiction()
    # presumably stores one "Dwayne Johnson" tied to "jid" -- see create_person()
    create_person()

    # primary name is new, but the added alternate name overlaps
    scraped = ScrapePerson("The Rock")
    scraped.add_name("Dwayne Johnson")
    importer = PersonImporter("jid")
    importer.import_data([scraped.as_dict()])

    assert Person.objects.all().count() == 1
def test_same_name_second_import():
    """A later same-named import without a birth date raises SameNameError."""
    create_jurisdiction()
    wwe = Organization.objects.create(name="WWE", jurisdiction_id="jid")

    elder = ScrapePerson("Dwayne Johnson", image="http://example.com/1")
    younger = ScrapePerson("Dwayne Johnson", image="http://example.com/2")
    elder.birth_date = "1970"
    younger.birth_date = "1930"

    # with birth dates set, the two same-named people import without error
    PersonImporter("jid").import_data([elder.as_dict(), younger.as_dict()])

    # fake some memberships so future lookups work on these people
    for stored in Person.objects.all():
        Membership.objects.create(person=stored, organization=wwe)

    # a third same-named person with no birth date should be rejected
    newcomer = ScrapePerson("Dwayne Johnson", image="http://example.com/3")
    with pytest.raises(SameNameError):
        PersonImporter("jid").import_data([newcomer.as_dict()])
def test_save_object_basics():
    """save_object() records the output filename and dumps the object as JSON."""
    scraper = Scraper(juris, "/tmp/")
    jordan = Person("Michael Jordan")
    jordan.add_source("http://example.com")

    # json.dump is patched so the call can be inspected afterwards
    with mock.patch("json.dump") as json_dump:
        scraper.save_object(jordan)

    # the filename is built from the object type and its _id
    expected_filename = "person_" + jordan._id + ".json"
    assert expected_filename in scraper.output_names["person"]
    json_dump.assert_called_once_with(
        jordan.as_dict(), mock.ANY, cls=mock.ANY
    )
def test_bill_sponsor_limit_lookup():
    """Sponsor lookup by identifier must not cross jurisdiction bounds.

    Two people share the name "Zadock Snodgrass" and the same
    identifier/scheme pair but differ in birth date. The one born in 1800
    is imported into "jid" and given a membership there; the one born in
    1900 is imported into "another-jurisdiction". A bill imported for "jid"
    that is sponsored by that identifier must resolve to the 1800 person.
    """
    create_jurisdiction()
    org = create_org()

    bill = ScrapeBill(
        "HB 1", "1900", "Axe & Tack Tax Act", classification="tax bill", chamber="lower"
    )
    bill.add_sponsorship_by_identifier(
        name="SNODGRASS",
        classification="sponsor",
        entity_type="person",
        primary=True,
        identifier="TOTALLY_REAL_ID",
        scheme="TOTALLY_REAL_SCHEME",
    )

    oi = OrganizationImporter("jid")
    pi = PersonImporter("jid")

    # the person who belongs to "jid"
    zs = ScrapePerson(name="Zadock Snodgrass", birth_date="1800-01-01")
    zs.add_identifier(identifier="TOTALLY_REAL_ID", scheme="TOTALLY_REAL_SCHEME")
    pi.import_data([zs.as_dict()])

    za_db = Person.objects.get()
    Membership.objects.create(person_id=za_db.id, organization_id=org.id)

    # a look-alike with the same identifier, distinguishable by birth date
    zs2 = ScrapePerson(name="Zadock Snodgrass", birth_date="1900-01-01")
    zs2.add_identifier(identifier="TOTALLY_REAL_ID", scheme="TOTALLY_REAL_SCHEME")

    # This is contrived and perhaps broken, but we're going to check this.
    # We *really* don't want to *ever* cross jurisdiction bounds.
    # The look-alike (zs2) is what goes into the other jurisdiction; importing
    # zs a second time would make the birth_date assertion below unable to
    # tell the two people apart.
    PersonImporter("another-jurisdiction").import_data([zs2.as_dict()])

    BillImporter("jid", oi, pi).import_data([bill.as_dict()])

    obj = Bill.objects.get()
    (entry,) = obj.sponsorships.all()
    assert entry.person.name == "Zadock Snodgrass"
    # the birth date is the only thing separating the two candidates
    assert entry.person.birth_date == "1800-01-01"
def test_save_related():
    """save_object() also dumps objects in _related, after the parent object."""
    scraper = Scraper(juris, "/tmp/")

    jordan = Person("Michael Jordan")
    jordan.add_source("http://example.com")

    bulls = Organization("Chicago Bulls", classification="committee")
    bulls.add_source("http://example.com")
    jordan._related.append(bulls)

    with mock.patch("json.dump") as json_dump:
        scraper.save_object(jordan)

    # one dump per object: the person first, then the related organization
    expected_calls = [
        mock.call(saved.as_dict(), mock.ANY, cls=mock.ANY)
        for saved in (jordan, bulls)
    ]
    assert json_dump.mock_calls == expected_calls
def test_multiple_memberships():
    """Regression test: several memberships in one jurisdiction must not break dedup.

    Two or more memberships to the same jurisdiction used to cause an ORM
    error; this ensures that bug stays fixed.
    """
    create_jurisdiction()

    existing = Person.objects.create(name="Dwayne Johnson")
    # two organizations in the same jurisdiction, one membership in each
    for org_name in ("WWE", "WWF"):
        organization = Organization.objects.create(
            name=org_name, jurisdiction_id="jid"
        )
        Membership.objects.create(person=existing, organization=organization)

    scraped = ScrapePerson("Dwayne Johnson")
    PersonImporter("jid").import_data([scraped.as_dict()])

    # deduplication should still work
    assert Person.objects.all().count() == 1
def test_no_membership_for_person():
    """Importing memberships for a person who has none raises NoMembershipsError."""
    create_jurisdiction()
    Organization.objects.create(
        id="fnd",
        name="Foundation",
        classification="foundation",
        jurisdiction_id="fnd-jid",
    )

    # a person imported without any membership at all
    loner = ScrapePerson("a man without a country")
    person_importer = PersonImporter("fnd-jid")
    person_importer.import_data([loner.as_dict()])

    # the same stub stands in for both remaining importer arguments
    stub_importer = DumbMockImporter()
    membership_importer = MembershipImporter(
        "fnd-jid", person_importer, stub_importer, stub_importer
    )

    # even an empty membership batch is expected to trigger the error
    with pytest.raises(NoMembershipsError):
        membership_importer.import_data([])
def test_full_bill():
    """Import a bill with every kind of related data and verify the stored result.

    Covers subjects, abstracts, other titles/identifiers, ordered actions
    with related entities, related bills, linked and unlinked sponsorships,
    versions, documents and sources.
    """
    create_jurisdiction()
    sponsor = ScrapePerson("Adam Smith")
    house = ScrapeOrganization(name="House", classification="lower")
    committee = ScrapeOrganization(
        name="Arbitrary Committee",
        classification="committee",
        parent_id=house._id,
    )

    # both bills share a title, classification and originating organization
    shared_kwargs = dict(classification="tax bill", from_organization=house._id)
    prior_bill = ScrapeBill("HB 99", "1899", "Axe & Tack Tax Act", **shared_kwargs)
    bill = ScrapeBill("HB 1", "1900", "Axe & Tack Tax Act", **shared_kwargs)

    bill.subject = ["taxes", "axes"]
    bill.add_identifier("SB 9")
    bill.add_title("Tack & Axe Tax Act")

    bill.add_action("introduced in house", "1900-04-01", chamber="lower")
    referral = bill.add_action(
        "sent to arbitrary committee", "1900-04-04", chamber="lower"
    )
    referral.add_related_entity(
        "arbitrary committee", "organization", committee._id
    )

    bill.add_related_bill(
        "HB 99", legislative_session="1899", relation_type="prior-session"
    )

    # one sponsorship carries an entity_id, the other is left unlinked
    bill.add_sponsorship(
        "Adam Smith",
        classification="extra sponsor",
        entity_type="person",
        primary=False,
        entity_id=sponsor._id,
    )
    bill.add_sponsorship(
        "Jane Smith",
        classification="lead sponsor",
        entity_type="person",
        primary=True,
    )

    bill.add_abstract(
        "This is an act about axes and taxes and tacks.",
        note="official",
        date="1969-10-20",
    )

    # two links under the same document note
    fiscal_note_links = (
        ("http://example.com/fn.pdf", "application/pdf"),
        ("http://example.com/fn.html", "text/html"),
    )
    for url, media_type in fiscal_note_links:
        bill.add_document_link("Fiscal Note", url, media_type=media_type)
    bill.add_version_link(
        "Fiscal Note", "http://example.com/v/1", media_type="text/html"
    )
    bill.add_source("http://example.com/source")

    # import organizations, then people, then both bills
    org_importer = OrganizationImporter("jid")
    org_importer.import_data([house.as_dict(), committee.as_dict()])
    person_importer = PersonImporter("jid")
    person_importer.import_data([sponsor.as_dict()])
    bill_importer = BillImporter("jid", org_importer, person_importer)
    bill_importer.import_data([prior_bill.as_dict(), bill.as_dict()])

    # get bill from db and assert it imported correctly
    stored = Bill.objects.get(identifier="HB 1")
    assert stored.from_organization.classification == "lower"
    assert stored.identifier == bill.identifier
    assert stored.title == bill.title
    assert stored.classification == bill.classification
    assert stored.subject == ["taxes", "axes"]

    abstract = stored.abstracts.get()
    assert abstract.note == "official"
    assert abstract.date == "1969-10-20"

    # other_title, other_identifier added
    assert stored.other_titles.get().title == "Tack & Axe Tax Act"
    assert stored.other_identifiers.get().identifier == "SB 9"

    # actions
    stored_actions = list(stored.actions.all())
    assert len(stored_actions) == 2
    # ensure order was preserved (if this breaks it'll be intermittent)
    first_action, second_action = stored_actions
    assert first_action.organization == Organization.objects.get(
        classification="lower"
    )
    assert first_action.description == "introduced in house"
    assert second_action.description == "sent to arbitrary committee"
    related_org = second_action.related_entities.get().organization
    assert related_org == Organization.objects.get(classification="committee")

    # related_bills were added
    relation = stored.related_bills.get()
    assert relation.identifier == "HB 99"
    # and bill got resolved
    assert relation.related_bill.identifier == "HB 99"

    # sponsors added, linked & unlinked
    sponsorships = stored.sponsorships.all()
    assert len(sponsorships) == 2
    adam = Person.objects.get(name="Adam Smith")
    for sponsorship in sponsorships:
        if not sponsorship.primary:
            # the non-primary sponsorship was given an entity_id
            assert sponsorship.person == adam
            continue
        # the primary sponsorship had no entity attached
        assert sponsorship.person is None
        assert sponsorship.organization is None

    # versions & documents with their links
    stored_versions = stored.versions.all()
    assert len(stored_versions) == 1
    assert stored_versions[0].links.count() == 1
    stored_documents = stored.documents.all()
    assert len(stored_documents) == 1
    assert stored_documents[0].links.count() == 2

    # sources
    assert stored.sources.count() == 1