def test_same_name_people():
    """Same-name people only import when birth dates tell them apart.

    Walks one database through: a rejected same-name pair, a single accepted
    import, a rejected pair against existing data, an accepted pair once birth
    dates are set, a rejected birthday-less third person, and finally an
    update plus a fresh insert caused by a changed birth date.
    """

    def run_import(*scraped):
        # A new importer per call, exactly as a separate scrape run would do.
        return PersonImporter("jid").import_data(
            [person.as_dict() for person in scraped]
        )

    def attach_memberships():
        # create fake memberships so that future lookups work on the imported people
        for stored in Person.objects.all():
            Membership.objects.create(person=stored, organization=org)

    create_jurisdiction()
    org = Organization.objects.create(name="WWE", jurisdiction_id="jid")

    first = ScrapePerson("Dwayne Johnson", image="http://example.com/1")
    second = ScrapePerson("Dwayne Johnson", image="http://example.com/2")

    # Two people with the same name going into a pristine database must error.
    with pytest.raises(SameNameError):
        run_import(first, second)

    # A single person on their own is accepted.
    run_import(first)
    attach_memberships()

    # The same-name pair is still rejected now that one of them is stored.
    with pytest.raises(SameNameError):
        run_import(first, second)

    # Birth dates make the pair importable: one update, one insert.
    first.birth_date = "1970"
    second.birth_date = "1930"
    outcome = run_import(first, second)
    person_counts = outcome["person"]
    assert person_counts["insert"] == 1
    assert person_counts["noop"] == 0
    assert person_counts["update"] == 1
    assert Person.objects.count() == 2
    attach_memberships()

    # A third same-name person without a birthday must error.
    third = ScrapePerson("Dwayne Johnson", image="http://example.com/3")
    with pytest.raises(SameNameError):
        run_import(third)

    # An image change is an update; a changed birth_date means a new insert.
    first.image = "http://example.com/1.jpg"
    second.birth_date = "1931"
    outcome = run_import(first, second)
    assert Person.objects.count() == 3
    person_counts = outcome["person"]
    assert person_counts["insert"] == 1
    assert person_counts["noop"] == 0
    assert person_counts["update"] == 1
def test_same_name_second_import():
    """A later import of a same-name person without a birth date must fail.

    Two same-name people carrying birth dates are imported first; a third
    person with that name and no birth date is then expected to raise
    SameNameError.
    """
    create_jurisdiction()
    org = Organization.objects.create(name="WWE", jurisdiction_id="jid")

    # Build the initial pair; the birth dates are what let them coexist.
    initial_pair = []
    for image_url, born in (
        ("http://example.com/1", "1970"),
        ("http://example.com/2", "1930"),
    ):
        scraped = ScrapePerson("Dwayne Johnson", image=image_url)
        scraped.birth_date = born
        initial_pair.append(scraped)

    # With birth dates set, importing both together is fine.
    PersonImporter("jid").import_data([s.as_dict() for s in initial_pair])

    # fake some memberships so future lookups work on these people
    for stored in Person.objects.all():
        Membership.objects.create(person=stored, organization=org)

    # A newcomer with the same name and no birth date cannot be disambiguated.
    newcomer = ScrapePerson("Dwayne Johnson", image="http://example.com/3")
    with pytest.raises(SameNameError):
        PersonImporter("jid").import_data([newcomer.as_dict()])
def legislators(self, latest_only):
    """Build one Person per legislator name from the membership rows.

    Iterates the ``(member, chamber, term, url)`` tuples yielded by
    ``self._memberships(latest_only)``. Rows that resolve to the same name
    (after the ``AKA`` alias lookup) share a single Person and accumulate
    terms. For every kept row the legislator's detail page is fetched and
    used to set the photo and contact details.

    Args:
        latest_only: passed through unchanged to ``self._memberships``.

    Returns:
        dict mapping name to a ``(person, terms)`` tuple, where ``terms`` is a
        list of ``(chamber, district, term, party)`` tuples.

    Raises:
        ValueError: if a row does not contain exactly five ``td`` cells.
        KeyError: if a non-blank party cell's text is not ``D``, ``R`` or ``I``.
        IndexError: if a row has no detail link, a detail page has no member
            photo, or an office lookup matches fewer than four tables.
    """
    # Loop-invariant lookup data, built once instead of once per row.
    party_names = {"D": "Democratic", "R": "Republican", "I": "Independent"}
    bio_unavailable = (
        "Senate Biography Information for the 98th General "
        "Assembly is not currently available."
    )
    office_xpaths = {
        "Capitol Office": '//table[contains(string(), "Springfield Office")]',
        "District Office": '//table[contains(string(), "District Office")]',
    }

    legs = {}

    for member, chamber, term, url in self._memberships(latest_only):
        name, _, _, district, party = member.xpath("td")
        district = district.text
        detail_url = name.xpath("a/@href")[0]

        # A blank party cell is treated as Independent; any other value must
        # be one of the known single-letter codes.
        if party.text_content().strip() == "":
            party = "Independent"
        else:
            party = party_names[party.text]
        name = name.text_content().strip()

        # inactive legislator (name carries a trailing "*"), skip them for now
        if name.endswith("*"):
            continue

        name = AKA.get(name, name)

        if name in legs:
            p, terms = legs[name]
            terms.append((chamber, district, term, party))
        else:
            p = Person(name, party=party)
            legs[name] = p, [(chamber, district, term, party)]

        p.add_source(url)
        p.add_source(detail_url)
        p.add_link(detail_url)

        birth_date = BIRTH_DATES.get(name, None)
        if birth_date:
            p.birth_date = birth_date

        leg_html = self.get(detail_url).text
        leg_doc = lxml.html.fromstring(leg_html)
        leg_doc.make_links_absolute(detail_url)

        if bio_unavailable in leg_html:
            # The legislator's bio isn't available yet; the Person stays in
            # ``legs`` but gets no photo or contact details from this row.
            self.logger.warning("No legislator bio available for " + name)
            continue

        photo_url = leg_doc.xpath('//img[contains(@src, "/members/")]/@src')[0]
        p.image = photo_url

        # Start from an empty list on every row; presumably this keeps a
        # legislator seen in several rows from piling up duplicates.
        p.contact_details = []

        # email
        email = leg_doc.xpath('//b[text()="Email: "]')
        if email:
            p.add_contact_detail(
                type="email", value=email[0].tail.strip(), note="Capitol Office"
            )

        for location, xpath in office_xpaths.items():
            table = leg_doc.xpath(xpath)
            if table:
                # NOTE(review): the fourth match is used unconditionally;
                # presumably the page nests layout tables so index 3 is the
                # office table itself. Confirm against the live markup.
                for contact_type, value in self._table_to_office(table[3]):
                    if contact_type in (
                        "fax",
                        "voice",
                    ) and not validate_phone_number(value):
                        continue

                    p.add_contact_detail(
                        type=contact_type, value=value, note=location
                    )

    return legs