def test_normalize_atty_contact(self):
    """Check normalize_attorney_contact against a table of contact blocks.

    Every case holds the raw contact text under 'q' and, under 'a', the
    expected result: a two-tuple of (organization dict, contact dict).
    """
    cases = [
        {
            # Email and phone number
            'q': "Landye Bennett Blumstein LLP\n"
                 "701 West Eighth Avenue, Suite 1200\n"
                 "Anchorage, AK 99501\n"
                 "907-276-5152\n"
                 "Email: [email protected]",
            'a': (
                {
                    'name': u"Landye Bennett Blumstein LLP",
                    'address1': u'701 West Eighth Ave.',
                    'address2': u'Suite 1200',
                    'city': u'Anchorage',
                    'state': u'AK',
                    'zip_code': u'99501',
                    'lookup_key': u'701westeighthavesuite1200anchoragelandyebennettblumsteinak99501',
                },
                {
                    'email': u'*****@*****.**',
                    'phone': u'(907) 276-5152',
                    'fax': u'',
                },
            ),
        },
        {
            # PO Box
            'q': "Sands Anderson PC\n"
                 "P.O. Box 2188\n"
                 "Richmond, VA 23218-2188\n"
                 "(804) 648-1636",
            'a': (
                {
                    'name': u'Sands Anderson PC',
                    'address1': u'P.O. Box 2188',
                    'city': u'Richmond',
                    'state': u'VA',
                    'zip_code': u'23218-2188',
                    'lookup_key': u'pobox2188richmondsandsandersonva232182188',
                },
                {
                    'phone': u'(804) 648-1636',
                    'fax': u'',
                    'email': u'',
                },
            ),
        },
        {
            # Lowercase state (needs normalization)
            'q': "Sands Anderson PC\n"
                 "P.O. Box 2188\n"
                 "Richmond, va 23218-2188\n"
                 "(804) 648-1636",
            'a': (
                {
                    'name': u'Sands Anderson PC',
                    'address1': u'P.O. Box 2188',
                    'city': u'Richmond',
                    'state': u'VA',
                    'zip_code': u'23218-2188',
                    'lookup_key': u'pobox2188richmondsandsandersonva232182188',
                },
                {
                    'phone': u"(804) 648-1636",
                    'fax': u'',
                    'email': u'',
                },
            ),
        },
        {
            # Phone, fax, and email -- the whole package.
            'q': "Susman Godfrey, LLP\n"
                 "1201 Third Avenue, Suite 3800\n"
                 "Seattle, WA 98101\n"
                 "206-373-7381\n"
                 "Fax: 206-516-3883\n"
                 "Email: [email protected]",
            'a': (
                {
                    'name': u'Susman Godfrey, LLP',
                    'address1': u'1201 Third Ave.',
                    'address2': u'Suite 3800',
                    'city': u'Seattle',
                    'state': u'WA',
                    'zip_code': u'98101',
                    'lookup_key': u'1201thirdavesuite3800seattlesusmangodfreywa98101',
                },
                {
                    'phone': u'(206) 373-7381',
                    'fax': u'(206) 516-3883',
                    'email': u'*****@*****.**',
                },
            ),
        },
        {
            # No recipient name
            'q': "211 E. Livingston Ave\n"
                 "Columbus, OH 43215\n"
                 "(614) 228-3727\n"
                 "Email:",
            'a': (
                {
                    'address1': u'211 E. Livingston Ave',
                    'city': u'Columbus',
                    'state': u'OH',
                    'zip_code': u'43215',
                    'lookup_key': u'211elivingstonavecolumbusoh43215',
                },
                {
                    'phone': u'(614) 228-3727',
                    'email': u'',
                    'fax': u'',
                },
            ),
        },
        {
            # Weird ways of doing phone numbers
            'q': """1200 Boulevard Tower 1018 Kanawha Boulevard, E Charleston, WV 25301 304/342-3174 Fax: 304/342-0448 Email: [email protected] """,
            'a': (
                {
                    'address1': u'1018 Kanawha Blvd., E',
                    'address2': u'1200 Blvd. Tower',
                    'city': u'Charleston',
                    'state': u'WV',
                    'zip_code': u'25301',
                    'lookup_key': u'1018kanawhablvde1200blvdtowercharlestonwv25301',
                },
                {
                    'phone': '(304) 342-3174',
                    'fax': '(304) 342-0448',
                    'email': '*****@*****.**',
                },
            ),
        },
        {
            # Empty fax numbers (b/c PACER).
            'q': """303 E 17th Ave Suite 300 Denver, CO 80203 303-861-1764 Fax: Email: [email protected] """,
            'a': (
                {
                    'address1': u'303 E 17th Ave',
                    'address2': u'Suite 300',
                    'city': u'Denver',
                    'state': u'CO',
                    'zip_code': u'80203',
                    'lookup_key': u'303e17thavesuite300denverco80203',
                },
                {
                    'phone': u'(303) 861-1764',
                    'fax': u'',
                    'email': u'*****@*****.**',
                },
            ),
        },
        {
            # Funky phone number
            'q': """Guerrini Law Firm 106 SOUTH MENTOR AVE. #150 Pasadena, CA 91106 626-229-9611-202 Fax: 626-229-9615 Email: [email protected] """,
            'a': (
                {
                    'name': u'Guerrini Law Firm',
                    'address1': u'106 South Mentor Ave.',
                    'address2': u'# 150',
                    'city': u'Pasadena',
                    'state': u'CA',
                    'zip_code': u'91106',
                    'lookup_key': u'106southmentorave150pasadenaguerrinilawfirmca91106',
                },
                {
                    'phone': u'',
                    'fax': u'(626) 229-9615',
                    'email': u'*****@*****.**',
                },
            ),
        },
        {
            'q': """Duncan & Sevin, LLC 400 Poydras St. Suite 1200 New Orleans, LA 70130 """,
            'a': (
                {
                    'name': u'Duncan & Sevin, LLC',
                    'address1': u'400 Poydras St.',
                    'address2': u'Suite 1200',
                    'city': u'New Orleans',
                    'state': u'LA',
                    'zip_code': u'70130',
                    'lookup_key': u'400poydrasstsuite1200neworleansduncansevinllcla70130',
                },
                {
                    'phone': u'',
                    'fax': u'',
                    'email': u'',
                },
            ),
        },
        {
            # Ambiguous address. Returns empty dict.
            'q': """Darden, Koretzky, Tessier, Finn, Blossman & Areaux Energy Centre 1100 Poydras Street Suite 3100 New Orleans, LA 70163 504-585-3800 Email: [email protected] """,
            'a': (
                {},
                {
                    'phone': u'(504) 585-3800',
                    'email': u'*****@*****.**',
                    'fax': u'',
                },
            ),
        },
        {
            # Ambiguous address with unicode that triggers
            # https://github.com/datamade/probableparsing/issues/2
            'q': u"""Darden, Koretzky, Tessier, Finn, Blossman & Areaux Energy Centre 1100 Poydras Street Suite 3100 New Orléans, LA 70163 504-585-3800 Email: [email protected] """,
            'a': (
                {},
                {
                    'phone': u'(504) 585-3800',
                    'email': u'*****@*****.**',
                    'fax': u'',
                },
            ),
        },
        {
            # Missing zip code, phone number ambiguously used instead.
            'q': """NSB - Department of Law POB 69 Barrow, AK 907-852-0300 """,
            'a': (
                {
                    'name': u'NSB Department of Law',
                    'address1': u'Pob 69',
                    'city': u'Barrow',
                    'state': u'AK',
                    'zip_code': u'',
                    'lookup_key': u'pob69barrownsbdepartmentoflawak',
                },
                {
                    'phone': u'',
                    'fax': u'',
                    'email': u'',
                },
            ),
        },
        {
            # Unknown/invalid state.
            'q': """Kessler Topaz Meltzer Check LLP 280 King of Prussia Road Radnor, OA 19087 (610) 667-7706 Fax: (610) 667-7056 Email: [email protected] """,
            'a': (
                {
                    'name': u'Kessler Topaz Meltzer Check LLP',
                    'city': u'Radnor',
                    'address1': u'280 King of Prussia Road',
                    'lookup_key': u'280kingofprussiaroadradnorkesslertopazmeltzercheck19087',
                    'state': u'',
                    'zip_code': u'19087',
                },
                {
                    'phone': u'(610) 667-7706',
                    'fax': u'(610) 667-7056',
                    'email': u'*****@*****.**',
                },
            ),
        },
    ]

    # Show complete diffs on failure; the expected dicts are long.
    self.maxDiff = None
    for index, case in enumerate(cases):
        print("Normalizing address %s..." % index, end='')
        actual = normalize_attorney_contact(case['q'])
        self.assertEqual(actual, case['a'])
        print('✓')
def add_attorney(atty, p, d):
    """Create or refresh an attorney and tie them to a party on a docket.

    An existing attorney is looked up by name, narrowed by whichever of the
    phone, fax, email, raw contact text and organization lookup key are
    available; when nothing matches, a new attorney is created. The
    attorney's organization, contact details and roles for this party and
    docket are then brought up to date.

    :param atty: A dict representing an attorney, as provided by Juriscraper.
    The 'name', 'contact' and 'roles' keys are read here.
    :param p: A Party object
    :param d: A Docket object
    :return: None when the lookup matches more than one attorney, otherwise
    the Attorney object.
    :raises ValueError: If none of the docket's date_filed, date_terminated
    or date_last_filing values is set, because max() gets an empty list.
    """
    docket_dates = [d.date_filed, d.date_terminated, d.date_last_filing]
    newest_docket_date = max([dt for dt in docket_dates if dt])
    atty_org_info, atty_info = normalize_attorney_contact(
        atty['contact'],
        fallback_name=atty['name'],
    )

    # OR together every identifying detail that actually has a value.
    q = Q()
    fields = {
        ('phone', atty_info['phone']),
        ('fax', atty_info['fax']),
        ('email', atty_info['email']),
        ('contact_raw', atty['contact']),
        ('organizations__lookup_key', atty_org_info.get('lookup_key')),
    }
    for field, lookup in fields:
        if not lookup:
            continue
        q = q | Q(**{field: lookup})

    candidates = Attorney.objects.filter(Q(name=atty['name']) & q).distinct()
    try:
        a, _created = candidates.get_or_create(
            defaults={
                'name': atty['name'],
                'date_sourced': newest_docket_date,
                'contact_raw': atty['contact'],
            },
        )
    except Attorney.MultipleObjectsReturned:
        logger.info("Got too many results for attorney: '%s'. Punting."
                    % atty)
        return None

    # Associate the attorney with an org and update their contact info.
    if atty['contact']:
        if atty_org_info:
            logger.info("Adding organization information to '%s': '%s'" %
                        (atty['name'], atty_org_info))
            organizations = AttorneyOrganization.objects
            try:
                org = organizations.get(
                    lookup_key=atty_org_info['lookup_key'],
                )
            except AttorneyOrganization.DoesNotExist:
                try:
                    org = organizations.create(**atty_org_info)
                except IntegrityError:
                    # Race condition. Item was created after get. Try again.
                    org = organizations.get(
                        lookup_key=atty_org_info['lookup_key'],
                    )

            # Add the attorney to the organization
            AttorneyOrganizationAssociation.objects.get_or_create(
                attorney=a,
                attorney_organization=org,
                docket=d,
            )

        # Only overwrite stored contact details with data that is at least
        # as recent as what the attorney record was last sourced from.
        docket_info_is_newer = (a.date_sourced <= newest_docket_date)
        if atty_info and docket_info_is_newer:
            logger.info("Updating atty info because %s is more recent than %s."
                        % (newest_docket_date, a.date_sourced))
            a.date_sourced = newest_docket_date
            a.contact_raw = atty['contact']
            a.email = atty_info['email']
            a.phone = atty_info['phone']
            a.fax = atty_info['fax']
            a.save()

    # Do roles
    normalized_roles = [normalize_attorney_role(raw) for raw in atty['roles']]
    known_roles = [role for role in normalized_roles
                   if role['role'] is not None]
    atty_roles = remove_duplicate_dicts(known_roles)
    if not len(atty_roles) > 0:
        logger.info("No role data parsed. Linking via 'UNKNOWN' role.")
        atty_roles = [{'role': Role.UNKNOWN, 'date_action': None}]
    else:
        logger.info("Linking attorney '%s' to party '%s' via %s roles: %s" %
                    (atty['name'], p.name, len(atty_roles), atty_roles))

    # Delete the old roles, replace with new.
    Role.objects.filter(attorney=a, party=p, docket=d).delete()
    new_roles = [
        Role(attorney=a, party=p, docket=d, **atty_role)
        for atty_role in atty_roles
    ]
    Role.objects.bulk_create(new_roles)

    return a
def test_normalize_atty_contact(self):
    """Check normalize_attorney_contact against a table of contact blocks.

    Every case holds the raw contact text under "q" and, under "a", the
    expected result: a two-tuple of (organization dict, contact dict).
    """
    cases = [
        {
            # Email and phone number
            "q": "Landye Bennett Blumstein LLP\n"
                 "701 West Eighth Avenue, Suite 1200\n"
                 "Anchorage, AK 99501\n"
                 "907-276-5152\n"
                 "Email: [email protected]",
            "a": (
                {
                    "name": u"Landye Bennett Blumstein LLP",
                    "address1": u"701 West Eighth Ave.",
                    "address2": u"Suite 1200",
                    "city": u"Anchorage",
                    "state": u"AK",
                    "zip_code": u"99501",
                    "lookup_key": u"701westeighthavesuite1200anchoragelandyebennettblumsteinak99501",
                },
                {
                    "email": u"*****@*****.**",
                    "phone": u"(907) 276-5152",
                    "fax": u"",
                },
            ),
        },
        {
            # PO Box
            "q": "Sands Anderson PC\n"
                 "P.O. Box 2188\n"
                 "Richmond, VA 23218-2188\n"
                 "(804) 648-1636",
            "a": (
                {
                    "name": u"Sands Anderson PC",
                    "address1": u"P.O. Box 2188",
                    "city": u"Richmond",
                    "state": u"VA",
                    "zip_code": u"23218-2188",
                    "lookup_key": u"pobox2188richmondsandsandersonva232182188",
                },
                {
                    "phone": u"(804) 648-1636",
                    "fax": u"",
                    "email": u"",
                },
            ),
        },
        {
            # Lowercase state (needs normalization)
            "q": "Sands Anderson PC\n"
                 "P.O. Box 2188\n"
                 "Richmond, va 23218-2188\n"
                 "(804) 648-1636",
            "a": (
                {
                    "name": u"Sands Anderson PC",
                    "address1": u"P.O. Box 2188",
                    "city": u"Richmond",
                    "state": u"VA",
                    "zip_code": u"23218-2188",
                    "lookup_key": u"pobox2188richmondsandsandersonva232182188",
                },
                {
                    "phone": u"(804) 648-1636",
                    "fax": u"",
                    "email": u"",
                },
            ),
        },
        {
            # Phone, fax, and email -- the whole package.
            "q": "Susman Godfrey, LLP\n"
                 "1201 Third Avenue, Suite 3800\n"
                 "Seattle, WA 98101\n"
                 "206-373-7381\n"
                 "Fax: 206-516-3883\n"
                 "Email: [email protected]",
            "a": (
                {
                    "name": u"Susman Godfrey, LLP",
                    "address1": u"1201 Third Ave.",
                    "address2": u"Suite 3800",
                    "city": u"Seattle",
                    "state": u"WA",
                    "zip_code": u"98101",
                    "lookup_key": u"1201thirdavesuite3800seattlesusmangodfreywa98101",
                },
                {
                    "phone": u"(206) 373-7381",
                    "fax": u"(206) 516-3883",
                    "email": u"*****@*****.**",
                },
            ),
        },
        {
            # No recipient name
            "q": "211 E. Livingston Ave\n"
                 "Columbus, OH 43215\n"
                 "(614) 228-3727\n"
                 "Email:",
            "a": (
                {
                    "address1": u"211 E. Livingston Ave",
                    "city": u"Columbus",
                    "state": u"OH",
                    "zip_code": u"43215",
                    "lookup_key": u"211elivingstonavecolumbusoh43215",
                },
                {
                    "phone": u"(614) 228-3727",
                    "email": u"",
                    "fax": u"",
                },
            ),
        },
        {
            # Weird ways of doing phone numbers
            "q": """1200 Boulevard Tower 1018 Kanawha Boulevard, E Charleston, WV 25301 304/342-3174 Fax: 304/342-0448 Email: [email protected] """,
            "a": (
                {
                    "address1": u"1018 Kanawha Blvd., E",
                    "address2": u"1200 Blvd. Tower",
                    "city": u"Charleston",
                    "state": u"WV",
                    "zip_code": u"25301",
                    "lookup_key": u"1018kanawhablvde1200blvdtowercharlestonwv25301",
                },
                {
                    "phone": "(304) 342-3174",
                    "fax": "(304) 342-0448",
                    "email": "*****@*****.**",
                },
            ),
        },
        {
            # Empty fax numbers (b/c PACER).
            "q": """303 E 17th Ave Suite 300 Denver, CO 80203 303-861-1764 Fax: Email: [email protected] """,
            "a": (
                {
                    "address1": u"303 E 17th Ave",
                    "address2": u"Suite 300",
                    "city": u"Denver",
                    "state": u"CO",
                    "zip_code": u"80203",
                    "lookup_key": u"303e17thavesuite300denverco80203",
                },
                {
                    "phone": u"(303) 861-1764",
                    "fax": u"",
                    "email": u"*****@*****.**",
                },
            ),
        },
        {
            # Funky phone number
            "q": """Guerrini Law Firm 106 SOUTH MENTOR AVE. #150 Pasadena, CA 91106 626-229-9611-202 Fax: 626-229-9615 Email: [email protected] """,
            "a": (
                {
                    "name": u"Guerrini Law Firm",
                    "address1": u"106 South Mentor Ave.",
                    "address2": u"# 150",
                    "city": u"Pasadena",
                    "state": u"CA",
                    "zip_code": u"91106",
                    "lookup_key": u"106southmentorave150pasadenaguerrinilawfirmca91106",
                },
                {
                    "phone": u"",
                    "fax": u"(626) 229-9615",
                    "email": u"*****@*****.**",
                },
            ),
        },
        {
            "q": """Duncan & Sevin, LLC 400 Poydras St. Suite 1200 New Orleans, LA 70130 """,
            "a": (
                {
                    "name": u"Duncan & Sevin, LLC",
                    "address1": u"400 Poydras St.",
                    "address2": u"Suite 1200",
                    "city": u"New Orleans",
                    "state": u"LA",
                    "zip_code": u"70130",
                    "lookup_key": u"400poydrasstsuite1200neworleansduncansevinllcla70130",
                },
                {
                    "phone": u"",
                    "fax": u"",
                    "email": u"",
                },
            ),
        },
        {
            # Ambiguous address. Returns empty dict.
            "q": """Darden, Koretzky, Tessier, Finn, Blossman & Areaux Energy Centre 1100 Poydras Street Suite 3100 New Orleans, LA 70163 504-585-3800 Email: [email protected] """,
            "a": (
                {},
                {
                    "phone": u"(504) 585-3800",
                    "email": u"*****@*****.**",
                    "fax": u"",
                },
            ),
        },
        {
            # Ambiguous address with unicode that triggers
            # https://github.com/datamade/probableparsing/issues/2
            "q": u"""Darden, Koretzky, Tessier, Finn, Blossman & Areaux Energy Centre 1100 Poydras Street Suite 3100 New Orléans, LA 70163 504-585-3800 Email: [email protected] """,
            "a": (
                {},
                {
                    "phone": u"(504) 585-3800",
                    "email": u"*****@*****.**",
                    "fax": u"",
                },
            ),
        },
        {
            # Missing zip code, phone number ambiguously used instead.
            "q": """NSB - Department of Law POB 69 Barrow, AK 907-852-0300 """,
            "a": (
                {
                    "name": u"NSB Department of Law",
                    "address1": u"Pob 69",
                    "city": u"Barrow",
                    "state": u"AK",
                    "zip_code": u"",
                    "lookup_key": u"pob69barrownsbdepartmentoflawak",
                },
                {
                    "phone": u"",
                    "fax": u"",
                    "email": u"",
                },
            ),
        },
        {
            # Unknown/invalid state.
            "q": """Kessler Topaz Meltzer Check LLP 280 King of Prussia Road Radnor, OA 19087 (610) 667-7706 Fax: (610) 667-7056 Email: [email protected] """,
            "a": (
                {
                    "name": u"Kessler Topaz Meltzer Check LLP",
                    "city": u"Radnor",
                    "address1": u"280 King of Prussia Road",
                    "lookup_key": u"280kingofprussiaroadradnorkesslertopazmeltzercheck19087",
                    "state": u"",
                    "zip_code": u"19087",
                },
                {
                    "phone": u"(610) 667-7706",
                    "fax": u"(610) 667-7056",
                    "email": u"*****@*****.**",
                },
            ),
        },
    ]

    # Show complete diffs on failure; the expected dicts are long.
    self.maxDiff = None
    for index, case in enumerate(cases):
        print("Normalizing address %s..." % index, end="")
        actual = normalize_attorney_contact(case["q"])
        self.assertEqual(actual, case["a"])
        print("✓")
def add_attorney(atty, p, d):
    """Create or refresh an attorney and tie them to a party on a docket.

    The attorney is looked up by name among attorneys that already have a
    role on the docket. With no match a new attorney is created; with
    several, the one with the earliest date_created is used. The attorney's
    organization, contact details and roles for this party and docket are
    then brought up to date.

    :param atty: A dict representing an attorney, as provided by Juriscraper.
    The 'name', 'contact' and 'roles' keys are read here.
    :param p: A Party object
    :param d: A Docket object
    :return: The primary key of the attorney that was created or updated.
    """
    atty_org_info, atty_info = normalize_attorney_contact(
        atty['contact'],
        fallback_name=atty['name'],
    )

    # Try lookup by atty name in the docket.
    matches = Attorney.objects.filter(
        name=atty['name'],
        roles__docket=d,
    ).distinct()
    match_count = matches.count()
    if match_count >= 2:
        # Too many found, choose the earliest-created attorney.
        logger.info("Got too many results for atty: '%s'. Picking earliest." %
                    atty)
        a = matches.earliest('date_created')
    elif match_count == 1:
        # Nailed it.
        a = matches[0]
    else:
        # Couldn't find the attorney. Make one.
        a = Attorney.objects.create(
            name=atty['name'],
            contact_raw=atty['contact'],
        )

    # Associate the attorney with an org and update their contact info.
    if atty['contact']:
        if atty_org_info:
            organizations = AttorneyOrganization.objects
            try:
                org = organizations.get(
                    lookup_key=atty_org_info['lookup_key'],
                )
            except AttorneyOrganization.DoesNotExist:
                try:
                    org = organizations.create(**atty_org_info)
                except IntegrityError:
                    # Race condition. Item was created after get. Try again.
                    org = organizations.get(
                        lookup_key=atty_org_info['lookup_key'],
                    )

            # Add the attorney to the organization
            AttorneyOrganizationAssociation.objects.get_or_create(
                attorney=a,
                attorney_organization=org,
                docket=d,
            )

        if atty_info:
            a.contact_raw = atty['contact']
            a.email = atty_info['email']
            a.phone = atty_info['phone']
            a.fax = atty_info['fax']
            a.save()

    # Do roles
    if len(atty['roles']) == 0:
        # Nothing supplied: link the attorney through an UNKNOWN role.
        roles = [{'role': Role.UNKNOWN, 'date_action': None}]
    else:
        roles = atty['roles']

    # Delete the old roles, replace with new.
    Role.objects.filter(attorney=a, party=p, docket=d).delete()
    new_roles = [
        Role(attorney=a, party=p, docket=d, **atty_role)
        for atty_role in roles
    ]
    Role.objects.bulk_create(new_roles)

    return a.pk
def test_normalize_atty_contact(self):
    """Check normalize_attorney_contact against a table of contact blocks.

    Every case holds the raw contact text under 'q' and, under 'a', the
    expected result: a two-tuple of (organization dict, contact dict).
    """
    cases = [
        {
            # Email and phone number
            'q': "Landye Bennett Blumstein LLP\n"
                 "701 West Eighth Avenue, Suite 1200\n"
                 "Anchorage, AK 99501\n"
                 "907-276-5152\n"
                 "Email: [email protected]",
            'a': (
                {
                    'name': u"Landye Bennett Blumstein LLP",
                    'address1': u'701 West Eighth Ave.',
                    'address2': u'Suite 1200',
                    'city': u'Anchorage',
                    'state': u'AK',
                    'zip_code': u'99501',
                    'lookup_key': u'701westeighthavesuite1200anchoragelandyebennettblumsteinak99501',
                },
                {
                    'email': u'*****@*****.**',
                    'phone': u'907-276-5152',
                    'fax': u'',
                },
            ),
        },
        {
            # PO Box
            'q': "Sands Anderson PC\n"
                 "P.O. Box 2188\n"
                 "Richmond, VA 23218-2188\n"
                 "(804) 648-1636",
            'a': (
                {
                    'name': u'Sands Anderson PC',
                    'address1': u'P.O. Box 2188',
                    'city': u'Richmond',
                    'state': u'VA',
                    'zip_code': u'23218-2188',
                    'lookup_key': u'pobox2188richmondsandsandersonva232182188',
                },
                {
                    'phone': u'804648-1636',
                    'fax': u'',
                    'email': u'',
                },
            ),
        },
        {
            # Lowercase state (needs normalization)
            'q': "Sands Anderson PC\n"
                 "P.O. Box 2188\n"
                 "Richmond, va 23218-2188\n"
                 "(804) 648-1636",
            'a': (
                {
                    'name': u'Sands Anderson PC',
                    'address1': u'P.O. Box 2188',
                    'city': u'Richmond',
                    'state': u'VA',
                    'zip_code': u'23218-2188',
                    'lookup_key': u'pobox2188richmondsandsandersonva232182188',
                },
                {
                    'phone': u"804648-1636",
                    'fax': u'',
                    'email': u'',
                },
            ),
        },
        {
            # Phone, fax, and email -- the whole package.
            'q': "Susman Godfrey, LLP\n"
                 "1201 Third Avenue, Suite 3800\n"
                 "Seattle, WA 98101\n"
                 "206-373-7381\n"
                 "Fax: 206-516-3883\n"
                 "Email: [email protected]",
            'a': (
                {
                    'name': u'Susman Godfrey, LLP',
                    'address1': u'1201 Third Ave.',
                    'address2': u'Suite 3800',
                    'city': u'Seattle',
                    'state': u'WA',
                    'zip_code': u'98101',
                    'lookup_key': u'1201thirdavesuite3800seattlesusmangodfreywa98101',
                },
                {
                    'phone': u'206-373-7381',
                    'fax': u'206-516-3883',
                    'email': u'*****@*****.**',
                },
            ),
        },
        {
            # No recipient name
            'q': "211 E. Livingston Ave\n"
                 "Columbus, OH 43215\n"
                 "(614) 228-3727\n"
                 "Email:",
            'a': (
                {
                    'address1': u'211 E. Livingston Ave',
                    'city': u'Columbus',
                    'state': u'OH',
                    'zip_code': u'43215',
                    'lookup_key': u'211elivingstonavecolumbusoh43215',
                },
                {
                    'phone': u'614228-3727',
                    'email': u'',
                    'fax': u'',
                },
            ),
        },
        {
            # Weird ways of doing phone numbers
            'q': """1200 Boulevard Tower 1018 Kanawha Boulevard, E Charleston, WV 25301 304/342-3174 Fax: 304/342-0448 Email: [email protected] """,
            'a': (
                {
                    'address1': u'1018 Kanawha Blvd., E',
                    'address2': u'1200 Blvd. Tower',
                    'city': u'Charleston',
                    'state': u'WV',
                    'zip_code': u'25301',
                    'lookup_key': u'1018kanawhablvde1200blvdtowercharlestonwv25301',
                },
                {
                    'phone': '304342-3174',
                    'fax': '304342-0448',
                    'email': '*****@*****.**',
                },
            ),
        },
        {
            # Empty fax numbers (b/c PACER).
            'q': """303 E 17th Ave Suite 300 Denver, CO 80203 303-861-1764 Fax: Email: [email protected] """,
            'a': (
                {
                    'address1': u'303 E 17th Ave',
                    'address2': u'Suite 300',
                    'city': u'Denver',
                    'state': u'CO',
                    'zip_code': u'80203',
                    'lookup_key': u'303e17thavesuite300denverco80203',
                },
                {
                    'phone': u'303-861-1764',
                    'fax': u'',
                    'email': u'*****@*****.**',
                },
            ),
        },
        {
            # Funky phone number
            'q': """Guerrini Law Firm 106 SOUTH MENTOR AVE. #150 Pasadena, CA 91106 626-229-9611-202 Fax: 626-229-9615 Email: [email protected] """,
            'a': (
                {
                    'name': u'Guerrini Law Firm',
                    'address1': u'106 South Mentor Ave.',
                    'address2': u'# 150',
                    'city': u'Pasadena',
                    'state': u'CA',
                    'zip_code': u'91106',
                    'lookup_key': u'106southmentorave150pasadenaguerrinilawfirmca91106',
                },
                {
                    'phone': u'',
                    'fax': u'626-229-9615',
                    'email': u'*****@*****.**',
                },
            ),
        },
        {
            'q': """Duncan & Sevin, LLC 400 Poydras St. Suite 1200 New Orleans, LA 70130 """,
            'a': (
                {
                    'name': u'Duncan & Sevin, LLC',
                    'address1': u'400 Poydras St.',
                    'address2': u'Suite 1200',
                    'city': u'New Orleans',
                    'state': u'LA',
                    'zip_code': u'70130',
                    'lookup_key': u'400poydrasstsuite1200neworleansduncansevinllcla70130',
                },
                {
                    'phone': u'',
                    'fax': u'',
                    'email': u'',
                },
            ),
        },
        {
            # Ambiguous address. Returns empty dict.
            'q': """Darden, Koretzky, Tessier, Finn, Blossman & Areaux Energy Centre 1100 Poydras Street Suite 3100 New Orleans, LA 70163 504-585-3800 Email: [email protected] """,
            'a': (
                {},
                {
                    'phone': u'504-585-3800',
                    'email': u'*****@*****.**',
                    'fax': u'',
                },
            ),
        },
        {
            # Missing zip code, phone number ambiguously used instead.
            'q': """NSB - Department of Law POB 69 Barrow, AK 907-852-0300 """,
            'a': (
                {
                    'name': u'NSB Department of Law',
                    'address1': u'Pob 69',
                    'city': u'Barrow',
                    'state': u'AK',
                    'zip_code': u'',
                    'lookup_key': u'pob69barrownsbdepartmentoflawak',
                },
                {
                    'phone': u'',
                    'fax': u'',
                    'email': u'',
                },
            ),
        },
        {
            # Unknown/invalid state.
            'q': """Kessler Topaz Meltzer Check LLP 280 King of Prussia Road Radnor, OA 19087 (610) 667-7706 Fax: (610) 667-7056 Email: [email protected] """,
            'a': (
                {
                    'name': u'Kessler Topaz Meltzer Check LLP',
                    'city': u'Radnor',
                    'address1': u'280 King of Prussia Road',
                    'lookup_key': u'280kingofprussiaroadradnorkesslertopazmeltzercheck19087',
                    'state': u'',
                    'zip_code': u'19087',
                },
                {
                    'phone': u'610667-7706',
                    'fax': u'610667-7056',
                    'email': u'*****@*****.**',
                },
            ),
        },
    ]

    # Show complete diffs on failure; the expected dicts are long.
    self.maxDiff = None
    for index, case in enumerate(cases):
        print("Normalizing address %s..." % index, end='')
        actual = normalize_attorney_contact(case['q'])
        self.assertEqual(actual, case['a'])
        print('✓')
def add_attorney(atty, p, d):
    """Add/update an attorney.

    Given an attorney node, and a party and a docket object, add the attorney
    to the database or link the attorney to the new docket. Also add/update
    the attorney organization, and the attorney's role in the case.

    An existing attorney is looked up by name, narrowed by whichever of the
    phone, fax, email, raw contact text and organization lookup key are
    available; when nothing matches, a new attorney is created.

    :param atty: A dict representing an attorney, as provided by Juriscraper.
    The 'name', 'contact' and 'roles' keys are read here.
    :param p: A Party object
    :param d: A Docket object
    :return: None when the lookup matches more than one attorney, otherwise
    the Attorney object.
    :raises ValueError: If none of the docket's date_filed, date_terminated
    or date_last_filing values is set, because max() gets an empty list.
    """
    newest_docket_date = max([dt for dt in [d.date_filed, d.date_terminated,
                                            d.date_last_filing] if dt])
    atty_org_info, atty_info = normalize_attorney_contact(
        atty['contact'],
        fallback_name=atty['name'],
    )

    # OR together every identifying detail that actually has a value.
    q = Q()
    fields = {
        ('phone', atty_info['phone']),
        ('fax', atty_info['fax']),
        ('email', atty_info['email']),
        ('contact_raw', atty['contact']),
        ('organizations__lookup_key', atty_org_info.get('lookup_key')),
    }
    for field, lookup in fields:
        if lookup:
            q |= Q(**{field: lookup})

    try:
        # distinct() is required: the organizations join can return the same
        # attorney once per associated organization row, which would
        # otherwise raise MultipleObjectsReturned for a single attorney.
        a, _created = Attorney.objects.filter(
            Q(name=atty['name']) & q,
        ).distinct().get_or_create(
            defaults={
                'name': atty['name'],
                'date_sourced': newest_docket_date,
                'contact_raw': atty['contact'],
            },
        )
    except Attorney.MultipleObjectsReturned:
        logger.info("Got too many results for attorney: '%s'. Punting.", atty)
        return None

    # Associate the attorney with an org and update their contact info.
    if atty['contact']:
        if atty_org_info:
            logger.info("Adding organization information to '%s': '%s'",
                        atty['name'], atty_org_info)
            # get_or_create rather than get-then-create, so that a row
            # inserted by a concurrent worker between the two steps is
            # fetched instead of surfacing as an IntegrityError.
            org, _org_created = AttorneyOrganization.objects.get_or_create(
                lookup_key=atty_org_info['lookup_key'],
                defaults=atty_org_info,
            )

            # Add the attorney to the organization
            AttorneyOrganizationAssociation.objects.get_or_create(
                attorney=a,
                attorney_organization=org,
                docket=d,
            )

        # Only overwrite stored contact details with data that is at least
        # as recent as what the attorney record was last sourced from.
        docket_info_is_newer = (a.date_sourced <= newest_docket_date)
        if atty_info and docket_info_is_newer:
            logger.info("Updating atty info because %s is more recent than %s.",
                        newest_docket_date, a.date_sourced)
            a.date_sourced = newest_docket_date
            a.contact_raw = atty['contact']
            a.email = atty_info['email']
            a.phone = atty_info['phone']
            a.fax = atty_info['fax']
            a.save()

    # Do roles
    atty_roles = [normalize_attorney_role(r) for r in atty['roles']]
    atty_roles = filter(lambda r: r['role'] is not None, atty_roles)
    atty_roles = remove_duplicate_dicts(atty_roles)
    if len(atty_roles) > 0:
        logger.info("Linking attorney '%s' to party '%s' via %s roles: %s",
                    atty['name'], p.name, len(atty_roles), atty_roles)
    else:
        logger.info("No role data parsed. Linking via 'UNKNOWN' role.")
        atty_roles = [{'role': Role.UNKNOWN, 'date_action': None}]

    # Delete the old roles, replace with new.
    Role.objects.filter(attorney=a, party=p, docket=d).delete()
    Role.objects.bulk_create([
        Role(attorney=a, party=p, docket=d, **atty_role) for
        atty_role in atty_roles
    ])

    return a