def to_dict(self):
    """Return the populated contact fields of this object as a dict.

    Only truthy values among ``voice``, ``email``, ``fax`` and ``address``
    are included; ``voice`` and ``fax`` are passed through
    ``reformat_phone_number`` first.  ``note`` is added only when at least
    one contact field was present, so an object with no contact data
    serializes to an empty dict.
    """
    phone_fields = ('voice', 'fax')
    result = {}
    for field in ('voice', 'email', 'fax', 'address'):
        raw = getattr(self, field)
        if not raw:
            # Skip unset/empty fields entirely.
            continue
        result[field] = reformat_phone_number(raw) if field in phone_fields else raw
    if result:
        result["note"] = self.note
    return result
def to_dict(self):
    """Serialize the non-empty contact fields of this object.

    Walks ``voice``, ``email``, ``fax`` and ``address`` in that order and
    copies every truthy value into the result; phone-like fields
    (``voice``/``fax``) go through ``reformat_phone_number``.  The ``note``
    attribute is attached only if something else was emitted.
    """
    serialized = {}
    for attr in ("voice", "email", "fax", "address"):
        current = getattr(self, attr)
        if not current:
            continue
        if attr == "voice" or attr == "fax":
            current = reformat_phone_number(current)
        serialized[attr] = current
    if not serialized:
        # Nothing to report: return the empty dict without a note.
        return serialized
    serialized["note"] = self.note
    return serialized
def process_person(person, jurisdiction_id):
    """Convert a scraped person dict into an ordered person record.

    Args:
        person: mapping with at least ``name``, ``links``, ``sources``,
            ``contact_details`` and ``memberships`` keys; optional
            biographical keys, ``extras`` and ``identifiers`` are copied
            when present and truthy.
        jurisdiction_id: value stored as ``jurisdiction`` on the role.

    Returns:
        An ``OrderedDict`` describing the person.

    Raises:
        ValueError: if a membership's ``organization_id`` does not start
            with ``"~"`` (the remainder is parsed as JSON).
    """
    optional_keys = (
        "image",
        "gender",
        "biography",
        "given_name",
        "family_name",
        "birth_date",
        "death_date",
        "national_identity",
        "summary",
        # maybe post-process these?
        "other_names",
    )

    result = OrderedDict()
    result["id"] = ocd_uuid("person")
    result["name"] = person["name"]
    result["party"] = []
    result["roles"] = []
    result["contact_details"] = []
    result["links"] = [process_link(item) for item in person["links"]]
    result["sources"] = [process_link(item) for item in person["sources"]]

    # Group contact details by their note; a later detail with the same
    # note and type replaces the earlier one.
    grouped = {}
    for detail in person["contact_details"]:
        value = detail["value"]
        kind = detail["type"]
        if kind in ("voice", "fax"):
            value = reformat_phone_number(value)
        elif kind == "address":
            value = reformat_address(value)
        grouped.setdefault(detail["note"], {})[kind] = value

    offices = []
    for note, fields in grouped.items():
        office = {"note": note}
        office.update(fields)
        offices.append(office)
    result["contact_details"] = offices

    for membership in person["memberships"]:
        organization_id = membership["organization_id"]
        if not organization_id.startswith("~"):
            raise ValueError(organization_id)
        org = json.loads(organization_id[1:])
        classification = org["classification"]
        if classification in ("upper", "lower", "legislature"):
            label = json.loads(membership["post_id"][1:])["label"]
            # Each chamber membership replaces any previously recorded role.
            role = {
                "type": classification,
                "district": str(label),
                "jurisdiction": jurisdiction_id,
            }
            result["roles"] = [role]
        elif classification == "party":
            result["party"] = [{"name": org["name"]}]

    for name in optional_keys:
        if person.get(name):
            result[name] = person[name]

    # promote some extras where appropriate
    source_extras = person.get("extras", {})
    remaining = source_extras.copy()
    for name in source_extras.keys():
        if name in optional_keys:
            result[name] = remaining.pop(name)
    if remaining:
        result["extras"] = remaining

    if person.get("identifiers"):
        result["other_identifiers"] = person["identifiers"]

    return result
def make_governors():
    """Create one person record per row of ``governors.csv``.

    Each row is turned into an ``OrderedDict`` with a single ``governor``
    role and a "Capitol Office" contact entry, then written with
    ``dump_obj`` into ``data/<state abbr>/executive/``.  The state
    abbreviation and jurisdiction id come from ``metadata.lookup``.
    """
    # newline="" lets the csv module handle line breaks embedded in quoted
    # fields (the address column is split on lines below).
    with open("governors.csv", newline="") as f:
        for line in csv.DictReader(f):
            state = line["state"]
            name = line["name"]
            given_name = line["first_name"]
            family_name = line["last_name"]
            party = line["party"]
            birth_date = line["birth_date"]
            start_date = line["start_date"]
            end_date = line["end_date"]
            website = line["website"]
            twitter = line["twitter"]
            webform = line["webform"]
            full_address = "; ".join(
                part.strip() for part in line["address"].splitlines()
            )
            phone = line["phone"]
            email = line["email"]
            fax = line["fax"]

            contact = {"note": "Capitol Office"}
            if full_address:
                contact["address"] = full_address
            if fax:
                contact["fax"] = reformat_phone_number(fax)
            if phone:
                contact["voice"] = reformat_phone_number(phone)
            if email:
                contact["email"] = email

            ids = {}
            if twitter:
                ids["twitter"] = twitter

            # Look the state up once and reuse the result.
            state_meta = metadata.lookup(name=state)
            jid = state_meta.jurisdiction_id
            abbr = state_meta.abbr.lower()

            # Only add the webform link when the row actually has one, so
            # no link with an empty URL is emitted.
            links = [{"url": website}]
            if webform:
                links.append({"url": webform, "note": "webform"})

            obj = OrderedDict(
                {
                    "id": ocd_uuid("person"),
                    "name": name,
                    "given_name": given_name,
                    "family_name": family_name,
                    "birth_date": birth_date,
                    "party": [{"name": party}],
                    "roles": [
                        {
                            "jurisdiction": jid,
                            "type": "governor",
                            "start_date": start_date,
                            "end_date": end_date,
                        }
                    ],
                    "contact_details": [contact],
                    "ids": ids,
                    "sources": [{"url": website}],
                    "links": links,
                }
            )

            outdir = f"data/{abbr}/executive/"
            # exist_ok avoids aborting the whole import midway when the
            # directory is already present.
            os.makedirs(outdir, exist_ok=True)
            dump_obj(obj, output_dir=outdir)
def process_person(person, jurisdiction_id):
    """Build an ordered person record from a scraped person dict.

    Args:
        person: mapping providing ``name``, ``links``, ``sources``,
            ``contact_details`` and ``memberships``; optional biographical
            keys, ``extras`` and ``identifiers`` are carried over when
            truthy.
        jurisdiction_id: stored as ``jurisdiction`` on the chamber role.

    Returns:
        An ``OrderedDict`` for the person.

    Raises:
        ValueError: when a membership ``organization_id`` lacks the
            leading ``'~'`` that precedes its JSON payload.
    """
    optional_keys = (
        'image',
        'gender',
        'biography',
        'given_name',
        'family_name',
        'birth_date',
        'death_date',
        'national_identity',
        'summary',
        # maybe post-process these?
        'other_names',
    )

    result = OrderedDict()
    result['id'] = ocd_uuid('person')
    result['name'] = person['name']
    result['party'] = []
    result['roles'] = []
    result['links'] = [process_link(entry) for entry in person['links']]
    result['contact_details'] = []
    # maybe post-process these?
    result['sources'] = [process_link(entry) for entry in person['sources']]

    # Collect contact values per note; repeated (note, type) pairs keep
    # only the last value seen.
    by_note = {}
    for detail in person['contact_details']:
        value = detail['value']
        detail_type = detail['type']
        if detail_type in ('voice', 'fax'):
            value = reformat_phone_number(value)
        elif detail_type == 'address':
            value = reformat_address(value)
        by_note.setdefault(detail['note'], {})[detail_type] = value

    merged = []
    for note, values in by_note.items():
        record = {'note': note}
        record.update(values)
        merged.append(record)
    result['contact_details'] = merged

    # memberships!
    for membership in person['memberships']:
        organization_id = membership['organization_id']
        if not organization_id.startswith('~'):
            raise ValueError(organization_id)
        org = json.loads(organization_id[1:])
        classification = org['classification']
        if classification in ('upper', 'lower'):
            post = json.loads(membership['post_id'][1:])['label']
            # A chamber membership overwrites any role recorded earlier.
            result['roles'] = [
                {
                    'type': classification,
                    'district': post,
                    'jurisdiction': jurisdiction_id,
                }
            ]
        elif classification == 'party':
            result['party'] = [{'name': org['name']}]

    for field in optional_keys:
        if person.get(field):
            result[field] = person[field]

    # promote some extras where appropriate
    raw_extras = person.get('extras', {})
    leftover = raw_extras.copy()
    for field in raw_extras.keys():
        if field in optional_keys:
            result[field] = leftover.pop(field)
    if leftover:
        result['extras'] = leftover

    if person.get('identifiers'):
        result['other_identifiers'] = person['identifiers']

    return result
def test_reformat_phone(input, output):
    """Check that ``reformat_phone_number(input)`` equals ``output``.

    The parameter names are kept as-is; they are presumably bound by a
    parametrize decorator that is not part of this block.
    """
    actual = reformat_phone_number(input)
    assert actual == output
def get_mayor_details(csv_fname):
    """Read mayor rows from a CSV file and group them by state.

    Rows for ``dc`` are skipped, as are rows whose term ended before
    2020-09-24.  For every remaining row a person record and a
    municipality record are built.

    Args:
        csv_fname: path of the CSV file to read.  The columns read are
            "Postal Code", "City", "Given Name", "Family Name", "Email",
            "Source", "Voice", "Phone Extension", "Address", "Zip Code"
            and "Term End" (``%m/%d/%Y`` when present).

    Returns:
        A ``(mayors_by_state, municipalities_by_state)`` tuple of
        ``defaultdict(list)`` objects keyed by the lower-cased postal code.

    Side effects:
        When ``get_existing_mayor`` reports the matched person as retired,
        the file located by ``find_file`` for that person is removed.
    """
    mayors_by_state = defaultdict(list)
    municipalities_by_state = defaultdict(list)

    # newline="" is what the csv module recommends for files it reads.
    with open(csv_fname, newline="") as f:
        for line in csv.DictReader(f):
            state = line["Postal Code"].lower()
            if state == "dc":
                continue

            city = line["City"].strip()
            given_name = line["Given Name"].strip()
            family_name = line["Family Name"].strip()
            name = f"{given_name} {family_name}"
            email = line["Email"].strip()
            source = line["Source"].strip()
            # Both fields must be interpolated; previously the extension
            # placeholder lacked braces, so the literal text
            # "line['Phone Extension']" was appended to every number.
            # strip() drops the separator when there is no extension.
            # NOTE(review): confirm reformat_phone_number accepts a
            # space-separated extension.
            phone = reformat_phone_number(
                f"{line['Voice']} {line['Phone Extension']}".strip()
            )
            address = line["Address"].strip()
            zipcode = line["Zip Code"].strip()

            if not line["Term End"]:
                term_end = "2022-01-01"  # temporary term end date for the unknowns
            else:
                term_end = datetime.datetime.strptime(
                    line["Term End"], "%m/%d/%Y"
                ).strftime("%Y-%m-%d")

            # ISO dates compare correctly as strings.
            if term_end < "2020-09-24":
                click.secho(f"skipping retired {name}, {term_end}", fg="yellow")
                continue

            full_address = f"{address};{city}, {state.upper()} {zipcode}"
            # The formatted address always contains separators, so it is
            # never empty and can be stored unconditionally.
            contact = OrderedDict({"note": "Primary Office"})
            contact["address"] = full_address
            if phone:
                contact["voice"] = phone

            jid = city_to_jurisdiction(city, state)

            # Reuse the id of an already-known person, otherwise mint one.
            existing, retired = get_existing_mayor(state, name)
            if existing:
                pid = existing["id"]
            else:
                pid = ocd_uuid("person")
            if retired:
                # Presumably the retired file is dropped because the person
                # is emitted again below as current - confirm with caller.
                os.remove(find_file(existing["id"]))

            mayors_by_state[state].append(
                OrderedDict(
                    {
                        "id": pid,
                        "name": name,
                        "given_name": given_name,
                        "family_name": family_name,
                        "roles": [
                            {"jurisdiction": jid, "type": "mayor", "end_date": term_end}
                        ],
                        "contact_details": [contact],
                        "sources": [{"url": source}] if source else [],
                        "links": [{"url": source}] if source else [],
                        "email": email,
                    }
                )
            )
            municipalities_by_state[state].append(
                OrderedDict({"name": city, "id": jid})
            )

    return mayors_by_state, municipalities_by_state
def make_ceos():
    """Create one person record per row of ``ceo.csv``.

    Each row becomes an ``OrderedDict`` with a single role (either
    ``secretary of state`` or ``chief election officer``), a
    "Capitol Office" contact entry and a party, and is written with
    ``dump_obj`` into ``data/<state abbr>/executive/``.  Rows whose state
    makes ``metadata.lookup`` raise ``KeyError`` are skipped.
    """
    party_names = {"R": "Republican", "D": "Democratic"}

    with open("ceo.csv", newline="") as f:
        for line in csv.DictReader(f):
            state = line["State"].strip()
            given_name = line["First"]
            family_name = line["Last"]
            name = f"{given_name} {family_name}"
            role = line["Role"].strip().lower()
            addr1 = line["Address 1"]
            addr2 = line["Address 2"]
            city = line["City"]
            state_abbr = line["Postal Code"]
            zip5 = line["Zip Code"]
            zip4 = line["Zip Plus 4"]
            phone = line["Phone"]
            email = line["Email"]
            fax = line["Fax"]
            contact_form = line["Contact Form"]
            source = line["Source"]
            twitter = line["Twitter"]

            # Anything other than "R" or "D" is recorded as Independent.
            party = party_names.get(line["Party"], "Independent")

            if role != "secretary of state":
                role = "chief election officer"

            # Leave out blank address lines and a blank ZIP+4 so the result
            # has no empty "; ;" segment or dangling "-".
            zipcode = f"{zip5}-{zip4.strip()}" if zip4.strip() else zip5
            address_parts = [
                part.strip() for part in (addr1, addr2) if part.strip()
            ]
            address_parts.append(f"{city}, {state_abbr} {zipcode}")
            full_address = "; ".join(address_parts)

            contact = {"note": "Capitol Office", "address": full_address}
            if fax:
                contact["fax"] = reformat_phone_number(fax)
            if phone:
                contact["voice"] = reformat_phone_number(phone)
            if email:
                contact["email"] = email

            ids = {}
            if twitter:
                ids["twitter"] = twitter

            # Look the state up once; unknown states are skipped.
            try:
                state_meta = metadata.lookup(name=state)
            except KeyError:
                continue
            jid = state_meta.jurisdiction_id
            abbr = state_meta.abbr.lower()

            links = [{"url": source}]
            if contact_form:
                links.append({"url": contact_form, "note": "webform"})

            obj = OrderedDict(
                {
                    "id": ocd_uuid("person"),
                    "name": name,
                    "given_name": given_name,
                    "family_name": family_name,
                    "roles": [
                        {
                            "jurisdiction": jid,
                            # role is already stripped and lower-cased above
                            "type": role,
                            "end_date": "2021-12-31",
                        },
                    ],
                    "contact_details": [contact],
                    "ids": ids,
                    "sources": [{"url": source}],
                    "links": links,
                    "party": [{"name": party}],
                }
            )

            # NOTE: the output directory is not created here; it is passed
            # to dump_obj as-is.
            outdir = f"data/{abbr}/executive/"
            dump_obj(obj, output_dir=outdir)
def make_mayors(state_to_import):
    """Create mayor records for one state from ``mayors.csv``.

    Creates ``data/<state_to_import>/municipalities`` (raising if it
    already exists), writes one person record per matching CSV row into
    it with ``dump_obj``, and finally writes the list of municipalities to
    ``data/<state_to_import>/municipalities.yml``.

    Args:
        state_to_import: lower-case postal code; rows whose "Postal Code"
            (lower-cased) differs are ignored.
    """
    municipalities = []
    os.makedirs(f"data/{state_to_import}/municipalities")

    with open("mayors.csv") as f:
        for row in csv.DictReader(f):
            state = row["Postal Code"].lower()
            if state != state_to_import:
                continue

            city = row["City"].strip()
            given_name = row["First"].strip()
            family_name = row["Last"].strip()
            name = f"{given_name} {family_name}"
            email = row["Email"].strip()
            webform = row["Web Form"].strip()
            phone = reformat_phone_number(row["Phone"])
            fax = reformat_phone_number(row["Fax"])
            address1 = row["Address 1"].strip()
            address2 = row["Address 2"].strip()

            zipcode = row["Zip Code"].strip()
            plus4 = row["Zip Plus 4"].strip()
            if plus4:
                zipcode += "-" + plus4

            raw_term_end = row["Term End"]
            if raw_term_end:
                parsed = datetime.datetime.strptime(raw_term_end, "%m/%d/%Y")
                term_end = parsed.strftime("%Y-%m-%d")
            else:
                # temporary term end date for the unknowns
                term_end = "2021-01-01"

            locality = f"{city}, {state.upper()} {zipcode}"
            if address2:
                full_address = f"{address1};{address2};{locality}"
            else:
                full_address = f"{address1};{locality}"

            # The formatted address always contains separators, so it is
            # never empty.
            contact = {"note": "Primary Office", "address": full_address}
            if fax:
                contact["fax"] = fax
            if phone:
                contact["voice"] = phone
            if email:
                contact["email"] = email

            jid = city_to_jurisdiction(city, state)
            municipalities.append(OrderedDict({"name": city, "id": jid}))

            # Sources and links both point at the web form when present.
            person = OrderedDict(
                {
                    "id": ocd_uuid("person"),
                    "name": name,
                    "given_name": given_name,
                    "family_name": family_name,
                    "roles": [
                        {"jurisdiction": jid, "type": "mayor", "end_date": term_end}
                    ],
                    "contact_details": [contact],
                    "sources": [{"url": webform}] if webform else [],
                    "links": [{"url": webform}] if webform else [],
                }
            )
            dump_obj(person, output_dir=f"data/{state}/municipalities/")

    dump_obj(municipalities, filename=f"data/{state_to_import}/municipalities.yml")