def create_person(fname, lname, name, state, district, party, rtype, url, image, start_date):
    """Build a new person record and write it under the state's ``people`` directory.

    ``name`` is used as the display name when truthy; otherwise the display
    name is ``fname`` and ``lname`` joined by a space. The record carries one
    party entry and one role, and ``url`` is stored as both a link and a source.
    """
    record = OrderedDict()
    record["id"] = ocd_uuid("person")
    record["name"] = name or f"{fname} {lname}"
    record["given_name"] = fname
    record["family_name"] = lname
    record["image"] = image
    record["party"] = [{"name": party}]
    record["roles"] = [
        {
            "type": rtype,
            "district": district,
            "jurisdiction": get_jurisdiction_id(state),
            "start_date": start_date,
        }
    ]
    record["links"] = [{"url": url}]
    record["sources"] = [{"url": url}]

    people_dir = os.path.join(get_data_dir(state), "people")
    dump_obj(record, output_dir=people_dir)
def process_org(org, jurisdiction_id):
    """Return an ordered organization record built from the scraped ``org`` dict.

    A fresh organization id is generated, every link and source is passed
    through ``process_link``, and the membership list starts out empty.
    """
    record = OrderedDict()
    record['id'] = ocd_uuid('organization')
    record['name'] = org['name']
    record['jurisdiction'] = jurisdiction_id
    record['parent'] = org['parent_id']
    record['classification'] = org['classification']
    record['links'] = [process_link(entry) for entry in org['links']]
    record['sources'] = [process_link(entry) for entry in org['sources']]
    record['memberships'] = []
    return record
def process_org(org, jurisdiction_id):
    """Convert a scraped organization dict into an ordered organization record.

    The result gets a newly generated organization id, links and sources run
    through ``process_link``, and an empty ``memberships`` list.
    """
    fields = [
        ("id", ocd_uuid("organization")),
        ("name", org["name"]),
        ("jurisdiction", jurisdiction_id),
        ("parent", org["parent_id"]),
        ("classification", org["classification"]),
        ("links", list(map(process_link, org["links"]))),
        ("sources", list(map(process_link, org["sources"]))),
        ("memberships", []),
    ]
    return OrderedDict(fields)
def create_person(fname, lname, name, state, district, party, rtype, url, image, email, start_date):
    """Build a new person record and write it to the directory matching its role type.

    Legislative roles ("upper", "lower", "legislature") go to ``legislature``,
    "mayor" goes to ``municipalities``, and "governor"/"lt_governor" go to
    ``executive``; the non-legislative roles are stored without a district.

    Raises:
        ValueError: if ``rtype`` is not one of the role types listed above.
    """
    role = {
        "type": rtype,
        "district": district,
        "jurisdiction": get_jurisdiction_id(state),
        "start_date": start_date,
    }

    if rtype in ("upper", "lower", "legislature"):
        directory = "legislature"
    elif rtype in ("mayor", "governor", "lt_governor"):
        directory = "municipalities" if rtype == "mayor" else "executive"
        # non-legislative roles carry no district
        del role["district"]
    else:
        raise ValueError(f"unknown role type {rtype}")

    record = OrderedDict()
    record["id"] = ocd_uuid("person")
    record["name"] = name or f"{fname} {lname}"
    record["given_name"] = fname
    record["family_name"] = lname
    record["image"] = image
    record["email"] = email
    record["party"] = [{"name": party}]
    record["roles"] = [role]
    record["links"] = [{"url": url}]
    record["sources"] = [{"url": url}]

    target_dir = os.path.join(get_data_dir(state), directory)
    dump_obj(record, output_dir=target_dir)
def create_person(fname, lname, name, state, district, party, rtype, url, image, start_date):
    """Create a person record with a single role and dump it into ``people``.

    The display name is ``name`` when truthy, otherwise ``fname`` and ``lname``
    joined by a space. ``url`` is recorded as both the only link and the only
    source.
    """
    display_name = name or f'{fname} {lname}'
    fields = [
        ('id', ocd_uuid('person')),
        ('name', display_name),
        ('given_name', fname),
        ('family_name', lname),
        ('image', image),
        ('party', [{'name': party}]),
        (
            'roles',
            [
                {
                    'type': rtype,
                    'district': district,
                    'jurisdiction': get_jurisdiction_id(state),
                    'start_date': start_date,
                }
            ],
        ),
        ('links', [{'url': url}]),
        ('sources', [{'url': url}]),
    ]
    person = OrderedDict(fields)

    state_dir = get_data_dir(state)
    dump_obj(person, output_dir=os.path.join(state_dir, 'people'))
def create_committee(*, name, state, parent, url):
    """Interactively build a committee and write it to the state's ``organizations`` directory.

    Member names are read from the terminal until the user submits a blank
    entry or types ``done``. The committee is stored with ``url`` as both its
    only source and its only link.

    Args:
        name: committee name.
        state: state identifier passed to ``get_jurisdiction_id`` and ``get_data_dir``.
        parent: value stored under the ``parent`` key.
        url: source/link URL.
    """
    members = []
    click.echo("Enter members, enter a blank member to stop.")
    while True:
        # Without a default, click.prompt re-prompts on empty input, so the
        # "blank member" advertised above could never end the loop. An empty
        # default makes a blank entry come back as "".
        mname = click.prompt(
            "Member name ('done' to stop)", default="", show_default=False
        )
        if not mname.strip() or mname == "done":
            break
        members.append({"name": mname})

    com = OrderedDict(
        {
            "id": ocd_uuid("organization"),
            "name": name,
            "classification": "committee",
            "jurisdiction": get_jurisdiction_id(state),
            "parent": parent,
            "sources": [{"url": url}],
            "links": [{"url": url}],
            "memberships": members,
        }
    )
    output_dir = get_data_dir(state)
    dump_obj(com, output_dir=os.path.join(output_dir, "organizations"))
def process_person(person, jurisdiction_id):
    """Convert a scraped person dict into an ordered person record.

    Contact details are grouped by their ``note``; phone and fax values go
    through ``reformat_phone_number`` and addresses through ``reformat_address``.
    Memberships whose organization is a chamber set the person's single role,
    and memberships in a party organization set the party. Selected optional
    fields are copied from the person itself or promoted out of ``extras``.

    Raises:
        ValueError: if a membership's ``organization_id`` does not start with "~".
    """
    promotable = (
        "image",
        "gender",
        "biography",
        "given_name",
        "family_name",
        "birth_date",
        "death_date",
        "national_identity",
        "summary",
        # maybe post-process these?
        "other_names",
    )

    record = OrderedDict()
    record["id"] = ocd_uuid("person")
    record["name"] = person["name"]
    record["party"] = []
    record["roles"] = []
    record["contact_details"] = []
    record["links"] = list(map(process_link, person["links"]))
    record["sources"] = list(map(process_link, person["sources"]))

    # note -> {detail type -> value}
    by_note = defaultdict(lambda: defaultdict(list))
    for detail in person["contact_details"]:
        value = detail["value"]
        kind = detail["type"]
        if kind in ("voice", "fax"):
            value = reformat_phone_number(value)
        elif kind == "address":
            value = reformat_address(value)
        by_note[detail["note"]][kind] = value
    record["contact_details"] = [
        {"note": note, **fields} for note, fields in by_note.items()
    ]

    for membership in person["memberships"]:
        org_ref = membership["organization_id"]
        if not org_ref.startswith("~"):
            raise ValueError(org_ref)
        # the reference is a "~" followed by a JSON document
        org = json.loads(org_ref[1:])
        classification = org["classification"]
        if classification in ("upper", "lower", "legislature"):
            label = json.loads(membership["post_id"][1:])["label"]
            role = {
                "type": classification,
                "district": str(label),
                "jurisdiction": jurisdiction_id,
            }
            record["roles"] = [role]
        elif classification == "party":
            record["party"] = [{"name": org["name"]}]

    for field in promotable:
        if person.get(field):
            record[field] = person[field]

    # promote some extras where appropriate
    source_extras = person.get("extras", {})
    leftover = source_extras.copy()
    for field in source_extras:
        if field in promotable:
            record[field] = leftover.pop(field)
    if leftover:
        record["extras"] = leftover

    if person.get("identifiers"):
        record["other_identifiers"] = person["identifiers"]

    return record
def make_governors():
    """Create an executive person file for every row of ``governors.csv``.

    Each row becomes a person with a single "governor" role, one
    "Capitol Office" contact entry holding whichever of address, fax, voice
    and email are present, and the website as both source and link. The
    record is written to ``data/<state abbr>/executive/``.
    """
    with open("governors.csv") as f:
        for line in csv.DictReader(f):
            state = line["state"]
            website = line["website"]
            webform = line["webform"]
            twitter = line["twitter"]
            phone = line["phone"]
            email = line["email"]
            fax = line["fax"]

            # collapse the multi-line address cell into one "; "-separated string
            full_address = "; ".join(
                part.strip() for part in line["address"].splitlines()
            )

            contact = {"note": "Capitol Office"}
            if full_address:
                contact["address"] = full_address
            if fax:
                contact["fax"] = reformat_phone_number(fax)
            if phone:
                contact["voice"] = reformat_phone_number(phone)
            if email:
                contact["email"] = email

            ids = {}
            if twitter:
                ids["twitter"] = twitter

            # look the state up once and reuse the result for both attributes
            state_meta = metadata.lookup(name=state)
            jid = state_meta.jurisdiction_id
            abbr = state_meta.abbr.lower()

            # only emit the webform link when the CSV actually provides one
            links = [{"url": website}]
            if webform:
                links.append({"url": webform, "note": "webform"})

            obj = OrderedDict(
                {
                    "id": ocd_uuid("person"),
                    "name": line["name"],
                    "given_name": line["first_name"],
                    "family_name": line["last_name"],
                    "birth_date": line["birth_date"],
                    "party": [{"name": line["party"]}],
                    "roles": [
                        {
                            "jurisdiction": jid,
                            "type": "governor",
                            "start_date": line["start_date"],
                            "end_date": line["end_date"],
                        }
                    ],
                    "contact_details": [contact],
                    "ids": ids,
                    "sources": [{"url": website}],
                    "links": links,
                }
            )

            outdir = f"data/{abbr}/executive/"
            # the directory may already exist from an earlier run or import
            os.makedirs(outdir, exist_ok=True)
            dump_obj(obj, output_dir=outdir)
def get_mayor_details(csv_fname):
    """Read a mayors CSV and group person and municipality records by state.

    Rows for DC are skipped, as are mayors whose term ended before 2020-09-24
    (a notice is printed for those). Rows without a "Term End" get a
    placeholder end date. When ``get_existing_mayor`` reports the mayor as
    retired, that person's existing file is deleted.

    Args:
        csv_fname: path of the CSV file to read.

    Returns:
        A ``(mayors_by_state, municipalities_by_state)`` tuple of defaultdicts
        keyed by lower-cased postal code, each mapping to a list of
        OrderedDict records.
    """
    with open(csv_fname) as f:
        data = csv.DictReader(f)

        mayors_by_state = defaultdict(list)
        municipalities_by_state = defaultdict(list)

        for line in data:
            state = line["Postal Code"].lower()
            if state == "dc":
                continue

            city = line["City"].strip()
            given_name = line["Given Name"].strip()
            family_name = line["Family Name"].strip()
            name = f"{given_name} {family_name}"
            email = line["Email"].strip()
            source = line["Source"].strip()
            # Both fields must be interpolated; the extension was previously
            # passed through as the literal text "line['Phone Extension']".
            phone = reformat_phone_number(
                f"{line['Voice']} {line['Phone Extension']}".strip()
            )
            address = line["Address"].strip()
            zipcode = line["Zip Code"].strip()

            if not line["Term End"]:
                term_end = "2022-01-01"  # temporary term end date for the unknowns
            else:
                term_end = datetime.datetime.strptime(
                    line["Term End"], "%m/%d/%Y"
                ).strftime("%Y-%m-%d")

            # ISO dates compare correctly as strings
            if term_end < "2020-09-24":
                click.secho(f"skipping retired {name}, {term_end}", fg="yellow")
                continue

            # always non-empty because of the fixed separators
            full_address = f"{address};{city}, {state.upper()} {zipcode}"
            contact = OrderedDict({"note": "Primary Office"})
            contact["address"] = full_address
            if phone:
                contact["voice"] = phone

            jid = city_to_jurisdiction(city, state)

            existing, retired = get_existing_mayor(state, name)
            if existing:
                pid = existing["id"]
            else:
                pid = ocd_uuid("person")

            if retired:
                # NOTE(review): assumes `retired` is only truthy when
                # `existing` is a record — confirm against get_existing_mayor
                os.remove(find_file(existing["id"]))

            mayors_by_state[state].append(
                OrderedDict(
                    {
                        "id": pid,
                        "name": name,
                        "given_name": given_name,
                        "family_name": family_name,
                        "roles": [
                            {"jurisdiction": jid, "type": "mayor", "end_date": term_end}
                        ],
                        "contact_details": [contact],
                        "sources": [{"url": source}] if source else [],
                        "links": [{"url": source}] if source else [],
                        "email": email,
                    }
                )
            )

            municipalities_by_state[state].append(
                OrderedDict({"name": city, "id": jid})
            )

    return mayors_by_state, municipalities_by_state
def make_ceos():
    """Create an executive person file for every row of ``ceo.csv``.

    Rows whose state cannot be resolved through ``metadata.lookup`` (KeyError)
    are skipped. Any role other than "secretary of state" is recorded as
    "chief election officer". Party codes "R" and "D" are expanded; every
    other value is recorded as "Independent". Records are written to
    ``data/<state abbr>/executive/``.
    """
    with open("ceo.csv") as f:
        for line in csv.DictReader(f):
            state = line["State"].strip()
            given_name = line["First"]
            family_name = line["Last"]
            name = f"{given_name} {family_name}"
            addr1 = line["Address 1"]
            addr2 = line["Address 2"]
            city = line["City"]
            state_abbr = line["Postal Code"]
            zip5 = line["Zip Code"]
            zip4 = line["Zip Plus 4"]
            phone = line["Phone"]
            email = line["Email"]
            fax = line["Fax"]
            contact_form = line["Contact Form"]
            source = line["Source"]
            twitter = line["Twitter"]

            party = line["Party"]
            if party == "R":
                party = "Republican"
            elif party == "D":
                party = "Democratic"
            else:
                party = "Independent"

            role = line["Role"].strip().lower()
            if role != "secretary of state":
                role = "chief election officer"

            # Skip blank address lines and the ZIP+4 suffix when absent so the
            # result has no dangling "; ;" or trailing "-".
            zipcode = f"{zip5}-{zip4}" if zip4.strip() else zip5
            address_parts = [part for part in (addr1, addr2) if part.strip()]
            address_parts.append(f"{city}, {state_abbr} {zipcode}")
            full_address = "; ".join(address_parts)

            contact = {"note": "Capitol Office", "address": full_address}
            if fax:
                contact["fax"] = reformat_phone_number(fax)
            if phone:
                contact["voice"] = reformat_phone_number(phone)
            if email:
                contact["email"] = email

            ids = {}
            if twitter:
                ids["twitter"] = twitter

            try:
                state_meta = metadata.lookup(name=state)
            except KeyError:
                # not a jurisdiction known to the metadata; nothing to write
                continue
            jid = state_meta.jurisdiction_id
            abbr = state_meta.abbr.lower()

            links = [{"url": source}]
            if contact_form:
                links.append({"url": contact_form, "note": "webform"})

            obj = OrderedDict(
                {
                    "id": ocd_uuid("person"),
                    "name": name,
                    "given_name": given_name,
                    "family_name": family_name,
                    "roles": [
                        {
                            "jurisdiction": jid,
                            "type": role,
                            "end_date": "2021-12-31",
                        },
                    ],
                    "contact_details": [contact],
                    "ids": ids,
                    "sources": [{"url": source}],
                    "links": links,
                    "party": [{"name": party}],
                }
            )

            outdir = f"data/{abbr}/executive/"
            dump_obj(obj, output_dir=outdir)
def process_person(person, jurisdiction_id):
    """Build the ordered person record for a scraped person dict.

    Contact details are merged per ``note`` (phone/fax and address values are
    reformatted first). A membership in a chamber organization sets the single
    role, and a membership in a party organization sets the party. Known
    optional fields are copied over, matching keys in ``extras`` are promoted
    to top level, and any identifiers are stored as ``other_identifiers``.

    Raises:
        ValueError: if a membership's ``organization_id`` does not start with '~'.
    """
    optional_fields = (
        'image',
        'gender',
        'biography',
        'given_name',
        'family_name',
        'birth_date',
        'death_date',
        'national_identity',
        'summary',
        # maybe post-process these?
        'other_names',
    )
    chamber_types = ('upper', 'lower', 'legislature')
    phone_types = ('voice', 'fax')

    out = OrderedDict(
        [
            ('id', ocd_uuid('person')),
            ('name', person['name']),
            ('party', []),
            ('roles', []),
            ('contact_details', []),
            ('links', [process_link(item) for item in person['links']]),
            ('sources', [process_link(item) for item in person['sources']]),
        ]
    )

    grouped = defaultdict(lambda: defaultdict(list))
    for entry in person['contact_details']:
        raw = entry['value']
        entry_type = entry['type']
        if entry_type in phone_types:
            raw = reformat_phone_number(raw)
        elif entry_type == 'address':
            raw = reformat_address(raw)
        grouped[entry['note']][entry_type] = raw
    out['contact_details'] = [
        {'note': note, **values} for note, values in grouped.items()
    ]

    for membership in person['memberships']:
        pseudo_id = membership['organization_id']
        if not pseudo_id.startswith('~'):
            raise ValueError(pseudo_id)
        # everything after the leading '~' is a JSON document
        org = json.loads(pseudo_id[1:])
        if org['classification'] in chamber_types:
            district = json.loads(membership['post_id'][1:])['label']
            out['roles'] = [
                {
                    'type': org['classification'],
                    'district': str(district),
                    'jurisdiction': jurisdiction_id,
                }
            ]
        elif org['classification'] == 'party':
            out['party'] = [{'name': org['name']}]

    for name in optional_fields:
        if person.get(name):
            out[name] = person[name]

    # promote some extras where appropriate
    given_extras = person.get('extras', {})
    remaining = given_extras.copy()
    for name in given_extras:
        if name in optional_fields:
            out[name] = remaining.pop(name)
    if remaining:
        out['extras'] = remaining

    if person.get('identifiers'):
        out['other_identifiers'] = person['identifiers']

    return out
def make_mayors(state_to_import):
    """Create mayor person files and a municipalities list for one state.

    Rows of ``mayors.csv`` whose lower-cased "Postal Code" equals
    ``state_to_import`` become person files in
    ``data/<state>/municipalities/``; the collected municipalities are written
    to ``data/<state>/municipalities.yml``. Rows without a "Term End" get a
    placeholder end date.

    Args:
        state_to_import: lower-case postal abbreviation of the state to import.
    """
    all_municipalities = []
    # tolerate a directory left over from a previous run
    os.makedirs(f"data/{state_to_import}/municipalities", exist_ok=True)

    with open("mayors.csv") as f:
        for line in csv.DictReader(f):
            state = line["Postal Code"].lower()
            if state != state_to_import:
                continue

            city = line["City"].strip()
            given_name = line["First"].strip()
            family_name = line["Last"].strip()
            name = f"{given_name} {family_name}"
            email = line["Email"].strip()
            webform = line["Web Form"].strip()
            phone = reformat_phone_number(line["Phone"])
            fax = reformat_phone_number(line["Fax"])
            address1 = line["Address 1"].strip()
            address2 = line["Address 2"].strip()

            zipcode = line["Zip Code"].strip()
            zip4 = line["Zip Plus 4"].strip()
            if zip4:
                zipcode += "-" + zip4

            if not line["Term End"]:
                term_end = "2021-01-01"  # temporary term end date for the unknowns
            else:
                term_end = datetime.datetime.strptime(
                    line["Term End"], "%m/%d/%Y"
                ).strftime("%Y-%m-%d")

            if address2:
                full_address = f"{address1};{address2};{city}, {state.upper()} {zipcode}"
            else:
                full_address = f"{address1};{city}, {state.upper()} {zipcode}"

            # full_address is never empty (it always contains the separators)
            contact = {"note": "Primary Office", "address": full_address}
            if fax:
                contact["fax"] = fax
            if phone:
                contact["voice"] = phone
            if email:
                contact["email"] = email

            jid = city_to_jurisdiction(city, state)
            all_municipalities.append(OrderedDict({"name": city, "id": jid}))

            obj = OrderedDict(
                {
                    "id": ocd_uuid("person"),
                    "name": name,
                    "given_name": given_name,
                    "family_name": family_name,
                    "roles": [
                        {"jurisdiction": jid, "type": "mayor", "end_date": term_end}
                    ],
                    "contact_details": [contact],
                    "sources": [{"url": webform}] if webform else [],
                    "links": [{"url": webform}] if webform else [],
                }
            )
            dump_obj(obj, output_dir=f"data/{state}/municipalities/")

    dump_obj(all_municipalities, filename=f"data/{state_to_import}/municipalities.yml")
def process_old_file(filename, metadata):
    """Convert a legacy legislator JSON file into a retired person record.

    Active legislators are printed and skipped. Unused and "+"-prefixed
    fields are discarded, legislative roles are rebuilt from ``old_roles``
    via ``terms_to_roles``, and the result is dumped into the state's
    ``retired`` directory.

    Args:
        filename: path of the legacy JSON file.
        metadata: mapping whose ``"terms"`` entry is passed to ``terms_to_roles``.

    Raises:
        Exception: if ``leg_id`` and ``_id`` differ, if the record still has
            current roles, or if unrecognized fields remain after conversion.
    """
    # use a context manager so the file handle is closed promptly
    with open(filename) as f:
        data = json.load(f)

    if data["leg_id"] != data["_id"]:
        raise Exception(f"leg_id does not match _id in {filename}")
    if data.get("active"):
        print(data)
        return
    if data.get("roles", []):
        raise Exception(f"inactive legislator still has roles in {filename}")

    # remove unused fields
    for k in (
        "_yearly_contributions",
        "nimsp_candidate_id",
        "votesmart_id",
        "_contributions_start_year",
        "_scraped_name",
        "_total_contributions",
        "transparencydata_id",
        "_locked_fields",
        "level",
        "nimsp_id",
        "_type",
        "country",
        "updated_at",
        "_id",
        "active",
        "roles",
        "offices",
        "notice",
        "nickname",
        "district",
        "party",
        "chamber",
        "csrfmiddlewaretoken",
        "email",
        "created_at",
        "office_address",
        "office_phone",
        "occupation",
        "_guid",
        "_code",
        "all_ids",
        "2008-2011",
    ):
        data.pop(k, None)

    # remove plus fields (materialize the keys first; the dict is mutated)
    for k in [k for k in data.keys() if k.startswith("+")]:
        data.pop(k)

    leg_obj = OrderedDict({"id": ocd_uuid("person")})

    leg_obj["name"] = data.pop("full_name")
    first_name = data.pop("first_name")
    middle_name = data.pop("middle_name")
    last_name = data.pop("last_name")
    suffixes = data.pop("suffixes", "")
    suffix = data.pop("suffix", "")
    if first_name:
        leg_obj["given_name"] = first_name
    if last_name:
        leg_obj["family_name"] = last_name
    if middle_name:
        leg_obj["middle_name"] = middle_name
    # either field may carry the suffix; "suffixes" wins when both are set
    suffix = suffixes or suffix
    if suffix:
        leg_obj["suffix"] = suffix

    state = data.pop("state")
    jurisdiction_id = get_jurisdiction_id(state)

    # pull useful fields
    old_roles = data.pop("old_roles", {})
    parties = set()
    new_roles = []
    for roles in old_roles.values():
        for role in roles:
            # leadership and committee roles are not legislative seats
            if role["type"] in (
                "committee member",
                "Minority Floor Leader",
                "Majority Floor Leader",
                "Majority Caucus Chair",
                "Minority Caucus Chair",
                "Speaker Pro Tem",
                "President Pro Tem",
                "Senate President",
                "Speaker of the House",
                "Minority Whip",
                "Majority Whip",
                "Lt. Governor",
            ) or role.get("committee"):
                continue
            parties.add(role["party"])
            new_roles.append(
                {
                    "term": role["term"],
                    "chamber": role["chamber"],
                    "district": role["district"],
                }
            )

    leg_obj["party"] = [{"name": party} for party in parties]

    # add these to leg_obj
    roles = terms_to_roles(new_roles, metadata["terms"])
    leg_obj["roles"] = [
        OrderedDict(
            {
                "district": district,
                "jurisdiction": jurisdiction_id,
                "type": chamber,
                "start_date": f"{start}-01-01",
                "end_date": f"{end}-12-31",
            }
        )
        for chamber, district, start, end in roles
    ]

    all_ids = data.pop("_all_ids")
    leg_id = data.pop("leg_id")
    if leg_id not in all_ids:
        all_ids.append(leg_id)

    image = data.pop("photo_url", "")
    if image:
        leg_obj["image"] = image
    url = data.pop("url", "")
    if url:
        leg_obj["links"] = [{"url": url}]
    leg_obj["sources"] = data.pop("sources")

    leg_obj["other_identifiers"] = [
        {"identifier": id_, "scheme": "legacy_openstates"} for id_ in all_ids
    ]

    # anything still left is a field this conversion does not know about
    if data:
        print(data)
        raise Exception(f"unhandled fields remain in {filename}: {sorted(data)}")

    output_dir = get_data_dir(state)
    dump_obj(leg_obj, output_dir=os.path.join(output_dir, "retired"))