def validate_ocd_jurisdiction(v: typing.Any) -> str:
    """Check that ``v`` is an acceptable jurisdiction id and return it.

    ``v`` is accepted when ``lookup`` knows it. When ``lookup`` raises
    ``KeyError`` the value is still accepted if it matches
    ``JURISDICTION_RE``.

    Raises:
        ValueError: ``lookup`` failed and ``JURISDICTION_RE`` did not match.
    """
    try:
        lookup(jurisdiction_id=v)
    except KeyError:
        # unknown to the metadata: fall back to the pattern check
        if JURISDICTION_RE.match(v):
            return v
        raise ValueError(f"invalid jurisdiction_id {v}")
    return v
def validate_jurisdictions(person: Person, municipalities: list[str]) -> list[str]:
    """Return one error message per role whose jurisdiction is unknown.

    A role's jurisdiction is fine when ``metadata.lookup`` finds it or when
    it is listed in ``municipalities``.
    """
    problems = []
    for role in person.roles:
        jid = role.jurisdiction
        try:
            metadata.lookup(jurisdiction_id=jid)
        except KeyError:
            # not in the metadata: only acceptable as a known municipality
            if jid in municipalities:
                continue
            problems.append(f"{jid} is not a valid jurisdiction_id")
    return problems
def get_expected_districts(settings, abbr):
    """Build the expected seat counts per chamber and district for ``abbr``.

    The result maps chamber type ("legislature" for a unicameral chamber) to
    a ``{district name: seat count}`` dict. Each vacancy listed under
    ``settings[abbr]["vacancies"]`` whose ``vacant_until`` is still in the
    future removes one seat from its district.

    Raises:
        BadVacancy: a listed vacancy's ``vacant_until`` is not after today.
    """
    state = metadata.lookup(abbr=abbr)

    expected = {}
    for chamber in state.chambers:
        if chamber.chamber_type == "unicameral":
            key = "legislature"
        else:
            key = chamber.chamber_type
        seats = {}
        for district in chamber.districts:
            seats[district.name] = district.num_seats
        expected[key] = seats

    # remove vacancies
    vacancies = settings.get(abbr, {}).get("vacancies", [])
    if vacancies:
        click.secho(f"Processing {len(vacancies)} vacancies:")
    for vacancy in vacancies:
        if not (datetime.date.today() < vacancy["vacant_until"]):
            # stale entry: tell the user to clean up the settings and stop
            click.secho(
                "\t{chamber}-{district} expired {vacant_until} remove & re-run".format(**vacancy),
                fg="red",
            )
            raise BadVacancy()
        expected[vacancy["chamber"]][str(vacancy["district"])] -= 1
        click.secho(
            "\t{chamber}-{district} (until {vacant_until})".format(**vacancy),
            fg="yellow",
        )
    return expected
def __new__(cls, name, bases, dct):
    """Create the class and, for every class not named "State", attach
    ``classification = "state"`` and the metadata found via ``lookup``."""
    new_class = super().__new__(cls, name, bases, dct)
    if name == "State":
        return new_class

    new_class.classification = "state"
    # while we're here, load the metadata (formerly on a cached property);
    # _name_fixes translates class names that differ from the lookup name
    lookup_name = _name_fixes.get(name, name)
    new_class.metadata = lookup(name=lookup_name)
    return new_class
def legacy_districts(
    abbr: typing.Optional[str] = None,
    jurisdiction_id: typing.Optional[str] = None,
) -> dict[str, list[str]]:
    """Return legacy district names grouped by chamber type.

    The state is identified by ``abbr`` or ``jurisdiction_id``; both are
    forwarded to ``metadata.lookup``. The result always has the keys
    "upper" and "lower".
    """
    state = metadata.lookup(abbr=abbr, jurisdiction_id=jurisdiction_id)
    by_chamber: dict[str, list[str]] = {"upper": [], "lower": []}
    for district in state.legacy_districts:
        by_chamber[district.chamber_type].append(district.name)
    return by_chamber
def add_committee(self, committee: ScrapeCommittee) -> None:
    """Turn a ScrapeCommittee into a Committee, register it and save it.

    The new Committee gets a fresh ``ocd-organization/<uuid4>`` id and the
    jurisdiction id looked up from ``self.abbr``. It is stored in
    ``self.coms_by_chamber_and_name`` under the committee's parent and name
    and then handed to ``self.save_committee``.
    """
    committee_id = f"ocd-organization/{uuid.uuid4()}"
    jurisdiction_id = lookup(abbr=self.abbr).jurisdiction_id
    scraped_fields = committee.dict()
    full_com = Committee(
        id=committee_id,
        jurisdiction=jurisdiction_id,
        **scraped_fields,
    )
    siblings = self.coms_by_chamber_and_name[committee.parent]
    siblings[committee.name] = full_com
    self.save_committee(full_com)
def validate_jurisdictions(person, municipalities):
    """Return one error message per role with an unrecognised jurisdiction.

    ``person`` is a dict; its optional "roles" list holds dicts with an
    optional "jurisdiction" key. A non-empty jurisdiction is valid when
    ``metadata.lookup`` returns a truthy state for it or when it appears in
    ``municipalities``.
    """
    problems = []
    for role in person.get("roles", []):
        jid = role.get("jurisdiction")
        try:
            state = metadata.lookup(jurisdiction_id=jid)
        except KeyError:
            state = None
        # nothing to report for a missing id, a known state, or a municipality
        if not jid or state or jid in municipalities:
            continue
        problems.append(f"{jid} is not a valid jurisdiction_id")
    return problems
def jurisdiction_filter(j: str, *, jid_field):
    """Build a comparison that selects the jurisdiction described by ``j``.

    * a two-character ``j`` is treated as an abbreviation: compare
      ``jid_field`` with the looked-up jurisdiction id, or fall back to a
      name comparison when the lookup raises ``KeyError``;
    * a ``j`` starting with "ocd-jurisdiction" is compared to ``jid_field``;
    * anything else is compared to ``models.Jurisdiction.name``.
    """
    if len(j) != 2:
        if j.startswith("ocd-jurisdiction"):
            return jid_field == j
        return models.Jurisdiction.name == j

    try:
        return jid_field == lookup(abbr=j).jurisdiction_id
    except KeyError:
        # not a known abbreviation after all: treat it as a name
        return models.Jurisdiction.name == j
def coords_to_divisions(lat, lng):
    """Return the division ids covering the point (``lat``, ``lng``).

    Queries the v3.openstates.org ``divisions.geo`` endpoint. For every
    division in the response the division's own id is added, followed by
    the division id of its state (from ``metadata.lookup``).

    This is deliberately best-effort: any failure (network error, timeout,
    unexpected payload, unknown state) is swallowed and whatever was
    collected up to that point is returned, possibly an empty list.
    """
    url = f"https://v3.openstates.org/divisions.geo?lat={lat}&lng={lng}"
    divisions = []
    try:
        # requests waits indefinitely by default; bound the wait so a stalled
        # server cannot hang the caller
        data = requests.get(url, timeout=10).json()
        for d in data["divisions"]:
            divisions.append(d["id"])
            divisions.append(metadata.lookup(abbr=d["state"]).division_id)
    except Exception:
        # be very resilient
        pass
    return divisions
def add_vacancy(person: Person, until: datetime) -> None:
    """Record the seat of ``person``'s last role as vacant in settings.yml.

    Reads ``settings.yml``, appends a vacancy entry (chamber, district,
    ``until.date()``) under the lower-cased abbreviation of the role's
    jurisdiction, and writes the file back with ``dump_obj``.

    Args:
        person: the person whose last role (``person.roles[-1]``) is vacated.
        until: moment until which the seat counts as vacant; only its date
            is stored.
    """
    with open("settings.yml") as f:
        # safe_load returns None for an empty document
        settings = yaml.safe_load(f) or {}
    last_role = person.roles[-1]
    abbr = metadata.lookup(jurisdiction_id=last_role.jurisdiction).abbr.lower()
    # a state entry may exist without a "vacancies" list, so create each
    # level only when it is missing instead of assuming both or neither
    state_settings = settings.setdefault(abbr, {})
    state_settings.setdefault("vacancies", []).append({
        "chamber": last_role.type,
        "district": last_role.district,
        "vacant_until": until.date(),
    })
    dump_obj(settings, filename="settings.yml")
def test_create_full_jurisdiction_basic():
    """create_full_jurisdiction(NC) yields one jurisdiction, four
    organizations with the metadata's ids, and 170 posts."""
    state = lookup(abbr="NC")
    create_full_jurisdiction(state)

    assert Jurisdiction.objects.count() == 1
    created = Jurisdiction.objects.get()
    assert created.name == state.name
    assert created.organizations.count() == 4

    executive = created.organizations.get(classification="executive")
    assert executive.id == state.executive_organization_id
    legislature = created.organizations.get(classification="legislature")
    assert legislature.id == state.legislature_organization_id

    # 120 + 50
    assert Post.objects.count() == 170
def add_vacancy(person, until):
    """Record the seat of ``person``'s last role as vacant in settings.yml.

    Reads ``settings.yml``, appends a vacancy entry (chamber, district,
    ``until.date()``) under the lower-cased abbreviation of the role's
    jurisdiction, and writes the file back with ``dump_obj``.

    Args:
        person: dict with a non-empty "roles" list; the last role must have
            "jurisdiction", "type" and "district" keys.
        until: object with a ``date()`` method; only the date is stored.
    """
    with open("settings.yml") as f:
        # NOTE(review): presumably load_yaml yields None for an empty file,
        # like yaml.safe_load does; fall back to an empty mapping then
        settings = load_yaml(f) or {}
    last_role = person["roles"][-1]
    abbr = metadata.lookup(jurisdiction_id=last_role["jurisdiction"]).abbr.lower()
    # a state entry may exist without a "vacancies" list, so create each
    # level only when it is missing instead of assuming both or neither
    state_settings = settings.setdefault(abbr, {})
    state_settings.setdefault("vacancies", []).append({
        "chamber": last_role["type"],
        "district": last_role["district"],
        "vacant_until": until.date(),
    })
    dump_obj(settings, filename="settings.yml")
def incoming_merge(abbr, existing_people, new_people, retirement):
    """Try to merge each new person into an existing one; return the rest.

    For every entry of ``new_people`` the existing people are scanned in
    order. An existing person is a candidate when the names are equal or
    when one of their chamber roles, ignoring ``start_date``, equals the new
    person's first role and that district has exactly one seat. Candidates
    are offered to ``interactive_merge``; the scan stops at the first
    accepted merge.

    Side effect: on a role match the new person's first role is replaced by
    the existing role so its ``start_date`` survives the merge.

    Returns:
        A list of ``(new, role_matches)`` tuples for the new people that
        were not merged, where ``role_matches`` are the existing people
        that matched by role but whose merge was not accepted.
    """
    unmatched = []

    seats_for_district = {}
    state = metadata.lookup(abbr=abbr)
    for chamber in state.chambers:
        chtype = "legislature" if chamber.chamber_type == "unicameral" else chamber.chamber_type
        seats_for_district[chtype] = {
            district.name: district.num_seats for district in chamber.districts
        }

    # find candidate(s) for each new person
    for new in new_people:
        matched = False
        role_matches = []

        for existing in existing_people:
            name_match = new["name"] == existing["name"]
            role_match = False
            for role in existing.get("roles", []):
                # only roles in one of this state's chambers can be matched by
                # seat; skipping just "mayor" let any other role type raise
                # KeyError on the seat lookup below
                if role["type"] not in seats_for_district:
                    continue
                role_copy = role.copy()
                role_copy.pop("start_date", None)
                seats = seats_for_district[role_copy["type"]].get(role_copy["district"], 1)
                if new["roles"][0] == role_copy and seats == 1:
                    role_match = True
                    # if they match without start date, copy the start date over so it isn't
                    # altered or otherwise removed in the merge
                    new["roles"][0] = role
                    break
            if name_match or role_match:
                matched = interactive_merge(
                    abbr, existing, new, name_match, role_match, retirement
                )

            if matched:
                break

            # if we haven't matched and this was a role match, save this for later
            if role_match:
                role_matches.append(existing)
        else:
            # not matched (the scan ended without a break)
            unmatched.append((new, role_matches))

    return unmatched
def test_create_federal_jurisdiction():
    """create_full_jurisdiction(US) yields one "country" jurisdiction with
    four organizations and the expected number of posts per role."""
    federal = lookup(abbr="US")
    create_full_jurisdiction(federal)

    assert Jurisdiction.objects.count() == 1
    created = Jurisdiction.objects.get()
    assert created.name == federal.name
    assert created.classification == "country"
    assert created.organizations.count() == 4

    executive = created.organizations.get(classification="executive")
    assert executive.id == federal.executive_organization_id
    legislature = created.organizations.get(classification="legislature")
    assert legislature.id == federal.legislature_organization_id

    # expected posts: 435 Representatives, 50 Senators, 5 Delegates and
    # 1 Resident Commissioner
    expected_posts = (
        ("Representative", 435),
        ("Senator", 50),
        ("Delegate", 5),
        ("Resident Commissioner", 1),
    )
    for role, expected in expected_posts:
        assert Post.objects.filter(role=role).count() == expected
def test_create_chamber_unicam():
    """For unicameral NE, create_chamber adds posts to the existing
    legislature organization instead of creating a new organization."""
    state = lookup(abbr="NE")
    jurisdiction = Jurisdiction.objects.create(
        id=state.jurisdiction_id,
        name=state.name,
        division=None,
    )
    legislature = Organization.objects.create(
        id=state.legislature_organization_id,
        name=state.legislature_name,
        classification="legislature",
        jurisdiction=jurisdiction,
    )

    create_chamber(jurisdiction, legislature, state.legislature)

    # no org was created, but posts were
    assert Organization.objects.count() == 1
    only_org = Organization.objects.get(classification="legislature")
    assert only_org.posts.count() == 49
def test_create_chamber_basic():
    """create_chamber for NC's lower chamber creates the chamber
    organization with the metadata's name and id, plus 120 posts."""
    state = lookup(abbr="NC")
    jurisdiction = Jurisdiction.objects.create(
        id=state.jurisdiction_id,
        name=state.name,
        division=None,
    )
    legislature = Organization.objects.create(
        id=state.legislature_organization_id,
        name=state.legislature_name,
        classification="legislature",
        jurisdiction=jurisdiction,
    )

    create_chamber(jurisdiction, legislature, state.lower)

    # ensure the org and posts were created
    lower = Organization.objects.get(classification="lower")
    assert lower.name == state.lower.name
    assert lower.id == state.lower.organization_id
    assert lower.posts.count() == 120
def test_create_chamber_duplicate_with_changes():
    """A second create_chamber call with a changed chamber name must fail
    loudly (IntegrityError) and leave exactly one lower-chamber org."""
    nc = lookup(abbr="NC")
    juris = Jurisdiction.objects.create(id=nc.jurisdiction_id, name=nc.name, division=None)
    leg = Organization.objects.create(
        id=nc.legislature_organization_id,
        name=nc.legislature_name,
        classification="legislature",
        jurisdiction=juris,
    )

    create_chamber(juris, leg, nc.lower)

    # second call, but lower chamber name has been changed
    original_name = nc.lower.name
    nc.lower.name = "Ronald McDonald House of Clowns"
    try:
        with pytest.raises(IntegrityError):
            create_chamber(juris, leg, nc.lower)  # unsupported, should definitely be loud
    finally:
        # NOTE(review): lookup() presumably hands out a shared metadata object;
        # put the real name back so the rename cannot leak into other tests
        nc.lower.name = original_name

    assert Organization.objects.filter(classification="lower").count() == 1
def test_create_chamber_duplicate_idempotent():
    """Calling create_chamber twice with identical input leaves a single
    lower-chamber organization with the expected name, id and 120 posts."""
    state = lookup(abbr="NC")
    jurisdiction = Jurisdiction.objects.create(
        id=state.jurisdiction_id,
        name=state.name,
        division=None,
    )
    legislature = Organization.objects.create(
        id=state.legislature_organization_id,
        name=state.legislature_name,
        classification="legislature",
        jurisdiction=jurisdiction,
    )

    # second call, identical to first, should be idempotent
    for _ in range(2):
        create_chamber(jurisdiction, legislature, state.lower)

    lower_orgs = Organization.objects.filter(classification="lower")
    assert lower_orgs.count() == 1

    # ensure the org and posts were created
    lower = Organization.objects.get(classification="lower")
    assert lower.name == state.lower.name
    assert lower.id == state.lower.organization_id
    assert lower.posts.count() == 120
def load_person(data):
    """Create or update the database Person described by the dict ``data``.

    Besides the person's own fields this syncs other names, links, sources,
    identifiers, contact details and party/role memberships (committee
    memberships are left alone), and stores the computed ``primary_party``,
    ``current_role`` and ``current_jurisdiction_id`` fields.

    Returns:
        ``(created, updated)`` as reported by ``get_update_or_create``, with
        ``updated`` also reflecting changes reported by ``update_subobjects``.

    Raises:
        CancelTransaction: a party, organization or (non-legacy) post
            referenced by ``data`` does not exist.
        ValueError: a role has an unsupported type, or two major parties are
            active at once.
    """
    # import has to be here so that Django is set up
    from openstates.data.models import Person, Organization, Post

    fields = dict(
        id=data["id"],
        name=data["name"],
        given_name=data.get("given_name", ""),
        family_name=data.get("family_name", ""),
        gender=data.get("gender", ""),
        biography=data.get("biography", ""),
        birth_date=data.get("birth_date", ""),
        death_date=data.get("death_date", ""),
        image=data.get("image", ""),
        extras=data.get("extras", {}),
    )
    person, created, updated = get_update_or_create(Person, fields, ["id"])

    updated |= update_subobjects(person, "other_names", data.get("other_names", []))
    updated |= update_subobjects(person, "links", data.get("links", []))
    updated |= update_subobjects(person, "sources", data.get("sources", []))

    # identifiers come from two places: the scheme->value "ids" mapping and
    # the ready-made "other_identifiers" entries
    identifiers = []
    for scheme, value in data.get("ids", {}).items():
        identifiers.append({"scheme": scheme, "identifier": value})
    for identifier in data.get("other_identifiers", []):
        identifiers.append(identifier)
    updated |= update_subobjects(person, "identifiers", identifiers)

    # flatten each contact block into one row per non-empty contact type
    contact_details = []
    for cd in data.get("contact_details", []):
        for contact_type in ("address", "email", "voice", "fax"):
            if cd.get(contact_type):
                contact_details.append({
                    "note": cd.get("note", ""),
                    "type": contact_type,
                    "value": cd[contact_type],
                })
    updated |= update_subobjects(person, "contact_details", contact_details)

    memberships = []
    primary_party = ""
    current_jurisdiction_id = None
    current_role = None
    for party in data.get("party", []):
        party_name = party["name"]
        try:
            org = cached_lookup(Organization, classification="party", name=party["name"])
        except Organization.DoesNotExist:
            click.secho(f"no such party {party['name']}", fg="red")
            raise CancelTransaction()
        memberships.append({
            "organization": org,
            "start_date": party.get("start_date", ""),
            "end_date": party.get("end_date", ""),
        })
        if role_is_active(party):
            if primary_party in MAJOR_PARTIES and party_name in MAJOR_PARTIES:
                raise ValueError(
                    f"two primary parties for ({data['name']} {data['id']})")
            elif primary_party in MAJOR_PARTIES:
                # already set correct primary party, so do nothing
                pass
            else:
                primary_party = party_name

    for role in data.get("roles", []):
        if role["type"] in ("mayor", ):
            role_name = "Mayor"
            org_type = "government"
            use_district = False
        elif role["type"] == "governor":
            role_name = "Governor"
            if role["jurisdiction"] == "ocd-jurisdiction/country:us/district:dc/government":
                role_name = "Mayor"
            org_type = "executive"
            use_district = False
        elif role["type"] in ("upper", "lower", "legislature"):
            org_type = role["type"]
            use_district = True
        else:
            raise ValueError("unsupported role type")
        try:
            org = cached_lookup(Organization,
                                classification=org_type,
                                jurisdiction_id=role["jurisdiction"])
            if use_district:
                post = org.posts.get(label=role["district"])
            else:
                post = None
        except Organization.DoesNotExist:
            click.secho(
                f"{person} no such organization {role['jurisdiction']} {org_type}",
                fg="red")
            raise CancelTransaction()
        except Post.DoesNotExist:
            # if this is a legacy district, be quiet
            lds = legacy_districts(jurisdiction_id=role["jurisdiction"])
            # legacy_districts may have no entry for this chamber type
            # (e.g. "legislature"); treat that as "no legacy districts"
            if role["district"] not in lds.get(role["type"], []):
                click.secho(f"no such post {role}", fg="red")
                raise CancelTransaction()
            else:
                post = None

        if role_is_active(role):
            current_jurisdiction_id = role["jurisdiction"]
            current_role = {
                "org_classification": org_type,
                "district": None,
                "division_id": None
            }
            if use_district:
                state_metadata = metadata.lookup(
                    jurisdiction_id=role["jurisdiction"])
                district = state_metadata.lookup_district(
                    name=str(role["district"]), chamber=role["type"])
                assert district
                current_role["division_id"] = district.division_id
                current_role["title"] = getattr(state_metadata, role["type"]).title
                # try to force district to an int for sorting, but allow strings for non-numeric districts
                try:
                    current_role["district"] = int(role["district"])
                except ValueError:
                    current_role["district"] = str(role["district"])
            else:
                current_role["title"] = role_name
        elif not current_jurisdiction_id:
            # no active role seen yet: remember where an inactive one was held
            current_jurisdiction_id = role["jurisdiction"]

        membership = {
            "organization": org,
            "post": post,
            "start_date": role.get("start_date", ""),
            "end_date": role.get("end_date", ""),
        }
        if not use_district:
            membership["role"] = role_name
        memberships.append(membership)

    # note that we don't manage committee memberships here
    updated |= update_subobjects(
        person,
        "memberships",
        memberships,
        read_manager=person.memberships.exclude(
            organization__classification="committee"),
    )

    # set computed fields (avoid extra save)
    if (person.primary_party != primary_party
            or person.current_role != current_role
            or person.current_jurisdiction_id != current_jurisdiction_id):
        person.primary_party = primary_party
        person.current_role = current_role
        person.current_jurisdiction_id = current_jurisdiction_id
        person.save()

    return created, updated
def make_ceos():
    """Write one person file per row of ``ceo.csv``.

    Each row becomes a record with a single role, either
    "secretary of state" or (for any other Role value)
    "chief election officer", and is written with ``dump_obj`` to
    ``data/<abbr>/executive/``. Rows whose State is unknown to
    ``metadata.lookup`` are skipped.
    """
    # the csv module expects files opened with newline="" so that newlines
    # embedded in quoted fields are handled correctly
    with open("ceo.csv", newline="") as f:
        data = csv.DictReader(f)
        for line in data:
            state = line["State"].strip()
            given_name = line["First"]
            family_name = line["Last"]
            name = f"{given_name} {family_name}"
            role = line["Role"].strip().lower()
            addr1 = line["Address 1"]
            addr2 = line["Address 2"]
            city = line["City"]
            state_abbr = line["Postal Code"]
            zip5 = line["Zip Code"]
            zip4 = line["Zip Plus 4"]
            phone = line["Phone"]
            email = line["Email"]
            fax = line["Fax"]
            contact_form = line["Contact Form"]
            source = line["Source"]
            twitter = line["Twitter"]
            party = line["Party"]

            if party == "R":
                party = "Republican"
            elif party == "D":
                party = "Democratic"
            else:
                party = "Independent"

            # every role other than secretary of state is filed under the
            # generic title
            if role != "secretary of state":
                role = "chief election officer"

            full_address = "; ".join([addr1, addr2, f"{city}, {state_abbr} {zip5}-{zip4}"])

            contact = {"note": "Capitol Office"}
            contact["address"] = full_address
            if fax:
                contact["fax"] = reformat_phone_number(fax)
            if phone:
                contact["voice"] = reformat_phone_number(phone)
            if email:
                contact["email"] = email

            ids = {}
            if twitter:
                ids["twitter"] = twitter

            # one lookup serves both the jurisdiction id and the abbreviation
            try:
                state_metadata = metadata.lookup(name=state)
            except KeyError:
                continue
            jid = state_metadata.jurisdiction_id
            abbr = state_metadata.abbr.lower()

            links = [{"url": source}]
            if contact_form:
                links.append({"url": contact_form, "note": "webform"})

            obj = OrderedDict(
                {
                    "id": ocd_uuid("person"),
                    "name": name,
                    "given_name": given_name,
                    "family_name": family_name,
                    "roles": [
                        {
                            "jurisdiction": jid,
                            "type": role,
                            "end_date": "2021-12-31",
                        },
                    ],
                    "contact_details": [contact],
                    "ids": ids,
                    "sources": [{"url": source}],
                    "links": links,
                    "party": [{"name": party}],
                }
            )
            outdir = f"data/{abbr}/executive/"
            # os.makedirs(outdir)
            dump_obj(obj, output_dir=outdir)
def make_governors():
    """Write one governor person file per row of ``governors.csv``.

    Each row becomes a record with a single "governor" role and is written
    with ``dump_obj`` to ``data/<abbr>/executive/``, which is created when
    it does not exist yet.

    Raises:
        KeyError: a row's state is unknown to ``metadata.lookup``
            (not caught here), or an expected column is missing.
    """
    # the csv module expects files opened with newline="" so that newlines
    # embedded in quoted fields (the multi-line address) are handled correctly
    with open("governors.csv", newline="") as f:
        data = csv.DictReader(f)
        for line in data:
            state = line["state"]
            name = line["name"]
            given_name = line["first_name"]
            family_name = line["last_name"]
            party = line["party"]
            birth_date = line["birth_date"]
            start_date = line["start_date"]
            end_date = line["end_date"]
            website = line["website"]
            twitter = line["twitter"]
            webform = line["webform"]

            full_address = "; ".join(
                [n.strip() for n in line["address"].splitlines()])
            phone = line["phone"]
            email = line["email"]
            fax = line["fax"]

            contact = {"note": "Capitol Office"}
            if full_address:
                contact["address"] = full_address
            if fax:
                contact["fax"] = reformat_phone_number(fax)
            if phone:
                contact["voice"] = reformat_phone_number(phone)
            if email:
                contact["email"] = email

            ids = {}
            if twitter:
                ids["twitter"] = twitter

            # one lookup serves both the jurisdiction id and the abbreviation
            state_metadata = metadata.lookup(name=state)
            jid = state_metadata.jurisdiction_id
            abbr = state_metadata.abbr.lower()

            # only link a webform when the row actually has one; an empty
            # value would otherwise produce a link without a URL
            links = [{"url": website}]
            if webform:
                links.append({"url": webform, "note": "webform"})

            obj = OrderedDict({
                "id": ocd_uuid("person"),
                "name": name,
                "given_name": given_name,
                "family_name": family_name,
                "birth_date": birth_date,
                "party": [{"name": party}],
                "roles": [{
                    "jurisdiction": jid,
                    "type": "governor",
                    "start_date": start_date,
                    "end_date": end_date,
                }],
                "contact_details": [contact],
                "ids": ids,
                "sources": [{"url": website}],
                "links": links,
            })
            outdir = f"data/{abbr}/executive/"
            # the directory usually exists already (re-runs, other executive
            # files); plain makedirs would raise FileExistsError then
            os.makedirs(outdir, exist_ok=True)
            dump_obj(obj, output_dir=outdir)
def legacy_districts(**kwargs):
    """Return legacy district names grouped by chamber type.

    ``kwargs`` (jurisdiction_id or abbr) are passed straight to
    ``metadata.lookup``. The result always has the keys "upper" and "lower".
    """
    state = metadata.lookup(**kwargs)
    by_chamber = {"upper": [], "lower": []}
    for district in state.legacy_districts:
        by_chamber[district.chamber_type].append(district.name)
    return by_chamber
def get_jurisdiction_id(abbr):
    """Return the jurisdiction id that ``metadata.lookup`` has for ``abbr``."""
    state = metadata.lookup(abbr=abbr)
    return state.jurisdiction_id
def load_person(data: Person) -> tuple[bool, bool]:
    """Create or update the database person described by ``data``.

    Besides the person's own fields this syncs other names, links, sources,
    offices, identifiers and party/role memberships (committee and
    subcommittee memberships are left alone), and stores the computed
    ``primary_party``, ``current_role`` and ``current_jurisdiction_id``
    fields.

    Returns:
        ``(created, updated)`` as reported by ``get_update_or_create``, with
        ``updated`` also reflecting changes reported by ``update_subobjects``.

    Raises:
        CancelTransaction: a party, organization or (non-legacy) post
            referenced by ``data`` does not exist.
        ValueError: a role has an unsupported type, or two major parties are
            active at once.
    """
    # import has to be here so that Django is set up
    from openstates.data.models import Organization, Post
    from openstates.data.models import Person as DjangoPerson

    fields = dict(
        id=data.id,
        name=data.name,
        given_name=data.given_name,
        family_name=data.family_name,
        gender=data.gender,
        email=data.email,
        biography=data.biography,
        birth_date=data.birth_date,
        death_date=data.death_date,
        image=data.image,
        extras=data.extras,
    )
    person, created, updated = get_update_or_create(DjangoPerson, fields, ["id"])

    updated |= update_subobjects(person, "other_names",
                                 [n.dict() for n in data.other_names])
    updated |= update_subobjects(person, "links",
                                 [n.dict() for n in data.links])
    updated |= update_subobjects(person, "sources",
                                 [n.dict() for n in data.sources])
    updated |= update_subobjects(person, "offices",
                                 [n.dict() for n in data.offices])

    # identifiers come from two places: the non-empty fields of ``ids`` and
    # the explicit ``other_identifiers`` entries
    identifiers = []
    for scheme, value in data.ids.dict().items():
        if value:
            identifiers.append({"scheme": scheme, "identifier": value})
    for identifier in data.other_identifiers:
        identifiers.append({
            "scheme": identifier.scheme,
            "identifier": identifier.identifier
        })
    updated |= update_subobjects(person, "identifiers", identifiers)

    memberships = []
    primary_party = ""
    current_jurisdiction_id = None
    current_role = None
    for party in data.party:
        party_name = party.name
        try:
            org = cached_lookup(Organization, classification="party", name=party.name)
        except Organization.DoesNotExist:
            click.secho(f"no such party {party.name}", fg="red")
            raise CancelTransaction()
        memberships.append({
            "organization": org,
            "start_date": party.start_date,
            "end_date": party.end_date,
        })
        if party.is_active():
            if primary_party in MAJOR_PARTIES and party_name in MAJOR_PARTIES:
                raise ValueError(
                    f"two primary parties for ({data.name} {data.id})")
            elif primary_party in MAJOR_PARTIES:
                # already set correct primary party, so do nothing
                pass
            else:
                primary_party = party_name

    for role in data.roles:
        if role.type == "mayor":
            role_name = "Mayor"
            org_type = "government"
            use_district = False
        elif role.type == "governor":
            role_name = "Governor"
            if (role.jurisdiction ==
                    "ocd-jurisdiction/country:us/district:dc/government"):
                role_name = "Mayor"
            org_type = "executive"
            use_district = False
        elif role.type in ("secretary of state", "chief election officer"):
            role_name = role.type.title()
            org_type = "executive"
            use_district = False
        elif role.type in ("upper", "lower", "legislature"):
            org_type = role.type
            use_district = True
        else:
            raise ValueError(f"unsupported role type: {role.type}")
        try:
            org = cached_lookup(Organization,
                                classification=org_type,
                                jurisdiction_id=role.jurisdiction)
            if use_district:
                post = org.posts.get(label=role.district)
            else:
                post = None
        except Organization.DoesNotExist:
            click.secho(
                f"{person} no such organization {role.jurisdiction} {org_type}",
                fg="red",
            )
            raise CancelTransaction()
        except Post.DoesNotExist:
            # if this is a legacy district, be quiet
            lds = legacy_districts(jurisdiction_id=role.jurisdiction)
            # legacy_districts may have no entry for this chamber type
            # (e.g. "legislature"); treat that as "no legacy districts"
            if role.district not in lds.get(role.type, []):
                click.secho(f"no such post {role}", fg="red")
                raise CancelTransaction()
            else:
                post = None

        if role.is_active():
            current_jurisdiction_id = role.jurisdiction
            current_role = {
                "org_classification": org_type,
                "district": None,
                "division_id": None,
            }
            if use_district:
                state_metadata = metadata.lookup(
                    jurisdiction_id=role.jurisdiction)
                district = state_metadata.lookup_district(
                    name=str(role.district), chamber=role.type)
                assert district
                current_role["division_id"] = district.division_id
                current_role["title"] = getattr(state_metadata, role.type).title
                # try to force district to an int for sorting, but allow strings for non-numeric districts
                try:
                    current_role["district"] = int(
                        role.district)  # type: ignore
                except ValueError:
                    current_role["district"] = str(role.district)
            else:
                current_role["title"] = role_name
        elif not current_jurisdiction_id:
            # no active role seen yet: remember where an inactive one was held
            current_jurisdiction_id = role.jurisdiction

        membership = {
            "organization": org,
            "post": post,
            "start_date": role.start_date,
            "end_date": role.end_date,
        }
        if not use_district:
            membership["role"] = role_name
        memberships.append(membership)

    # note that we don't manage committee memberships here
    updated |= update_subobjects(
        person,
        "memberships",
        memberships,
        read_manager=person.memberships.exclude(
            organization__classification__in=["committee", "subcommittee"]),
    )

    # set computed fields (avoid extra save)
    if (person.primary_party != primary_party
            or person.current_role != current_role
            or person.current_jurisdiction_id != current_jurisdiction_id):
        person.primary_party = primary_party
        person.current_role = current_role
        person.current_jurisdiction_id = current_jurisdiction_id
        person.save()

    return created, updated
if role.district not in lds[role.type]: click.secho(f"no such post {role}", fg="red") raise CancelTransaction() else: post = None if role.is_active(): current_jurisdiction_id = role.jurisdiction current_role = { "org_classification": org_type, "district": None, "division_id": None, } if use_district: state_metadata = metadata.lookup( jurisdiction_id=role.jurisdiction) district = state_metadata.lookup_district(name=str( role.district), chamber=role.type) assert district current_role["division_id"] = district.division_id current_role["title"] = getattr(state_metadata, role.type).title # try to force district to an int for sorting, but allow strings for non-numeric districts try: current_role["district"] = int( role.district) # type: ignore except ValueError: current_role["district"] = str(role.district) else: current_role["title"] = role_name