Пример #1
0
    def summarize_person(self, person):
        """Fold a single person record into the running summary tallies.

        Updates the person count, the optional-field and ``extras`` tallies,
        the active-legislator index (keyed by role type, then district) and
        the party, contact-detail and identifier counters.  Every identifier
        value is additionally recorded in ``duplicate_values`` together with
        the person that carries it.
        """
        self.person_count += 1
        self.optional_fields.update(set(person) & self.OPTIONAL_FIELD_SET)
        self.extra_counts.update(person.get("extras", {}).keys())

        # Only the first active role is considered; a person without one is
        # filed under the (None, None) bucket.
        current = next(
            (r for r in person.get("roles", []) if role_is_active(r)), None
        )
        if current is None:
            role_type = district = None
        else:
            role_type = current["type"]
            district = current.get("district")
        self.active_legislators[role_type][district].append(person)

        for membership in person.get("party", []):
            if role_is_active(membership):
                self.parties[membership["name"]] += 1

        # Contact values are intentionally not tracked as duplicates: doing so
        # proved too aggressive, since legislators legitimately share phone
        # numbers and addresses.
        for office in person.get("contact_details", []):
            for field in office:
                if field == "note":
                    continue
                self.contact_counts[field] += 1

        for scheme, identifier in person.get("ids", {}).items():
            self.id_counts[scheme] += 1
            self.duplicate_values[scheme][identifier].append(person)
        for entry in person.get("other_identifiers", []):
            self.id_counts[entry["scheme"]] += 1
            self.duplicate_values[entry["scheme"]][entry["identifier"]].append(person)
Пример #2
0
    def summarize_person(self, person):
        """Add one person record to the summary counters.

        Bumps the person count, records which optional fields and ``extras``
        keys are present, files the person under their first active role
        (role type, then district) and counts active parties, contact-detail
        fields and identifier schemes.
        """
        self.person_count += 1
        present_optional = set(person) & self.OPTIONAL_FIELD_SET
        self.optional_fields.update(present_optional)
        self.extra_counts.update(person.get('extras', {}).keys())

        # First active role wins; without one the person lands in the
        # (None, None) bucket.
        current = next(
            (r for r in person.get('roles', []) if role_is_active(r)), None)
        if current is None:
            role_type = district = None
        else:
            role_type = current['type']
            district = current.get('district')
        self.active_legislators[role_type][district].append(person)

        for membership in person.get('party', []):
            if role_is_active(membership):
                self.parties[membership['name']] += 1

        # The free-text 'note' label is not a contact field, so it is skipped.
        for office in person.get('contact_details', []):
            for field in office:
                if field == 'note':
                    continue
                self.contact_counts[field] += 1

        for scheme in person.get('ids', {}):
            self.id_counts[scheme] += 1
        for entry in person.get('other_identifiers', []):
            self.id_counts[entry['scheme']] += 1
Пример #3
0
    def summarize(self, person):
        """Add one person record to the summary counters.

        Records optional fields and ``extras`` keys, files the person under
        their first active role when that role has a truthy type, and counts
        active parties, contact-detail fields (prefixed with the office note)
        and identifier schemes other than the openstates ones.
        """
        self.person_count += 1
        self.optional_fields.update(set(person) & OPTIONAL_FIELD_SET)
        self.extra_counts.update(person.get("extras", {}).keys())

        # Only the first active role is looked at.
        current = next(
            (r for r in person.get("roles", []) if role_is_active(r)), None
        )
        if current is not None:
            role_type = current["type"]
            district = current.get("district")
            if role_type:
                self.active_legislators[role_type][district].append(person)

        for membership in person.get("party", []):
            if role_is_active(membership):
                self.parties[membership["name"]] += 1

        # Counter keys look like "<office note> <field>".
        for office in person.get("contact_details", []):
            for field in office:
                if field == "note":
                    continue
                self.contact_counts[office["note"] + " " + field] += 1

        for scheme in person.get("ids", {}):
            self.id_counts[scheme] += 1

        ignored_schemes = ("openstates", "legacy_openstates")
        for entry in person.get("other_identifiers", []):
            if entry["scheme"] in ignored_schemes:
                continue
            self.id_counts[entry["scheme"]] += 1
Пример #4
0
    def validate_person(self, person, filename, person_type, date=None):
        """Validate one person record and store the findings under ``filename``.

        Errors go to ``self.errors[filename]`` and softer findings to
        ``self.warnings[filename]``.  The person's identifiers are also added
        to ``self.duplicate_values`` and, for legislative people, the file is
        indexed in ``self.active_legislators`` by role type and district.
        ``date`` is forwarded to the role checks.
        """
        errors = validate_obj(person, PERSON_FIELDS)
        self.errors[filename] = errors

        uid = person["id"].split("/")[1]
        if uid not in filename:
            errors.append(f"id piece {uid} not in filename")

        errors.extend(validate_jurisdictions(person, self.municipalities))

        is_retired = person_type == PersonType.RETIRED
        errors.extend(validate_roles(person, "roles", is_retired, date=date))
        if person_type in (PersonType.LEGISLATIVE, PersonType.EXECUTIVE):
            errors.extend(validate_roles(person, "party"))

        errors.extend(validate_offices(person))

        # active party validation
        active_parties = []
        for membership in person.get("party", []):
            party_name = membership["name"]
            if party_name not in self.valid_parties:
                errors.append(f"invalid party {party_name}")
            if role_is_active(membership):
                active_parties.append(membership["name"])
        if len(active_parties) > 1:
            major = [name for name in active_parties if name in MAJOR_PARTIES]
            if len(major) > 1:
                errors.append(
                    f"multiple active major party memberships {active_parties}"
                )
            else:
                self.warnings[filename].append(
                    f"multiple active party memberships {active_parties}"
                )

        # TODO: this was too ambitious, disabling this for now
        # self.warnings[filename] = self.check_https(person)
        if is_retired:
            errors.extend(self.validate_old_district_names(person))

        # check duplicate IDs
        for scheme, identifier in person.get("ids", {}).items():
            self.duplicate_values[scheme][identifier].append(filename)
        for entry in person.get("other_identifiers", []):
            self.duplicate_values[entry["scheme"]][entry["identifier"]].append(
                filename
            )

        # update active legislators
        if person_type != PersonType.LEGISLATIVE:
            return
        current = next(
            (
                r
                for r in person.get("roles", [])
                if role_is_active(r, date=date)
            ),
            None,
        )
        if current is None:
            # no active role: indexed under (None, None)
            role_type = district = None
        else:
            role_type = current["type"]
            district = current.get("district")
        self.active_legislators[role_type][district].append(filename)
Пример #5
0
def retire_person(person, end_date):
    """End every active role of ``person`` on ``end_date``.

    The person is modified in place.  Returns a ``(person, count)`` tuple
    where ``count`` is the number of roles that were ended.
    """
    ended = 0
    for role in person['roles']:
        if not role_is_active(role):
            continue
        role['end_date'] = end_date
        ended += 1
    return person, ended
Пример #6
0
 def validate_person(self, person, filename, person_type):
     """Validate one person record and store the findings under ``filename``.

     Errors are collected in ``self.errors[filename]`` and softer findings in
     ``self.warnings[filename]``.  The person's id-to-name mapping is always
     recorded; retired people additionally get their old district names
     checked and counted, legislative people are passed to
     ``summarize_person``.
     """
     errors = validate_obj(person, PERSON_FIELDS)
     self.errors[filename] = errors

     uid = person["id"].split("/")[1]
     if uid not in filename:
         errors.append(f"id piece {uid} not in filename")

     errors.extend(validate_jurisdictions(person, self.municipalities))

     is_retired = person_type == PersonType.RETIRED
     errors.extend(validate_roles(person, "roles", is_retired))
     if person_type in (PersonType.LEGISLATIVE, PersonType.EXECUTIVE):
         errors.extend(validate_roles(person, "party"))

     # Check every party name and collect the ones that are still active.
     active_parties = []
     for membership in person.get("party", []):
         party_name = membership["name"]
         if party_name not in self.valid_parties:
             errors.append(f"invalid party {party_name}")
         if role_is_active(membership):
             active_parties.append(membership["name"])

     if len(active_parties) > 1:
         major = [name for name in active_parties if name in MAJOR_PARTIES]
         if len(major) > 1:
             errors.append(
                 f"multiple active major party memberships {active_parties}"
             )
         else:
             self.warnings[filename].append(
                 f"multiple active party memberships {active_parties}"
             )

     # TODO: this was too ambitious, disabling this for now
     # self.warnings[filename] = self.check_https(person)
     self.person_mapping[person["id"]] = person["name"]

     if is_retired:
         self.retired_count += 1
         errors.extend(self.validate_old_district_names(person))
     elif person_type == PersonType.LEGISLATIVE:
         self.summarize_person(person)
Пример #7
0
def retire_from_committee(committee, person_id, end_date):
    """End the active memberships of ``person_id`` in ``committee``.

    The committee is modified in place.  Returns ``(committee, count)`` where
    ``count`` is the number of memberships that received ``end_date``.
    """
    ended = 0
    for membership in committee['memberships']:
        # Memberships of other people are never inspected further.
        if membership.get('id') != person_id:
            continue
        if role_is_active(membership):
            membership['end_date'] = end_date
            ended += 1
    return committee, ended
Пример #8
0
def generate_template_csv(abbreviations, filename, missing_id=None):
    """Write a CSV template with one row per person and their active role.

    Args:
        abbreviations: iterable of abbreviations; each is passed to
            ``iter_objects(abbr, "people")`` to obtain the people to list.
        filename: path of the CSV file to create (overwritten if present).
        missing_id: optional identifier scheme.  When given, people that
            already have an ``other_identifiers`` entry with this scheme are
            left out, so the template only lists people still missing it.

    Raises:
        Exception: if a listed person has no active role.
    """
    fields = ("id", "name", "chamber", "district", "jurisdiction")

    # The csv module requires newline="" on the file object; without it the
    # "\r\n" row terminator is translated again on platforms such as Windows.
    with open(filename, "w", newline="") as outfile:
        out = csv.DictWriter(outfile, fields)
        out.writeheader()

        for abbr in abbreviations:
            # Named person_filename so the output ``filename`` is not shadowed.
            for person, person_filename in iter_objects(abbr, "people"):
                if missing_id and any(
                    oid["scheme"] == missing_id
                    for oid in person.get("other_identifiers", [])
                ):
                    continue

                # The first active role supplies chamber/district/jurisdiction.
                active_role = next(
                    (role for role in person["roles"] if role_is_active(role)),
                    None,
                )
                if active_role is None:
                    raise Exception(
                        f"{person_filename}: no active role for {person.get('id')}"
                    )
                out.writerow({
                    "id": person["id"],
                    "name": person["name"],
                    "chamber": active_role["type"],
                    "district": active_role["district"],
                    "jurisdiction": active_role["jurisdiction"],
                })
Пример #9
0
def retire_from_committee(committee, person_id, end_date):
    """Close out the active memberships that ``person_id`` holds in ``committee``.

    Mutates ``committee`` and returns it together with the number of
    memberships whose ``end_date`` was set.
    """
    retired = 0
    for member in committee["memberships"]:
        is_target = member.get("id") == person_id
        # role_is_active is only consulted for the target person's entries.
        if is_target and role_is_active(member):
            member["end_date"] = end_date
            retired += 1
    return committee, retired
Пример #10
0
def generate_template_csv(abbreviations, filename, missing_id=None):
    """Write a CSV template with one row per person and their active role.

    Args:
        abbreviations: iterable of abbreviations; each is passed to
            ``iter_objects(abbr, 'people')`` to obtain the people to list.
        filename: path of the CSV file to create (overwritten if present).
        missing_id: optional identifier scheme.  When given, people that
            already have an ``other_identifiers`` entry with this scheme are
            left out, so the template only lists people still missing it.

    Raises:
        Exception: if a listed person has no active role.
    """
    fields = ('id', 'name', 'chamber', 'district', 'jurisdiction')

    # The csv module requires newline='' on the file object; without it the
    # '\r\n' row terminator is translated again on platforms such as Windows.
    with open(filename, 'w', newline='') as outfile:
        out = csv.DictWriter(outfile, fields)
        out.writeheader()

        for abbr in abbreviations:
            # Named person_filename so the output ``filename`` is not shadowed.
            for person, person_filename in iter_objects(abbr, 'people'):
                if missing_id and any(
                        oid['scheme'] == missing_id
                        for oid in person.get('other_identifiers', [])):
                    continue

                # The first active role supplies chamber/district/jurisdiction.
                active_role = next(
                    (role for role in person['roles'] if role_is_active(role)),
                    None)
                if active_role is None:
                    raise Exception(
                        f"{person_filename}: no active role for {person.get('id')}")
                out.writerow({
                    'id': person['id'],
                    'name': person['name'],
                    'chamber': active_role['type'],
                    'district': active_role['district'],
                    'jurisdiction': active_role['jurisdiction'],
                })
Пример #11
0
def validate_roles(person, roles_key, retired=False):
    """Check how many active entries ``person[roles_key]`` contains.

    Returns a list holding at most one error message:

    * no active entry on a non-retired person,
    * any active entry under ``"roles"`` on a retired person,
    * more than one active entry under ``"roles"``.

    An empty list means nothing was found.
    """
    active_count = sum(1 for role in person[roles_key] if role_is_active(role))

    if active_count == 0 and not retired:
        return [f"no active {roles_key}"]
    # The remaining checks only apply to the "roles" list.
    if roles_key != "roles":
        return []
    if retired and active_count > 0:
        return [f"{active_count} active roles on retired person"]
    if active_count > 1:
        return [f"{active_count} active roles"]
    return []
Пример #12
0
def retire_person(person, end_date, reason=None, death=False):
    """End every active role of ``person`` on ``end_date``.

    A truthy ``reason`` is stored as ``end_reason`` on each ended role, and
    ``death=True`` also records ``end_date`` as the person's ``death_date``.
    The person is modified in place; returns ``(person, count)`` with the
    number of roles that were ended.
    """
    ended = 0
    for role in person["roles"]:
        if not role_is_active(role):
            continue
        role["end_date"] = end_date
        if reason:
            role["end_reason"] = reason
        ended += 1

    if death:
        person["death_date"] = end_date

    return person, ended
Пример #13
0
    def summarize_org(self, org):
        """Add one organization record to the summary counters.

        Counts the organization, tallies its parent (prefixed with
        "subcommittee of " when the parent is itself an organization id),
        counts memberships without a person id and tallies the role of every
        active membership (``"member"`` when no role is given).
        """
        self.org_count += 1

        parent = org["parent"]
        if parent.startswith("ocd-organization"):
            parent_label = "subcommittee of " + parent
        else:
            parent_label = parent
        self.parent_types[parent_label] += 1

        for membership in org["memberships"]:
            if not membership.get("id"):
                self.missing_person_id += 1
            if not role_is_active(membership):
                continue
            self.role_types[membership.get("role", "member")] += 1
Пример #14
0
def retire_person(person, end_date, reason=None, death=False):
    """Mark all of ``person``'s active roles as ended on ``end_date``.

    When ``reason`` is truthy it is written to each ended role as
    ``end_reason``; when ``death`` is truthy ``end_date`` is also stored as
    the person's ``death_date``.  Returns the mutated person and the number
    of roles that were ended.
    """
    retired_roles = 0
    for role in person['roles']:
        if role_is_active(role):
            retired_roles += 1
            role['end_date'] = end_date
            if reason:
                role['end_reason'] = reason

    if death:
        person['death_date'] = end_date

    return person, retired_roles
Пример #15
0
    def summarize_org(self, org):
        """Fold one organization record into the summary counters.

        Increments the organization count, tallies the parent (labelled
        'subcommittee of <parent>' when the parent is an organization id),
        counts memberships lacking a person id and tallies the role of each
        active membership, defaulting to 'member'.
        """
        self.org_count += 1

        parent = org['parent']
        is_subcommittee = parent.startswith('ocd-organization')
        key = 'subcommittee of ' + parent if is_subcommittee else parent
        self.parent_types[key] += 1

        for member in org['memberships']:
            if not member.get('id'):
                self.missing_person_id += 1
            if role_is_active(member):
                role_name = member.get('role', 'member')
                self.role_types[role_name] += 1
Пример #16
0
def retire_person(person, end_date, reason=None, death=False):
    """Retire ``person`` by ending all of their active roles on ``end_date``.

    A truthy ``reason`` is stored on each ended role as ``end_reason``;
    ``death=True`` also sets the person's ``death_date``.  Any
    ``contact_details`` are dropped from the record.  The person is changed
    in place; returns ``(person, count)`` with the number of ended roles.
    """
    ended = 0
    for role in person["roles"]:
        if not role_is_active(role):
            continue
        role["end_date"] = end_date
        if reason:
            role["end_reason"] = reason
        ended += 1

    if death:
        person["death_date"] = end_date

    # old contact details are removed along with the roles
    if "contact_details" in person:
        del person["contact_details"]

    return person, ended
Пример #17
0
    def update(self, existing, new):
        """Update ``existing`` from ``new``, ending roles for any other seat.

        Each active role of ``existing`` whose (type, district) seat differs
        from ``new.seat`` gets ``self.end_date`` as its end date.  A status
        line is printed, and the merge is only performed and saved when
        ``self.save`` is set.
        """
        moving = ""

        # end any active roles
        for role in existing.data['roles']:
            district = role['district']
            chamber = role['type']
            # all-digit districts are compared as ints
            seat = (chamber, int(district) if district.isdigit() else district)
            if role_is_active(role) and seat != new.seat:
                role['end_date'] = self.end_date
                moving = f" and moving to {new.seat}"

        message = f"In {existing.seat} updating {existing.name}{moving}."
        click.secho(message, fg='yellow')

        if not self.save:
            return
        existing.merge(new)
        existing.save()
Пример #18
0
def load_person(data):
    """Create or update the database ``Person`` described by ``data``.

    ``data`` is a mapping; ``id`` and ``name`` are required, everything else
    is read with defaults.  Besides the person row itself, the related
    other names, links, sources, identifiers, contact details and
    party/role memberships are synchronised through ``update_subobjects``.

    Returns:
        ``(created, updated)`` -- ``created`` comes from
        ``get_update_or_create``; ``updated`` is that call's flag OR-ed with
        the result of every ``update_subobjects`` call.

    Raises:
        CancelTransaction: a party or organization cannot be found, or a
            role names a post that is neither present nor a legacy district.
        ValueError: a role type is not upper/lower/legislature, or two
            active memberships are both in ``MAJOR_PARTIES``.
    """
    # import has to be here so that Django is set up
    from openstates.data.models import Person, Organization, Post

    fields = dict(
        id=data["id"],
        name=data["name"],
        given_name=data.get("given_name", ""),
        family_name=data.get("family_name", ""),
        gender=data.get("gender", ""),
        biography=data.get("biography", ""),
        birth_date=data.get("birth_date", ""),
        death_date=data.get("death_date", ""),
        image=data.get("image", ""),
        extras=data.get("extras", {}),
    )
    person, created, updated = get_update_or_create(Person, fields, ["id"])

    updated |= update_subobjects(person, "other_names",
                                 data.get("other_names", []))
    updated |= update_subobjects(person, "links", data.get("links", []))
    updated |= update_subobjects(person, "sources", data.get("sources", []))

    # "ids" is a scheme -> value mapping; it is flattened into the same
    # {"scheme", "identifier"} shape that "other_identifiers" entries are
    # passed through in.
    identifiers = []
    for scheme, value in data.get("ids", {}).items():
        identifiers.append({"scheme": scheme, "identifier": value})
    for identifier in data.get("other_identifiers", []):
        identifiers.append(identifier)
    updated |= update_subobjects(person, "identifiers", identifiers)

    # Each office entry is exploded into one record per non-empty contact
    # type, all carrying the office's note.
    contact_details = []
    for cd in data.get("contact_details", []):
        for type in ("address", "email", "voice", "fax"):
            if cd.get(type):
                contact_details.append({
                    "note": cd.get("note", ""),
                    "type": type,
                    "value": cd[type]
                })
    updated |= update_subobjects(person, "contact_details", contact_details)

    # Party and role memberships are collected into one list and written in
    # a single update_subobjects call further down.
    memberships = []
    primary_party = ""
    active_division_id = ""
    current_state = ""
    for party in data.get("party", []):
        party_name = party["name"]
        try:
            org = cached_lookup(Organization,
                                classification="party",
                                name=party["name"])
        except Organization.DoesNotExist:
            click.secho(f"no such party {party['name']}", fg="red")
            raise CancelTransaction()
        memberships.append({
            "organization": org,
            "start_date": party.get("start_date", ""),
            "end_date": party.get("end_date", ""),
        })
        # Primary party selection: a party in MAJOR_PARTIES, once chosen, is
        # kept; otherwise the latest active party seen wins.  Two active
        # major parties are rejected.
        if role_is_active(party):
            if primary_party in MAJOR_PARTIES and party_name in MAJOR_PARTIES:
                raise ValueError(
                    f"two primary parties for ({data['name']} {data['id']})")
            elif primary_party in MAJOR_PARTIES:
                # already set correct primary party, so do nothing
                pass
            else:
                primary_party = party_name
    for role in data.get("roles", []):
        if role["type"] not in ("upper", "lower", "legislature"):
            raise ValueError("unsupported role type")
        try:
            org = cached_lookup(
                Organization,
                classification=role["type"],
                jurisdiction_id=role["jurisdiction"],
            )
            post = org.posts.get(label=role["district"])
        except Organization.DoesNotExist:
            click.secho(
                f"{person} no such organization {role['jurisdiction']} {role['type']}",
                fg="red",
            )
            raise CancelTransaction()
        except Post.DoesNotExist:
            # if this is a legacy district, be quiet
            lds = legacy_districts(jurisdiction_id=role["jurisdiction"])
            if role["district"] not in lds[role["type"]]:
                click.secho(f"no such post {role}", fg="red")
                raise CancelTransaction()
            else:
                post = None
        if role_is_active(role):
            # The active role determines the division id and current state;
            # if several roles are active, the last one processed wins.
            state_metadata = metadata.lookup(
                jurisdiction_id=role["jurisdiction"])
            district = state_metadata.lookup_district(name=str(
                role["district"]),
                                                      chamber=role["type"])
            assert district
            active_division_id = district.division_id
            current_state = state_metadata.abbr.upper()
        elif not current_state:
            # set current_state to *something* -- since legislators
            # are only going to ever appear in one state this is fine
            # it may become necessary to make this smarter if legislators start
            # crossing state lines, but we don't have any examples of this
            state_metadata = metadata.lookup(
                jurisdiction_id=role["jurisdiction"])
            current_state = state_metadata.abbr.upper()
        memberships.append({
            "organization": org,
            "post": post,
            "start_date": role.get("start_date", ""),
            "end_date": role.get("end_date", ""),
        })

    # note that we don't manage committee memberships here
    updated |= update_subobjects(
        person,
        "memberships",
        memberships,
        read_manager=person.memberships.exclude(
            organization__classification="committee"),
    )

    # set computed fields (avoid extra save)
    if (person.current_role_division_id != active_division_id
            or person.primary_party != primary_party
            or person.current_state != current_state):
        person.current_role_division_id = active_division_id
        person.current_state = current_state
        person.primary_party = primary_party
        person.save()

    return created, updated
Пример #19
0
import glob
from utils import load_yaml, dump_obj, role_is_active

# Add a "Contact Form" link to every California legislator file, built from
# the chamber and district of the person's active role.
for file in glob.glob("data/ca/legislature/*.yml"):
    # Close the input handle before the same file is rewritten below.
    with open(file) as inf:
        data = load_yaml(inf)

    # Start from a clean slate for every file so that a person without an
    # active role can never pick up the previous file's chamber/district
    # (or hit a NameError on the very first file).
    letter = district = None
    for role in data["roles"]:
        if role_is_active(role):
            # "A" for the lower chamber, "S" for everything else; with several
            # active roles the last one wins.
            letter = "A" if role["type"] == "lower" else "S"
            district = int(role["district"])
    if letter is None:
        print(f"skipping {file}: no active role")
        continue

    url = f"https://lcmspubcontact.lc.ca.gov/PublicLCMS/ContactPopup.php?district={letter}D{district:02d}&inframe=N"
    data.setdefault("links", []).append({"url": url, "note": "Contact Form"})
    dump_obj(data, filename=file)
Пример #20
0
def load_person(data):
    """Create or update the database ``Person`` described by ``data``.

    ``data`` is a mapping; ``id`` and ``name`` are required, everything else
    is read with defaults.  Besides the person row itself, the related
    other names, links, sources, identifiers, contact details and
    party/role memberships are synchronised through ``update_subobjects``.
    Mayor and governor roles are supported in addition to legislative ones.

    Returns:
        ``(created, updated)`` -- ``created`` comes from
        ``get_update_or_create``; ``updated`` is that call's flag OR-ed with
        the result of every ``update_subobjects`` call.

    Raises:
        CancelTransaction: a party or organization cannot be found, or a
            role names a post that is neither present nor a legacy district.
        ValueError: a role type is unsupported, or two active memberships
            are both in ``MAJOR_PARTIES``.
    """
    # import has to be here so that Django is set up
    from openstates.data.models import Person, Organization, Post

    fields = dict(
        id=data["id"],
        name=data["name"],
        given_name=data.get("given_name", ""),
        family_name=data.get("family_name", ""),
        gender=data.get("gender", ""),
        biography=data.get("biography", ""),
        birth_date=data.get("birth_date", ""),
        death_date=data.get("death_date", ""),
        image=data.get("image", ""),
        extras=data.get("extras", {}),
    )
    person, created, updated = get_update_or_create(Person, fields, ["id"])

    updated |= update_subobjects(person, "other_names", data.get("other_names", []))
    updated |= update_subobjects(person, "links", data.get("links", []))
    updated |= update_subobjects(person, "sources", data.get("sources", []))

    # "ids" is a scheme -> value mapping; it is flattened into the same
    # {"scheme", "identifier"} shape that "other_identifiers" entries are
    # passed through in.
    identifiers = []
    for scheme, value in data.get("ids", {}).items():
        identifiers.append({"scheme": scheme, "identifier": value})
    for identifier in data.get("other_identifiers", []):
        identifiers.append(identifier)
    updated |= update_subobjects(person, "identifiers", identifiers)

    # Each office entry is exploded into one record per non-empty contact
    # type, all carrying the office's note.
    contact_details = []
    for cd in data.get("contact_details", []):
        for type in ("address", "email", "voice", "fax"):
            if cd.get(type):
                contact_details.append(
                    {"note": cd.get("note", ""), "type": type, "value": cd[type]}
                )
    updated |= update_subobjects(person, "contact_details", contact_details)

    # Party and role memberships are collected into one list and written in
    # a single update_subobjects call further down.
    memberships = []
    primary_party = ""
    current_jurisdiction_id = None
    current_role = None
    for party in data.get("party", []):
        party_name = party["name"]
        try:
            org = cached_lookup(Organization, classification="party", name=party["name"])
        except Organization.DoesNotExist:
            click.secho(f"no such party {party['name']}", fg="red")
            raise CancelTransaction()
        memberships.append(
            {
                "organization": org,
                "start_date": party.get("start_date", ""),
                "end_date": party.get("end_date", ""),
            }
        )
        # Primary party selection: a party in MAJOR_PARTIES, once chosen, is
        # kept; otherwise the latest active party seen wins.  Two active
        # major parties are rejected.
        if role_is_active(party):
            if primary_party in MAJOR_PARTIES and party_name in MAJOR_PARTIES:
                raise ValueError(f"two primary parties for ({data['name']} {data['id']})")
            elif primary_party in MAJOR_PARTIES:
                # already set correct primary party, so do nothing
                pass
            else:
                primary_party = party_name
    for role in data.get("roles", []):
        # Map the role type to the organization classification to look up,
        # a display title for non-legislative roles, and whether the role
        # is tied to a district post.
        if role["type"] in ("mayor",):
            role_name = "Mayor"
            org_type = "government"
            use_district = False
        elif role["type"] == "governor":
            role_name = "Governor"
            # the DC jurisdiction's "governor" role is titled Mayor
            if role["jurisdiction"] == "ocd-jurisdiction/country:us/district:dc/government":
                role_name = "Mayor"
            org_type = "executive"
            use_district = False
        elif role["type"] in ("upper", "lower", "legislature"):
            # role_name is not assigned on this branch; it is only read
            # below when use_district is False
            org_type = role["type"]
            use_district = True
        else:
            raise ValueError("unsupported role type")
        try:
            org = cached_lookup(
                Organization, classification=org_type, jurisdiction_id=role["jurisdiction"]
            )
            if use_district:
                post = org.posts.get(label=role["district"])
            else:
                post = None
        except Organization.DoesNotExist:
            click.secho(
                f"{person} no such organization {role['jurisdiction']} {org_type}", fg="red"
            )
            raise CancelTransaction()
        except Post.DoesNotExist:
            # if this is a legacy district, be quiet
            lds = legacy_districts(jurisdiction_id=role["jurisdiction"])
            if role["district"] not in lds[role["type"]]:
                click.secho(f"no such post {role}", fg="red")
                raise CancelTransaction()
            else:
                post = None

        if role_is_active(role):
            # The active role supplies the current jurisdiction and the
            # current_role summary; if several roles are active, the last
            # one processed wins.
            current_jurisdiction_id = role["jurisdiction"]

            current_role = {"org_classification": org_type, "district": None, "division_id": None}
            if use_district:
                state_metadata = metadata.lookup(jurisdiction_id=role["jurisdiction"])
                district = state_metadata.lookup_district(
                    name=str(role["district"]), chamber=role["type"]
                )
                assert district
                current_role["division_id"] = district.division_id
                current_role["title"] = getattr(state_metadata, role["type"]).title
                # try to force district to an int for sorting, but allow strings for non-numeric districts
                try:
                    current_role["district"] = int(role["district"])
                except ValueError:
                    current_role["district"] = str(role["district"])
            else:
                current_role["title"] = role_name
        elif not current_jurisdiction_id:
            # no jurisdiction recorded yet: fall back to this inactive
            # role's jurisdiction
            current_jurisdiction_id = role["jurisdiction"]

        membership = {
            "organization": org,
            "post": post,
            "start_date": role.get("start_date", ""),
            "end_date": role.get("end_date", ""),
        }
        # only non-district (mayor/governor) memberships carry a role title
        if not use_district:
            membership["role"] = role_name
        memberships.append(membership)

    # note that we don't manage committee memberships here
    updated |= update_subobjects(
        person,
        "memberships",
        memberships,
        read_manager=person.memberships.exclude(organization__classification="committee"),
    )

    # set computed fields (avoid extra save)
    if (
        person.primary_party != primary_party
        or person.current_role != current_role
        or person.current_jurisdiction_id != current_jurisdiction_id
    ):
        person.primary_party = primary_party
        person.current_role = current_role
        person.current_jurisdiction_id = current_jurisdiction_id
        person.save()

    return created, updated
Пример #21
0
def get_chamber_and_district(person):
    """Return ``(type, district)`` of the person's first active role.

    Returns ``None`` when none of the person's roles is active.
    """
    active = next(
        (role for role in person["roles"] if role_is_active(role)), None
    )
    if active is None:
        return None
    return active["type"], active["district"]
Пример #22
0
def write_csv(files, jurisdiction_id, output_filename):
    """Export the people stored in ``files`` as one CSV row each.

    Args:
        files: paths of the YAML person files to export.
        jurisdiction_id: accepted for interface compatibility; not used here.
        output_filename: path of the CSV file to write (overwritten).

    For every person the row holds the basic biographical fields, the first
    active party, the chamber and district of the last active role, the
    district and capitol office contact details, selected social-media ids
    and the ``;``-joined link and source URLs.  A person without an active
    party or role gets empty cells for those columns.
    """
    columns = (
        "id", "name",
        "current_party", "current_district", "current_chamber",
        "given_name", "family_name", "gender", "biography", "birth_date", "death_date",
        "image", "links", "sources",
        "capitol_address", "capitol_email", "capitol_voice", "capitol_fax",
        "district_address", "district_email", "district_voice", "district_fax",
        "twitter", "youtube", "instagram", "facebook",
    )

    # The csv module requires newline="" on the file object; without it the
    # row terminator is translated a second time on platforms such as Windows.
    with open(output_filename, "w", newline="") as outf:
        out = csv.DictWriter(outf, columns)
        out.writeheader()

        for filename in files:
            with open(filename) as f:
                data = load_yaml(f)

            # Reset per file: otherwise a person without an active party or
            # role would inherit the previous person's values (or raise
            # NameError if they come first).
            current_party = current_district = current_chamber = None

            # current party: first active membership
            for role in data["party"]:
                if role_is_active(role):
                    current_party = role["name"]
                    break

            # current district: the last active role wins
            for role in data["roles"]:
                if role_is_active(role):
                    current_chamber = role["type"]
                    current_district = role["district"]

            district_address = district_email = district_voice = district_fax = None
            capitol_address = capitol_email = capitol_voice = capitol_fax = None
            for cd in data.get("contact_details", []):
                # offices are classified by their note text
                note = cd["note"].lower()
                if "district" in note:
                    district_address = cd.get("address")
                    district_email = cd.get("email")
                    district_voice = cd.get("voice")
                    district_fax = cd.get("fax")
                elif "capitol" in note:
                    capitol_address = cd.get("address")
                    capitol_email = cd.get("email")
                    capitol_voice = cd.get("voice")
                    capitol_fax = cd.get("fax")
                else:
                    click.secho("unknown office: " + note, fg="red")

            links = ";".join(link["url"] for link in data.get("links", []))
            sources = ";".join(source["url"] for source in data.get("sources", []))
            ids = data.get("ids", {})

            out.writerow({
                "id": data["id"],
                "name": data["name"],
                "current_party": current_party,
                "current_district": current_district,
                "current_chamber": current_chamber,
                "given_name": data.get("given_name"),
                "family_name": data.get("family_name"),
                "gender": data.get("gender"),
                "biography": data.get("biography"),
                "birth_date": data.get("birth_date"),
                "death_date": data.get("death_date"),
                "image": data.get("image"),
                "twitter": ids.get("twitter"),
                "youtube": ids.get("youtube"),
                "instagram": ids.get("instagram"),
                "facebook": ids.get("facebook"),
                "links": links,
                "sources": sources,
                "district_address": district_address,
                "district_email": district_email,
                "district_voice": district_voice,
                "district_fax": district_fax,
                "capitol_address": capitol_address,
                "capitol_email": capitol_email,
                "capitol_voice": capitol_voice,
                "capitol_fax": capitol_fax,
            })

    click.secho(f'processed {len(files)} files', fg='green')
Пример #23
0
def is_inactive(person, date=None):
    """Return True when ``person`` has no role that is active.

    ``date`` is forwarded to ``role_is_active``; a missing ``roles`` key is
    treated as having no roles.
    """
    roles = person.get("roles", [])
    return not any(role_is_active(role, date=date) for role in roles)
Пример #24
0
def dir_to_mongo(abbr, create, clear_old_roles, verbose):
    """Mirror one state's people files into the legacy ``fiftystates`` MongoDB.

    Each person yielded by ``iter_objects(abbr, 'people')`` is converted to
    a legacy legislator document keyed by its first ``legacy_openstates``
    identifier and saved when it differs from the stored copy.  Afterwards
    every stored legislator of the state whose ``_id`` was not seen in this
    run is passed to ``retire_person``.

    The MongoDB host is read from the ``BILLY_MONGO_HOST`` environment
    variable and defaults to ``localhost``.

    Args:
        abbr: state abbreviation; used as the metadata ``_id`` and stored
            in the ``state`` field of every document.
        create: when true, a person without a legacy ID gets one from
            ``get_next_id`` and the person file is rewritten with it; when
            false such a person aborts the run.
        clear_old_roles: when true, ``old_roles`` of the stored document
            are dropped instead of being carried over.
        verbose: when true, also report people that needed no update.

    Raises:
        SystemExit: if a person has no legacy ID and ``create`` is false,
            or if a person has no active role or no active party.
    """
    db = pymongo.MongoClient(os.environ.get('BILLY_MONGO_HOST',
                                            'localhost'))['fiftystates']

    metadata = db.metadata.find({'_id': abbr})[0]
    latest_term = metadata['terms'][-1]['name']

    active_ids = []

    for person, filename in iter_objects(abbr, 'people'):

        legacy_ids = [
            oid['identifier'] for oid in person.get('other_identifiers', [])
            if oid['scheme'] == 'legacy_openstates'
        ]
        if not legacy_ids:
            if not create:
                click.secho(
                    f'{filename} does not have legacy ID, run with --create',
                    fg='red')
                sys.exit(1)

            # allocate the next ID and persist it back to the person file
            new_id = get_next_id(db, abbr)
            legacy_ids = [new_id]
            person.setdefault('other_identifiers', []).append({
                'scheme': 'legacy_openstates',
                'identifier': new_id
            })
            dump_obj(person, filename=filename)

        active_ids.append(legacy_ids[0])

        # handle name
        prefix, first_name, last_name, suffixes = name_tools.split(
            person['name'])

        # get chamber, district, party
        # Reset for every person: otherwise somebody without an active role
        # or party would silently inherit the previous person's values (or
        # hit an UnboundLocalError if they are the first person processed).
        chamber = district = party = None
        for role in person['roles']:
            if role_is_active(role):
                chamber = role['type']
                district = role['district']
                break
        # no break here: the last active party entry wins
        for role in person['party']:
            if role_is_active(role):
                party = role['name']
        if chamber is None or party is None:
            click.secho(
                f'{filename} does not have an active role and party',
                fg='red')
            sys.exit(1)

        url = person['links'][0]['url']
        email = ''

        offices = []
        for cd in person.get('contact_details', []):
            note = cd['note']
            office = {
                'fax': cd.get('fax'),
                'phone': cd.get('voice'),
                'address': cd.get('address'),
                'email': cd.get('email'),
                'name': note,
                'type': 'capitol' if 'capitol' in note.lower() else 'district',
            }
            offices.append(office)
            # the first office that has an email provides the person's email
            if office['email'] and not email:
                email = office['email']

        # NE & DC
        if chamber == 'legislature':
            chamber = 'upper'

        # get some old data to keep around
        created_at = datetime.datetime.utcnow()
        old_roles = {}
        try:
            old_person = db.legislators.find({'_id': legacy_ids[0]})[0]
        except IndexError:
            # nothing stored yet for this ID
            old_person = None
        else:
            created_at = old_person['created_at']
            if not clear_old_roles:
                old_roles = old_person.get('old_roles', {})

        mongo_person = {
            '_id': legacy_ids[0],
            'leg_id': legacy_ids[0],
            '_all_ids': legacy_ids,
            '_type': 'person',
            'active': True,
            'full_name': person['name'],
            '_scraped_name': person['name'],
            'photo_url': person.get('image'),
            'state': abbr,
            'district': district,
            'chamber': chamber,
            'party': party,
            'email': email,
            'url': url,
            'offices': offices,
            'created_at': created_at,
            'first_name': first_name,
            'middle_name': '',
            'last_name': last_name,
            'suffixes': suffixes,
            'sources': person['sources'],
            'old_roles': old_roles,
            'roles': [
                {
                    'term': latest_term,
                    'district': district,
                    'chamber': chamber,
                    'state': abbr,
                    'party': party,
                    'type': 'member',
                    'start_date': None,
                    'end_date': None
                },
            ],
        }
        # TODO: committee info
        # { "term" : "2017-2018", "committee_id" : "NCC000233", "chamber" : "lower",
        # "state" : "nc", "subcommittee" : null, "committee" : "State and Local Government II",
        # "position" : "member", "type" : "committee member" },

        # compare, ignoring the timestamp that changes on every save
        if old_person:
            old_person.pop('updated_at', None)
        if old_person == mongo_person:
            if verbose:
                click.secho(f'no updates to {mongo_person["_id"]}')
        else:
            click.secho(f'updating {mongo_person["_id"]}', fg='green')
            mongo_person['updated_at'] = datetime.datetime.utcnow()
            # NOTE: Collection.save() was removed in PyMongo 4; this call
            # needs PyMongo 3.x or older.
            try:
                db.legislators.save(mongo_person)
            except Exception as e:
                # best effort: report the failure and move on to the next
                # person rather than aborting the whole run
                print(e)

    to_retire = db.legislators.find({
        '_id': {
            '$nin': active_ids
        },
        'state': abbr
    })
    # NOTE: Cursor.count() was removed in PyMongo 4 as well.
    click.secho(f'going to try to retire {to_retire.count()}')
    for leg in to_retire:
        retire_person(db, leg)
Пример #25
0
def test_role_is_active(role, expected):
    """Check that ``role_is_active`` reports *expected* for *role*."""
    outcome = role_is_active(role)
    assert outcome == expected
Пример #26
0
def get_chamber_and_district(person):
    """Return ``(type, district)`` of the first active role of *person*.

    Roles are scanned in the order they appear in ``person['roles']``; the
    first one accepted by ``role_is_active`` determines the result.
    """
    for role in person['roles']:
        if role_is_active(role):
            # the role's 'type' is what is reported as the chamber
            return role['type'], role['district']