Пример #1
0
def status() -> None:
    """
    Print a per-state table of bill counts and search coverage.

    For each key of CONVERSION_FUNCTIONS (in sorted order) one row is echoed
    with the total number of bills, the percentage of bills that have no
    related ``searchable`` record, and the percentage whose ``searchable``
    record has ``is_error`` set.  Percentages are rounded up and colored
    green / yellow / red.
    """
    init_django()
    from openstates.data.models import Bill

    states = sorted(CONVERSION_FUNCTIONS.keys())
    click.secho("state |  bills  | missing | errors ", fg="white")
    click.secho("===================================", fg="white")
    for state in states:
        state_bills = Bill.objects.filter(
            legislative_session__jurisdiction_id=abbr_to_jid(state)
        )
        missing_count = state_bills.filter(searchable__isnull=True).count()
        error_count = state_bills.filter(searchable__is_error=True).count()
        total = state_bills.count()

        # Percentages are only computed for nonzero counts, so a state with
        # no bills at all never divides by zero.
        missing_pct, missing_color = missing_count, "green"
        if missing_count > 0:
            missing_pct = math.ceil(missing_count / total * 100)
            # anything above 1% missing is flagged red
            missing_color = "red" if missing_pct > 1 else "yellow"

        error_pct, error_color = error_count, "green"
        if error_count > 0:
            error_pct = math.ceil(error_count / total * 100)
            # anything above 5% errors is flagged red
            error_color = "red" if error_pct > 5 else "yellow"

        missing_cell = click.style(f"{missing_pct:6}%", fg=missing_color)
        error_cell = click.style(f"{error_pct:6}%", fg=error_color)
        click.echo(f"{state:5} | {total:7} | " + missing_cell + " | " + error_cell)
Пример #2
0
def to_csv(abbreviations: list[str], upload: bool) -> None:
    """
    Generate one CSV per state from its YAML files and optionally sync to S3.

    When ABBREVIATIONS is empty every abbreviation returned by
    get_all_abbreviations() is processed.  Each CSV is written as
    ``<abbr>.csv`` and, when UPLOAD is set, pushed to the
    ``data.openstates.org`` bucket under ``people/current/``.
    """
    targets = abbreviations if abbreviations else get_all_abbreviations()

    # the S3 client is only created when an upload was requested
    s3 = boto3.client("s3") if upload else None

    for abbr in targets:
        click.secho("==== {} ====".format(abbr), bold=True)
        jurisdiction_id = abbr_to_jid(abbr)
        legislature_dir = get_data_path(abbr) / "legislature"
        yaml_files = sorted(legislature_dir.glob("*.yml"))
        csv_name = f"{abbr}.csv"
        write_csv(yaml_files, jurisdiction_id, csv_name)

        if not upload:
            continue

        s3.upload_file(
            csv_name,
            "data.openstates.org",
            f"people/current/{abbr}.csv",
            ExtraArgs={"ContentType": "text/csv", "ACL": "public-read"},
        )
        click.secho(f"uploaded to data.openstates.org/people/current/{abbr}.csv", fg="green")
Пример #3
0
def main(input_dir: str) -> None:
    """
    Convert scraped JSON in INPUT_DIR to YAML files for this repo.

    Will put data into incoming/ directory for usage with merge.py's --incoming option.

    The state abbreviation is the last non-empty component of INPUT_DIR.
    Any *.yml files already present in the output directory are removed first.
    """

    # abbr is the last non-empty piece of the directory name (tolerates trailing slashes)
    abbr = next((piece for piece in reversed(input_dir.split("/")) if piece), "")

    jurisdiction_id = abbr_to_jid(abbr)

    # Swap only the last path component named "data" for "incoming".  A plain
    # string replace would also rewrite unrelated parts of the path, such as
    # a checkout living under "/data/..." or a directory called "database".
    data_dir = Path(get_data_path(abbr))
    parts = list(data_dir.parts)
    for idx in range(len(parts) - 1, -1, -1):
        if parts[idx] == "data":
            parts[idx] = "incoming"
            break
    else:
        raise ValueError(
            f"no 'data' component in {data_dir}, cannot locate incoming directory"
        )
    output_dir = Path(*parts) / "legislature"

    try:
        # parents=True: the directory that holds legislature/ may not exist yet
        output_dir.mkdir(parents=True)
    except FileExistsError:
        # clear YAML left over from an earlier run so stale files don't linger
        for stale in output_dir.glob("*.yml"):
            stale.unlink()
    process_dir(Path(input_dir), output_dir, jurisdiction_id)
Пример #4
0
def reindex_state(state: str, session: str = None) -> None:
    """
    Reindex the SearchableBill records of one state.

    state: state abbreviation, converted with abbr_to_jid() for the lookup.
    session: optional legislative session identifier; when given, only
        records belonging to that session are reindexed.
    """
    init_django()
    from openstates.data.models import SearchableBill

    # build the lookup once; the session constraint is added only when requested
    filters = {"bill__legislative_session__jurisdiction_id": abbr_to_jid(state)}
    if session:
        filters["bill__legislative_session__identifier"] = session
    bills = SearchableBill.objects.filter(**filters)

    ids = list(bills.values_list("id", flat=True))
    # report which state is being processed (the message previously printed
    # the literal word "state")
    print(f"reindexing {len(ids)} bills for {state}")
    reindex(ids)
Пример #5
0
    def to_dict(self):
        """
        Serialize this person into an OrderedDict.

        The party is normalized through PARTIES (falling back to the raw
        value), a fresh ``ocd-person/<uuid4>`` id is generated on every call,
        and optional fields are included only when they are truthy.
        ``contact_details`` is always present and holds the non-empty
        serializations of the district and capitol offices, in that order.
        """
        party = PARTIES.get(self.party.lower(), self.party)
        d = OrderedDict(
            {
                "id": f"ocd-person/{uuid.uuid4()}",
                "name": str(self.name),
                "party": [{"name": party}],
                "roles": [
                    {
                        "district": self.district,
                        "type": self.chamber,
                        "jurisdiction": abbr_to_jid(self.state),
                    }
                ],
                "links": self.links,
                "sources": self.sources,
            }
        )

        # optional string fields, emitted in this order and only when set
        for key in ("given_name", "family_name", "suffix", "image", "email"):
            value = getattr(self, key)
            if value:
                d[key] = str(value)
        if self.ids:
            d["ids"] = self.ids
        if self.extras:
            d["extras"] = self.extras

        # contact details: serialize each office once and keep non-empty results
        d["contact_details"] = []
        for office in (self.district_office, self.capitol_office):
            office_data = office.to_dict()
            if office_data:
                d["contact_details"].append(office_data)

        return d
Пример #6
0
def _resample(state: str, n: int = 50) -> None:
    """
    Grab new versions for a state from the database.

    Selects up to N randomly ordered BillVersion rows for the state and
    writes one CSV row per link of each version to ``<state>.csv`` inside
    get_raw_dir(), overwriting any existing file.

    state: state abbreviation, converted with abbr_to_jid() for the lookup.
    n: maximum number of versions to sample (not the number of CSV rows,
        since a version may have several links or none).
    """
    init_django()
    from openstates.data.models import BillVersion

    versions = BillVersion.objects.filter(
        bill__legislative_session__jurisdiction_id=abbr_to_jid(state)
    ).order_by("?")[:n]

    count = 0
    fieldnames = [
        "id",
        "session",
        "identifier",
        "title",
        "jurisdiction_id",
        "media_type",
        "url",
        "note",
    ]

    # newline="" lets the csv module control line endings itself; without it
    # rows can gain an extra "\r" on platforms that translate newlines
    with open(get_raw_dir() / f"{state}.csv", "w", newline="") as outf:
        out = csv.DictWriter(outf, fieldnames=fieldnames)
        out.writeheader()
        for v in versions:
            session = v.bill.legislative_session
            for link in v.links.all():
                out.writerow(
                    {
                        "id": v.id,
                        "session": session.identifier,
                        "jurisdiction_id": session.jurisdiction_id,
                        "identifier": v.bill.identifier,
                        "title": v.bill.title,
                        "url": link.url,
                        "media_type": link.media_type,
                        "note": v.note,
                    }
                )
                count += 1
    click.secho(f"wrote new sample csv with {count} records")
Пример #7
0
def create_person(
    fname: str,
    lname: str,
    name: str,
    state: str,
    district: str,
    party: str,
    rtype: str,
    url: str,
    image: str,
    email: str,
    start_date: str,
) -> None:
    """
    Build a new Person with a single role and dump it under the state's data path.

    The role type decides the subdirectory: legislative roles go to
    ``legislature``, mayors to ``municipalities``, and governors and
    lieutenant governors to ``executive``.  When NAME is empty the display
    name is built from FNAME and LNAME.  URL is used for both the person's
    link and source.

    Raises:
        ValueError: if RTYPE is not one of the supported role types.
    """
    directory_by_role = {
        "upper": "legislature",
        "lower": "legislature",
        "legislature": "legislature",
        "mayor": "municipalities",
        "governor": "executive",
        "lt_governor": "executive",
    }
    # Validate up front: an unknown role type used to leave the directory
    # unbound and only fail after the Person had been built.
    try:
        directory = directory_by_role[rtype]
    except KeyError:
        raise ValueError(f"unsupported role type: {rtype!r}") from None

    role = Role(
        type=rtype, district=district, jurisdiction=abbr_to_jid(state), start_date=start_date
    )

    person = Person(
        id=ocd_uuid("person"),
        name=name or f"{fname} {lname}",
        given_name=fname,
        family_name=lname,
        image=image,
        email=email,
        party=[Party(name=party)],
        roles=[role],
        links=[Link(url=url)],
        sources=[Link(url=url)],
    )

    output_dir = get_data_path(state) / directory
    dump_obj(person.dict(exclude_defaults=True), output_dir=output_dir)
Пример #8
0
def update(
    state: str, n: int, clear_errors: bool, checkpoint: int, session: str = None
) -> None:
    """
    Create search records for bills that lack one, reindexing in chunks.

    state: state abbreviation, or "all" to process every jurisdiction.
    n: maximum number of bills to update; a falsy value means all missing bills.
    clear_errors: delete SearchableBill rows flagged ``is_error`` for the
        selected bills first (not supported together with state "all").
    checkpoint: reindex and commit after this many updated bills.
    session: optional session identifier that narrows a single-state run.

    Transactions are managed manually so work is saved per checkpoint.  If
    anything fails, the uncommitted chunk is rolled back and autocommit is
    restored before the exception propagates.
    """
    init_django()
    from openstates.data.models import Bill, SearchableBill

    # Print status a few times within each checkpoint.  Integer math with a
    # floor of 1 keeps the modulo test exact; a float divisor is unreliable
    # for checkpoints that are not multiples of 5.
    status_num = max(checkpoint // 5, 1)

    if state == "all":
        all_bills = Bill.objects.all()
    elif session:
        all_bills = Bill.objects.filter(
            legislative_session__jurisdiction_id=abbr_to_jid(state),
            legislative_session__identifier=session,
        )
    else:
        all_bills = Bill.objects.filter(
            legislative_session__jurisdiction_id=abbr_to_jid(state)
        )

    if clear_errors:
        if state == "all":
            print("--clear-errors only works with specific states, not all")
            return
        errs = SearchableBill.objects.filter(bill__in=all_bills, is_error=True)
        # count in the database instead of loading every row just to count it
        print(f"clearing {errs.count()} errors")
        errs.delete()

    missing_search = all_bills.filter(searchable__isnull=True)
    if state == "all":
        MAX_UPDATE = 1000
        aggregates = missing_search.values(
            "legislative_session__jurisdiction__name"
        ).annotate(count=Count("id"))
        # skip jurisdictions with too large a backlog for an "all" run
        for agg in aggregates:
            state_name = agg["legislative_session__jurisdiction__name"]
            if agg["count"] > MAX_UPDATE:
                click.secho(
                    f"Too many bills to update for {state_name}: {agg['count']}, skipping",
                    fg="red",
                )
                missing_search = missing_search.exclude(
                    legislative_session__jurisdiction__name=state_name
                )
        print(f"{missing_search.count()} missing, updating")
    else:
        print(
            f"{state}: {all_bills.count()} bills, "
            f"{missing_search.count()} without search results"
        )

    if n:
        missing_search = missing_search[: int(n)]
    else:
        # evaluates the queryset; the cached rows are reused by the loop below
        n = len(missing_search)

    ids_to_update = []
    updated_count = 0

    # going to manage our own transactions here so we can save in chunks
    transaction.set_autocommit(False)
    try:
        for b in missing_search:
            # presumably update_bill returns the id reindex() expects - verify
            ids_to_update.append(update_bill(b))
            updated_count += 1
            if updated_count % status_num == 0:
                print(f"{state}: updated {updated_count} out of {n}")
            if updated_count % checkpoint == 0:
                reindex(ids_to_update)
                transaction.commit()
                ids_to_update = []

        # be sure to reindex final set
        reindex(ids_to_update)
        transaction.commit()
    except BaseException:
        # discard the partial chunk; no transaction may be active when
        # autocommit is switched back on
        transaction.rollback()
        raise
    finally:
        transaction.set_autocommit(True)