Пример #1
0
def deploy_results(results: List[APIOutput], output: str, write_csv=False):
    """Deploys API results as JSON (and optionally CSV) to an output directory.

    Args:
        results: API output rows; each row carries a `file_stem` and a
            pydantic `data` payload.
        output: Output folder to save results in (created if missing).
        write_csv: If True, also write each payload as a nested CSV. The
            unwrapped payload must be a list for CSV export.

    Raises:
        ValueError: If `write_csv` is set and an unwrapped payload is not a list.
    """
    # mkdir(exist_ok=True) already tolerates a pre-existing directory, so no
    # separate exists() check is needed.
    pathlib.Path(output).mkdir(parents=True, exist_ok=True)

    for api_row in results:
        data = remove_root_wrapper(api_row.data.dict())
        # Encoding approach based on Pydantic's implementation of .json():
        # https://github.com/samuelcolvin/pydantic/pull/210/files
        # `json` isn't in `pydantic/__init__py` which I think means it doesn't intend to export
        # it. We use it anyway and pylint started complaining.
        # pylint: disable=no-member
        data_as_json = simplejson.dumps(data,
                                        ignore_nan=True,
                                        default=pydantic.json.pydantic_encoder)
        dataset_deployer.upload_json(api_row.file_stem, data_as_json, output)
        if write_csv:
            if not isinstance(data, list):
                raise ValueError("Cannot find list data for csv export.")
            dataset_deployer.write_nested_csv(data, api_row.file_stem, output)
def test_write_nested_csv_with_skipped_keys(tmp_path):
    """Keys named in ``keys_to_skip`` must be excluded from the CSV header."""
    csv_path = tmp_path / "output.csv"
    rows = [{"foo": {"bar": 1, "baz": 2}}]

    dataset_deployer.write_nested_csv(rows, csv_path, keys_to_skip=["foo.bar"])

    first_line = csv_path.read_text().split("\n")[0]
    assert first_line == "foo.baz"
Пример #3
0
def deploy_csv_api_output(api_output: pydantic.BaseModel,
                          output_path: pathlib.Path,
                          columns: List[str]) -> None:
    """Write a root-wrapped pydantic model to ``output_path`` as a nested CSV
    using ``columns`` as the header."""
    if not hasattr(api_output, "__root__"):
        raise AssertionError("Missing root data")

    unwrapped = dataset_deployer.remove_root_wrapper(
        _model_to_dict(api_output.__dict__)
    )
    dataset_deployer.write_nested_csv(unwrapped, output_path, header=columns)
def deploy_csv_api_output(
    api_output: pydantic.BaseModel,
    output_path: pathlib.Path,
    keys_to_skip: Optional[List[str]] = None,
) -> None:
    """Write a root-wrapped pydantic model to ``output_path`` as a nested CSV,
    dropping any columns listed in ``keys_to_skip``."""
    if not hasattr(api_output, "__root__"):
        raise AssertionError("Missing root data")

    unwrapped = dataset_deployer.remove_root_wrapper(api_output.dict())
    dataset_deployer.write_nested_csv(
        unwrapped, output_path, keys_to_skip=keys_to_skip
    )
Пример #5
0
def deploy_csv_api_output(
    intervention: Intervention,
    api_output: pydantic.BaseModel,
    output_dir: pathlib.Path,
    filename_override=None,
):
    """Write a root-wrapped pydantic model as a nested CSV under ``output_dir``.

    The file name defaults to ``<output_key(intervention)>.csv`` unless
    ``filename_override`` is provided. The directory is created if missing.
    """
    if not hasattr(api_output, "__root__"):
        raise AssertionError("Missing root data")

    # exist_ok=True makes a separate exists() check unnecessary.
    output_dir.mkdir(parents=True, exist_ok=True)

    if filename_override:
        filename = filename_override
    else:
        filename = api_output.output_key(intervention) + ".csv"

    unwrapped = dataset_deployer.remove_root_wrapper(api_output.dict())
    dataset_deployer.write_nested_csv(unwrapped, output_dir / filename)
Пример #6
0
def deploy_results(results: List[APIOutput], output: str, write_csv=False):
    """Deploys API results as JSON (and optionally CSV) files.

    Args:
        results: API output rows; each row carries a `file_stem` and a
            pydantic `data` payload.
        output: Output folder to save results in.
        write_csv: If True, also write each payload as a nested CSV. A payload
            that is neither a list nor carries a list under its 'data' key is
            skipped with a warning rather than failing the whole deploy.
    """
    for api_row in results:
        dataset_deployer.upload_json(api_row.file_stem, api_row.data.json(),
                                     output)
        if not write_csv:
            continue
        data = api_row.data.dict()
        if not isinstance(data, list):
            # Most of the API schemas keep the list of rows under the `'data'` key.
            if not isinstance(data.get('data'), list):
                # Plain string: the original used an f-string with no placeholders.
                logger.warning("Missing data field with list of data.")
                continue
            data = data['data']
        dataset_deployer.write_nested_csv(data, api_row.file_stem, output)
Пример #7
0
def compare_snapshots(
    input_dir, output_dir, input_snapshot, compare_snapshot, state, fips, intervention_name
):
    """Compares state API output between two snapshots (or a local dir and a snapshot).

    Exactly one of `input_dir` / `input_snapshot` supplies the left-hand data;
    `compare_snapshot` is auto-detected when falsy. Differences above the
    Comparitor thresholds are written as `compared.csv` and `report.txt` under
    `output_dir`, and the report is posted to Slack when the
    SLACK_DEV_ALERTS_WEBHOOK env var is set.

    Args:
        input_dir: Local directory of `<STATE>.<intervention>.timeseries.json` files.
        output_dir: Directory to write comparison artifacts into (created if missing).
        input_snapshot: Snapshot id to compare from (mutually exclusive with input_dir).
        compare_snapshot: Snapshot id to compare against; auto-detected if falsy.
        state: Optional single state abbreviation; defaults to all 50 states.
        fips: County FIPS code — not supported yet.
        intervention_name: Intervention whose timeseries is compared.

    Returns:
        A summary string when no differences above thresholds are found,
        otherwise None (artifacts are written to disk / Slack instead).

    Raises:
        Exception: If neither or both of input_dir / input_snapshot are given.
        NotImplementedError: If `fips` is provided.
    """
    # XOR: exactly one input source must be specified.
    if not (input_dir or input_snapshot) or (input_dir and input_snapshot):
        raise Exception("Need to specify either snapshot or input dir not both")
    states = [state] if state else STATES_50.values()
    results = []
    report = []
    output_report = ""

    if not compare_snapshot:
        compare_snapshot = _get_compare_snapshot()
        if input_dir:
            input_snapshot_from_dir = _get_input_dir_snapshot(input_dir)
            output_report += f"More info can be found at https://data.covidactnow.org/snapshot/{input_snapshot_from_dir}/qa/compared.csv"

    for state_abbrev in states:
        if fips:
            raise NotImplementedError("currently only handles states data")
        if not input_dir:
            api1 = _get_state_data(input_snapshot, state_abbrev, intervention_name)
        else:
            filepath = os.path.join(
                input_dir, f"{state_abbrev.upper()}.{intervention_name}.timeseries.json"
            )
            with open(filepath) as json_file:
                api1 = json.load(json_file)
        api2 = _get_state_data(compare_snapshot, state_abbrev, intervention_name)
        if not (api1 and api2):
            print(f"State Abbrev {state_abbrev} doesn't have data")
            continue
        comparitor = Comparitor(
            input_snapshot, compare_snapshot, api1, api2, state_abbrev, intervention_name, fips
        )
        state_results = comparitor.compare_metrics()
        if state_results:
            report.append(comparitor.generate_report())
            results.extend(state_results)
        print(f"Adding {state_abbrev} {len(state_results)} to results")

    if not results:
        return f"Applied dif from {input_snapshot} to {compare_snapshot}, no difference above thresholds found"

    # make the output directory if it doesn't exist already
    os.makedirs(output_dir, exist_ok=True)
    dataset_deployer.write_nested_csv(
        Comparitor.dict_results(sorted(results, reverse=True)), "compared", output_dir
    )

    formatted_report = "\n--" + "\n\n--".join(report)
    output_report = f"Applied dif from {input_snapshot if input_snapshot else input_dir} to {compare_snapshot}: {formatted_report} {output_report}"
    # write report to a file
    output_path = pathlib.Path(output_dir) / "report.txt"
    with output_path.open("w") as report_file:
        report_file.write(output_report)

    # send report to slack; skip quietly when the webhook isn't configured
    # instead of letting requests.post() fail on a None URL.
    slack_url = os.getenv("SLACK_DEV_ALERTS_WEBHOOK")
    if slack_url:
        requests.post(
            slack_url,
            data=json.dumps({"text": output_report}),
            headers={"Content-Type": "application/json"},
        )
Пример #8
0
def deploy_prediction_timeseries_csvs(data: APIOutput, output):
    """Write every timeseries row of ``data`` to a nested CSV named by its file stem."""
    rows = [timeseries_row.dict() for timeseries_row in data.data]
    dataset_deployer.write_nested_csv(rows, data.file_stem, output)