def deploy_results(results: List[APIOutput], output: str, write_csv=False):
    """Deploy API results as JSON (and optionally CSV) files to an output directory.

    Args:
        results: API output rows to deploy; each row carries a `file_stem`
            and a pydantic `data` model.
        output: Output folder to save results in (created if missing).
        write_csv: When True, also write each result's data as a nested CSV.

    Raises:
        ValueError: If `write_csv` is set but a result's unwrapped data is
            not a list (nested CSV export requires a list of rows).
    """
    output_path = pathlib.Path(output)
    # mkdir(parents=True, exist_ok=True) already tolerates an existing
    # directory, so a separate exists() check is redundant.
    output_path.mkdir(parents=True, exist_ok=True)

    for api_row in results:
        data = remove_root_wrapper(api_row.data.dict())
        # Encoding approach based on Pydantic's implementation of .json():
        # https://github.com/samuelcolvin/pydantic/pull/210/files
        # `json` isn't in `pydantic/__init__.py` which I think means it doesn't
        # intend to export it. We use it anyway and pylint started complaining.
        # pylint: disable=no-member
        data_as_json = simplejson.dumps(data, ignore_nan=True, default=pydantic.json.pydantic_encoder)
        dataset_deployer.upload_json(api_row.file_stem, data_as_json, output)
        if write_csv:
            if not isinstance(data, list):
                raise ValueError("Cannot find list data for csv export.")
            dataset_deployer.write_nested_csv(data, api_row.file_stem, output)
def test_write_nested_csv_with_skipped_keys(tmp_path):
    """Keys listed in keys_to_skip must be excluded from the CSV header."""
    csv_path = tmp_path / "output.csv"
    rows = [{"foo": {"bar": 1, "baz": 2}}]

    dataset_deployer.write_nested_csv(rows, csv_path, keys_to_skip=["foo.bar"])

    first_line = csv_path.read_text().split("\n")[0]
    assert first_line == "foo.baz"
def deploy_csv_api_output(api_output: pydantic.BaseModel, output_path: pathlib.Path, columns: List[str]) -> None:
    """Write a root-wrapped pydantic model as a nested CSV with an explicit header.

    Args:
        api_output: Pydantic model; must carry a `__root__` wrapper.
        output_path: Destination CSV path.
        columns: Column names to use as the CSV header.

    Raises:
        AssertionError: If the model has no `__root__` attribute.
    """
    if not hasattr(api_output, "__root__"):
        raise AssertionError("Missing root data")
    model_dict = _model_to_dict(api_output.__dict__)
    unwrapped_rows = dataset_deployer.remove_root_wrapper(model_dict)
    dataset_deployer.write_nested_csv(unwrapped_rows, output_path, header=columns)
def deploy_csv_api_output(
    api_output: pydantic.BaseModel,
    output_path: pathlib.Path,
    keys_to_skip: Optional[List[str]] = None,
) -> None:
    """Write a root-wrapped pydantic model as a nested CSV, omitting skipped keys.

    Args:
        api_output: Pydantic model; must carry a `__root__` wrapper.
        output_path: Destination CSV path.
        keys_to_skip: Flattened key names to drop from the CSV, if any.

    Raises:
        AssertionError: If the model has no `__root__` attribute.
    """
    if not hasattr(api_output, "__root__"):
        raise AssertionError("Missing root data")
    unwrapped_rows = dataset_deployer.remove_root_wrapper(api_output.dict())
    dataset_deployer.write_nested_csv(unwrapped_rows, output_path, keys_to_skip=keys_to_skip)
def deploy_csv_api_output(
    intervention: Intervention,
    api_output: pydantic.BaseModel,
    output_dir: pathlib.Path,
    filename_override=None,
):
    """Write a root-wrapped pydantic model as a CSV under `output_dir`.

    The filename defaults to ``<api_output.output_key(intervention)>.csv``
    unless `filename_override` is supplied.

    Raises:
        AssertionError: If the model has no `__root__` attribute.
    """
    if not hasattr(api_output, "__root__"):
        raise AssertionError("Missing root data")
    if not output_dir.exists():
        output_dir.mkdir(parents=True, exist_ok=True)

    if filename_override:
        filename = filename_override
    else:
        filename = api_output.output_key(intervention) + ".csv"
    target_path = output_dir / filename

    unwrapped_rows = dataset_deployer.remove_root_wrapper(api_output.dict())
    dataset_deployer.write_nested_csv(unwrapped_rows, target_path)
def deploy_results(results: List[APIOutput], output: str, write_csv=False):
    """Deploys results from the top counties to specified output directory.

    Args:
        results: API output rows to deploy; each row carries a `file_stem`
            and a pydantic `data` model.
        output: Output folder to save results in.
        write_csv: When True, also write each result's data as a nested CSV
            (rows missing list data are skipped with a warning).
    """
    for api_row in results:
        dataset_deployer.upload_json(api_row.file_stem, api_row.data.json(), output)
        if not write_csv:
            continue
        data = api_row.data.dict()
        if not isinstance(data, list):
            # Most of the API schemas have the lists under the `'data'` key.
            nested = data.get('data')
            if not isinstance(nested, list):
                # Best-effort: skip this row rather than fail the whole deploy.
                logger.warning("Missing data field with list of data.")
                continue
            data = nested
        dataset_deployer.write_nested_csv(data, api_row.file_stem, output)
def compare_snapshots(
    input_dir, output_dir, input_snapshot, compare_snapshot, state, fips, intervention_name
):
    """Compare state API output between two sources and report the differences.

    Exactly one of `input_dir` (local JSON files) or `input_snapshot` (remote
    snapshot id) supplies the first dataset; `compare_snapshot` supplies the
    second (fetched via `_get_compare_snapshot()` when not given). Differences
    above thresholds are written to `<output_dir>/compared.csv` and
    `<output_dir>/report.txt`, and the report text is POSTed to a Slack
    webhook read from the SLACK_DEV_ALERTS_WEBHOOK env var.

    Raises:
        Exception: If neither or both of `input_dir`/`input_snapshot` are set.
        NotImplementedError: If `fips` is given (county-level not supported).
    """
    # Exactly one input source must be specified.
    if not (input_dir or input_snapshot) or (input_dir and input_snapshot):
        raise Exception("Need to specify either snapshot or input dir not both")
    # Restrict to a single state when requested, else compare all 50.
    states = [state] if state else STATES_50.values()
    results = []
    report = []
    output_report = ""
    if not (compare_snapshot):
        compare_snapshot = _get_compare_snapshot()
    if input_dir:
        # Derive a snapshot id from the local dir so the report can link to it.
        input_snapshot_from_dir = _get_input_dir_snapshot(input_dir)
        output_report += f"More info can be found at https://data.covidactnow.org/snapshot/{input_snapshot_from_dir}/qa/compared.csv"
    for state_abbrev in states:
        if not fips:
            # api1: first dataset, from remote snapshot or local JSON file.
            if not input_dir:
                api1 = _get_state_data(input_snapshot, state_abbrev, intervention_name)
            else:
                filepath = os.path.join(
                    input_dir, f"{state_abbrev.upper()}.{intervention_name}.timeseries.json"
                )
                with open(filepath) as json_file:
                    api1 = json.load(json_file)
            # api2: second dataset, always from the compare snapshot.
            api2 = _get_state_data(compare_snapshot, state_abbrev, intervention_name)
        else:
            raise NotImplementedError("currently only handles states data")
        if not (api1 and api2):
            print(f"State Abbrev {state_abbrev} doesn't have data")
        else:
            comparitor = Comparitor(
                input_snapshot, compare_snapshot, api1, api2, state_abbrev, intervention_name, fips
            )
            state_results = comparitor.compare_metrics()
            if state_results:
                # Only states with above-threshold differences contribute.
                report.append(comparitor.generate_report())
                results.extend(state_results)
                print(f"Adding {state_abbrev} {len(state_results)} to results")
    if not len(results):
        return f"Applied dif from {input_snapshot} to {compare_snapshot}, no difference above thresholds found"
    # make the output directory if it doesn't exist already
    os.makedirs(output_dir, exist_ok=True)
    dataset_deployer.write_nested_csv(
        Comparitor.dict_results(sorted(results, reverse=True)), "compared", output_dir
    )
    formatted_report = "\n--" + "\n\n--".join(report)
    output_report = f"Applied dif from {input_snapshot if input_snapshot else input_dir} to {compare_snapshot}: {formatted_report} {output_report}"
    # write report to a file
    output_path = pathlib.Path(output_dir) / f"report.txt"
    with output_path.open("w") as report_file:
        report_file.write(output_report)
    # send report to slack
    slack_url = os.getenv("SLACK_DEV_ALERTS_WEBHOOK")
    response = requests.post(
        slack_url,
        data=json.dumps({"text": output_report}),
        headers={"Content-Type": "application/json"},
    )
def deploy_prediction_timeseries_csvs(data: APIOutput, output):
    """Write each timeseries row of an APIOutput as one nested-CSV record."""
    row_dicts = [timeseries_row.dict() for timeseries_row in data.data]
    dataset_deployer.write_nested_csv(row_dicts, data.file_stem, output)