def run_all(output_path):
    """Run all diagnostics and write their outputs under *output_path*.

    For each raw data source (ids 0–3) a coverage report is written to
    ``<source>.txt``; for each check in :data:`QUALITY` a CSV of computed
    quality results is written to ``<check>.csv``. All data files are then
    archived into ``data.zip``, and an ``index.html`` linking everything is
    rendered from :data:`INDEX_TEMPLATE`.

    Parameters
    ----------
    output_path : str or os.PathLike
        Directory for output files; created (with parents) if missing.
    """
    from zipfile import ZIP_DEFLATED, ZipFile

    from jinja2 import Template

    output_path = Path(output_path)
    output_path.mkdir(parents=True, exist_ok=True)

    # Paths of all files to be archived into data.zip below
    data_files = []

    # Coverage
    groups = {"Coverage": [], "Quality": []}
    for source_id in [0, 1, 2, 3]:
        # Output filename
        filename = f"{source_str(source_id)}.txt"
        groups["Coverage"].append(filename)

        # Read source data (cached copy, if available)
        data_files.append(fetch_source(source_id, use_cache=True))
        data = pd.read_csv(data_files[-1])

        # Generate coverage and write to file
        # TODO this doesn't allow for column names other than the defaults to
        #      coverage(), above; generalize
        (output_path / filename).write_text(coverage(data))

    # Quality checks
    from item.historical import process

    for check in QUALITY:
        # Import the module implementing this check
        check_module = import_module(f"item.historical.diagnostic.{check}")

        # Output filename
        filename = f"{check}.csv"
        groups["Quality"].append(filename)
        data_files.append(output_path / filename)

        # Generate inputs declared by the check module
        inputs = [process(arg) for arg in check_module.ARGS]

        # Compute and save
        check_module.compute(*inputs).to_csv(data_files[-1])

    # Archive data files. Use a context manager so the archive is closed and
    # its central directory is written even if zf.write() raises; the original
    # code never closed the ZipFile, risking a truncated/corrupt data.zip.
    with ZipFile(
        output_path / "data.zip",
        mode="w",
        compression=ZIP_DEFLATED,
        compresslevel=9,
    ) as zf:
        for path in data_files:
            zf.write(filename=path, arcname=path.name)

    groups["Cached raw source data"] = ["data.zip"]

    # Generate index file
    t = Template(INDEX_TEMPLATE)
    (output_path / "index.html").write_text(t.render(groups=groups))
def test_coverage(dataset_id, N_areas):
    """Test the historical.diagnostics.coverage method."""
    # Load the (cached) raw data for this source, then run coverage() on it
    source_path = fetch_source(dataset_id, use_cache=True)
    frame = pd.read_csv(source_path)
    report = coverage(frame)

    # The report's first line states the number of areas covered
    expected_prefix = f"{N_areas} areas: "
    assert report.startswith(expected_prefix)
def test_fetch(source_id):
    """Raw data can be fetched from individual sources."""
    # Bypass the cache so the actual remote-fetch path is exercised;
    # success is simply the call completing without raising.
    fetch_source(source_id, use_cache=False)