# Example #1
0
def run_all(output_path):
    """Run all diagnostics and write the results under *output_path*.

    The following files are produced in the *output_path* directory, which is
    created (including parents) if it does not exist:

    - one ``<source>.txt`` coverage report for each of source IDs 0 to 3;
    - one ``<check>.csv`` file for each entry in ``QUALITY``;
    - ``data.zip``, an archive of the fetched source files and the quality
      check outputs;
    - ``index.html``, rendered from ``INDEX_TEMPLATE`` with the file names
      grouped by category.

    Parameters
    ----------
    output_path : str or os.PathLike
        Directory in which to write all outputs.
    """
    from zipfile import ZIP_DEFLATED, ZipFile

    from jinja2 import Template

    output_path = Path(output_path)
    output_path.mkdir(parents=True, exist_ok=True)

    # Paths of every file that ends up in the archive
    data_files = []

    # File names shown in the index, keyed by section title
    groups = {"Coverage": [], "Quality": []}

    # Coverage
    for source_id in [0, 1, 2, 3]:
        # Output filename
        filename = f"{source_str(source_id)}.txt"
        groups["Coverage"].append(filename)

        # Read source data
        data_files.append(fetch_source(source_id, use_cache=True))
        data = pd.read_csv(data_files[-1])

        # Generate coverage and write to file
        # TODO this doesn't allow for column names other than the defaults to
        #      coverage(), above; generalize
        (output_path / filename).write_text(coverage(data))

    # Quality checks
    from item.historical import process

    for check in QUALITY:
        # Import
        check_module = import_module(f"item.historical.diagnostic.{check}")

        # Output filename
        filename = f"{check}.csv"
        groups["Quality"].append(filename)
        data_files.append(output_path / filename)

        # Generate inputs
        inputs = [process(arg) for arg in check_module.ARGS]

        # Compute and save
        check_module.compute(*inputs).to_csv(data_files[-1])

    # Archive data files. The context manager guarantees the archive is
    # closed, which is when the ZIP central directory is written; without it
    # the file could be left incomplete and the handle leaked.
    with ZipFile(
        output_path / "data.zip",
        mode="w",
        compression=ZIP_DEFLATED,
        compresslevel=9,
    ) as zf:
        for path in data_files:
            # Store each member flat, under its base name only
            zf.write(filename=path, arcname=path.name)

    groups["Cached raw source data"] = ["data.zip"]

    # Generate index file
    t = Template(INDEX_TEMPLATE)
    (output_path / "index.html").write_text(t.render(groups=groups))
def test_coverage(dataset_id, N_areas):
    """coverage() output begins with the expected area count for a dataset."""
    # Load the (possibly cached) raw data for the dataset under test
    source_path = fetch_source(dataset_id, use_cache=True)
    report = coverage(pd.read_csv(source_path))

    # Only the leading summary of the report is checked
    expected_prefix = f"{N_areas} areas: "
    assert report.startswith(expected_prefix)
def test_fetch(source_id):
    """Raw data can be fetched from individual sources.

    The test makes no assertion on the result; it passes as long as
    ``fetch_source`` does not raise for the given *source_id*.
    """
    # use_cache=False is passed so the call does not rely on the cache
    fetch_source(source_id, use_cache=False)