Example #1
def _map_outputs(
    state=None,
    output_interval_days=1,
    states_only=False,
    output_dir=None,
    run_mode="default",
):
    output_interval_days = int(output_interval_days)
    _cache_global_datasets()
    if state:
        web_ui_mapper = WebUIDataAdaptorV1(
            state,
            output_interval_days=output_interval_days,
            run_mode=run_mode,
            jhu_dataset=nyt_dataset,  # NYT data is supplied via the jhu_dataset parameter
            cds_dataset=cds_dataset,
            output_dir=output_dir,
        )
        web_ui_mapper.generate_state(states_only=states_only)
    else:
        # No single state given: recurse once per state.
        for state_name in ALL_STATES:
            _map_outputs(
                state_name,
                output_interval_days,
                states_only=states_only,
                run_mode=run_mode,
                output_dir=output_dir,
            )
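
The mapper construction above reads module-level globals (nyt_dataset, cds_dataset) that _cache_global_datasets() presumably populates first. A minimal sketch of that caching pattern, assuming hypothetical loader stubs (load_nyt_data and load_cds_data are made-up names, not the project's real loaders):

nyt_dataset = None
cds_dataset = None


def load_nyt_data():
    # Hypothetical stand-in for the real NYT loader.
    return {"source": "NYT"}


def load_cds_data():
    # Hypothetical stand-in for the real CDS loader.
    return {"source": "CDS"}


def _cache_global_datasets():
    # Populate the module-level globals once; later calls are no-ops.
    global nyt_dataset, cds_dataset
    if nyt_dataset is None:
        nyt_dataset = load_nyt_data()
    if cds_dataset is None:
        cds_dataset = load_cds_data()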
Example #2
def _map_outputs(states,
                 output_interval_days=1,
                 states_only=False,
                 output_dir=None,
                 run_mode="default"):
    for state in states:
        web_ui_mapper = WebUIDataAdaptorV1(
            state,
            output_interval_days=output_interval_days,
            run_mode=run_mode,
            output_dir=output_dir,
        )
        web_ui_mapper.generate_state(whitelisted_county_fips=[],
                                     states_only=states_only)
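
Unlike Example #1, this variant takes an explicit list of states and loops itself, so there is no recursion and no ALL_STATES fallback. A hedged usage sketch; the state codes and output path are illustrative values only:

_map_outputs(
    ["CA", "TX"],
    output_interval_days=4,
    states_only=True,
    output_dir="output/web_ui",
)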
Example #3
def _write_pipeline_output(
    pipelines: List[Union[SubStatePipeline, StatePipeline]],
    output_dir: str,
    output_interval_days: int = 4,
    write_webui_output: bool = False,
):

    infection_rate_metric_df = pd.concat((p.infer_df for p in pipelines),
                                         ignore_index=True)
    # TODO: Use constructors in MultiRegionTimeseriesDataset
    timeseries_dataset = TimeseriesDataset(infection_rate_metric_df)
    latest = timeseries_dataset.latest_values_object()
    multiregion_rt = MultiRegionTimeseriesDataset.from_timeseries_and_latest(
        timeseries_dataset, latest)
    output_path = pathlib.Path(
        output_dir) / pyseir.utils.SummaryArtifact.RT_METRIC_COMBINED.value
    multiregion_rt.to_csv(output_path)
    root.info(f"Saving Rt results to {output_path}")

    icu_df = pd.concat((p.icu_data.data for p in pipelines if p.icu_data),
                       ignore_index=True)
    timeseries_dataset = TimeseriesDataset(icu_df)
    latest = timeseries_dataset.latest_values_object().data.set_index(
        CommonFields.LOCATION_ID)
    multiregion_icu = MultiRegionTimeseriesDataset(icu_df, latest)

    output_path = pathlib.Path(
        output_dir) / pyseir.utils.SummaryArtifact.ICU_METRIC_COMBINED.value
    multiregion_icu.to_csv(output_path)
    root.info(f"Saving ICU results to {output_path}")

    if write_webui_output:
        # Does not parallelize well, because the web_ui mapper doesn't serialize efficiently.
        # TODO: Remove intermediate artifacts and parallelize artifact creation.
        # Approximately 40% of the processing time is spent in this step.
        web_ui_mapper = WebUIDataAdaptorV1(
            output_interval_days=output_interval_days,
            output_dir=output_dir,
        )
        webui_inputs = [
            webui_data_adaptor_v1.RegionalInput.from_results(
                p.fitter, p.ensemble, p.infer_df) for p in pipelines
            if p.fitter
        ]

        with Pool(maxtasksperchild=1) as p:
            p.map(web_ui_mapper.write_region_safely, webui_inputs)
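
The final block maps a bound method over the inputs: pickle ships the adaptor instance to a worker with each task, which is why the comment above notes it "doesn't serialize efficiently". maxtasksperchild=1 recycles every worker after a single task, bounding per-worker memory growth at the cost of extra process startup. A self-contained sketch of the same pattern; ToyAdaptor is a made-up stand-in for WebUIDataAdaptorV1:

from multiprocessing import Pool


class ToyAdaptor:
    # Made-up stand-in for WebUIDataAdaptorV1.
    def __init__(self, output_dir):
        self.output_dir = output_dir

    def write_region_safely(self, region):
        # The whole instance is pickled to the worker with each task.
        return f"{self.output_dir}/{region}.json"


if __name__ == "__main__":
    mapper = ToyAdaptor("out")
    # maxtasksperchild=1: every worker exits after one task and is replaced.
    with Pool(maxtasksperchild=1) as pool:
        print(pool.map(mapper.write_region_safely, ["36061", "06075"]))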
Example #4
def _map_outputs(
    state=None,
    output_interval_days=1,
    states_only=False,
    output_dir=None,
    run_mode="default",
):
    output_interval_days = int(output_interval_days)
    _cache_global_datasets()
    if state:
        web_ui_mapper = WebUIDataAdaptorV1(
            state,
            output_interval_days=output_interval_days,
            run_mode=run_mode,
            output_dir=output_dir,
        )
        web_ui_mapper.generate_state(
            whitelisted_county_fips=[], states_only=states_only,
        )
    else:
        for state_name in ALL_STATES:
            _map_outputs(
                state_name,
                output_interval_days,
                states_only=states_only,
                run_mode=run_mode,
                output_dir=output_dir,
            )
Example #5
def _build_all_for_states(
    states=None,
    run_mode=DEFAULT_RUN_MODE,
    generate_reports=False,
    output_interval_days=4,
    skip_download=False,
    output_dir=None,
    skip_whitelist=False,
    states_only=False,
):
    # prepare data
    states = states or []  # avoid the mutable-default pitfall of `states=[]`
    _cache_global_datasets()
    if not skip_download:
        cache_all_data()
    if not skip_whitelist:
        _generate_whitelist()

    # do everything for just the states, in parallel
    p = Pool()
    states_only_func = partial(
        _state_only_pipeline,
        run_mode=run_mode,
        generate_reports=generate_reports,
        output_interval_days=output_interval_days,
        output_dir=output_dir,
    )
    p.map(states_only_func, states)

    if states_only:
        root.info("Only executing for states; returning.")
        p.close()  # close the pool before the early return so workers don't leak
        p.join()
        return

    # collect the county FIPS codes to run for each state
    all_county_fips = {}
    for state in states:
        state_county_fips = model_fitter.build_county_list(state)
        county_fips_per_state = {fips: state for fips in state_county_fips}
        all_county_fips.update(county_fips_per_state)

    # calculate county inference
    p.map(infer_rt_module.run_county, all_county_fips.keys())

    # calculate model fit
    root.info(f"executing model for {len(all_county_fips)} counties")
    fitters = p.map(model_fitter._execute_model_for_fips,
                    all_county_fips.keys())

    df = pd.DataFrame([fit.fit_results for fit in fitters if fit])
    df["state"] = df.fips.replace(all_county_fips)
    df["mle_model"] = [fit.mle_model for fit in fitters if fit]
    df.index = df.fips

    state_dfs = [state_df for name, state_df in df.groupby("state")]
    p.map(model_fitter._persist_results_per_state, state_dfs)

    # calculate ensemble
    root.info(f"running ensemble for {len(all_county_fips)} counties")
    ensemble_func = partial(
        _run_county,
        ensemble_kwargs=dict(run_mode=run_mode,
                             generate_report=generate_reports),
    )
    p.map(ensemble_func, all_county_fips.keys())

    # output it all
    output_interval_days = int(output_interval_days)
    _cache_global_datasets()

    root.info(
        f"outputting web results for states and {len(all_county_fips)} counties"
    )
    # Does not parallelize well, because the web_ui mapper doesn't serialize efficiently.
    # TODO: Remove intermediate artifacts and parallelize artifact creation.
    # Approximately 40% of the processing time is spent in this step.
    for state in states:
        web_ui_mapper = WebUIDataAdaptorV1(
            state,
            output_interval_days=output_interval_days,
            run_mode=run_mode,
            jhu_dataset=nyt_dataset,
            cds_dataset=cds_dataset,
            output_dir=output_dir,
        )
        web_ui_mapper.generate_state(all_fips=all_county_fips.keys())
    p.close()
    p.join()

    return
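
Pool.map passes exactly one argument per item, so the code above pre-binds the shared keyword arguments with functools.partial before mapping over the states. A minimal, self-contained illustration of that technique; run_state is a toy stand-in for _state_only_pipeline and all values are made up:

from functools import partial
from multiprocessing import Pool


def run_state(state, run_mode="default", output_dir=None):
    # Toy stand-in; the real pipeline does the heavy lifting here.
    return f"{state}: mode={run_mode}, dir={output_dir}"


if __name__ == "__main__":
    func = partial(run_state, run_mode="default", output_dir="out")
    with Pool() as p:
        print(p.map(func, ["CA", "TX", "NY"]))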
Example #6
def _build_all_for_states(
    states: List[str],
    run_mode=DEFAULT_RUN_MODE,
    generate_reports=False,
    output_interval_days=4,
    output_dir=None,
    skip_whitelist=False,
    states_only=False,
    fips=None,
):
    # prepare data
    _cache_global_datasets()

    if not skip_whitelist:
        _generate_whitelist()

    # do everything for just states in parallel
    with Pool(maxtasksperchild=1) as p:
        states_only_func = partial(
            _state_only_pipeline,
            run_mode=run_mode,
            generate_reports=generate_reports,
            output_interval_days=output_interval_days,
            output_dir=output_dir,
        )
        p.map(states_only_func, states)

    if states_only:
        root.info("Only executing for states. returning.")
        return

    all_county_fips = build_counties_to_run_per_state(states, fips=fips)

    with Pool(maxtasksperchild=1) as p:
        # calculate county inference
        p.map(infer_rt.run_rt_for_fips, all_county_fips.keys())
        # calculate model fit
        root.info(f"executing model for {len(all_county_fips)} counties")
        fitters = p.map(model_fitter.execute_model_for_fips,
                        all_county_fips.keys())

        df = pd.DataFrame([fit.fit_results for fit in fitters if fit])
        df["state"] = df.fips.replace(all_county_fips)
        df["mle_model"] = [fit.mle_model for fit in fitters if fit]
        df.index = df.fips

        state_dfs = [state_df for name, state_df in df.groupby("state")]
        p.map(model_fitter._persist_results_per_state, state_dfs)

        # calculate ensemble
        root.info(f"running ensemble for {len(all_county_fips)} counties")
        ensemble_func = partial(
            ensemble_runner._run_county,
            ensemble_kwargs=dict(run_mode=run_mode,
                                 generate_report=generate_reports),
        )
        p.map(ensemble_func, all_county_fips.keys())

    # output it all
    output_interval_days = int(output_interval_days)
    _cache_global_datasets()

    root.info(
        f"outputting web results for states and {len(all_county_fips)} counties"
    )
    # Does not parallelize well, because the web_ui mapper doesn't serialize efficiently.
    # TODO: Remove intermediate artifacts and parallelize artifact creation.
    # Approximately 40% of the processing time is spent in this step.
    for state in states:
        web_ui_mapper = WebUIDataAdaptorV1(
            state,
            output_interval_days=output_interval_days,
            run_mode=run_mode,
            output_dir=output_dir,
        )
        web_ui_mapper.generate_state(
            whitelisted_county_fips=[
                k for k, v in all_county_fips.items() if v == state
            ],
            states_only=False,
        )

    return
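
The whitelist passed to generate_state inverts the all_county_fips mapping ({fips: state}) by filtering on the value. A small self-contained illustration of that lookup, using made-up FIPS codes:

all_county_fips = {"06001": "CA", "06075": "CA", "36061": "NY"}

for state in ["CA", "NY"]:
    whitelisted = [fips for fips, s in all_county_fips.items() if s == state]
    print(state, whitelisted)
# CA ['06001', '06075']
# NY ['36061']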