def write_out_beta_scales_by_draw(beta_scales: pd.DataFrame,
                                  data_interface: ForecastDataInterface,
                                  offset: pd.Series, scenario: str) -> None:
    # Compute these draw-specific parameters now that we have the offset.
    beta_scales['log_beta_residual_mean_offset'] = offset
    beta_scales['log_beta_residual_mean'] -= offset
    beta_scales['scale_final'] = np.exp(beta_scales['log_beta_residual_mean'])
    draw_id = beta_scales['draw'].iat[0]
    data_interface.save_beta_scales(beta_scales.reset_index(), scenario,
                                    draw_id)


def compute_initial_beta_scaling_parameters_by_draw(
        draw_id: int, total_deaths: pd.Series, beta_scaling: Dict,
        data_interface: ForecastDataInterface) -> pd.DataFrame:
    # Construct a list of pandas Series indexed by location and named
    # as their column will be in the output dataframe. We'll append
    # to this list as we construct the parameters.
    draw_data = [
        total_deaths.copy(),
        pd.Series(beta_scaling['window_size'],
                  index=total_deaths.index,
                  name='window_size')
    ]

    # "Today" in the data is unique by draw.  It's a combination of the
    # number of predicted days from the elastispliner in the ODE fit
    # and the random draw of lag between infection and death from the
    # infectionator, so rather than recomputing it, we look it up.
    transition_date = data_interface.load_transition_date(draw_id)

    beta_regression_df = data_interface.load_beta_regression(draw_id)
    beta_regression_df = beta_regression_df.set_index(
        'location_id').sort_index()
    idx = beta_regression_df.index

    # Select out the transition day to compute the initial scaling parameter.
    beta_transition = beta_regression_df.loc[beta_regression_df['date'] ==
                                             transition_date.loc[idx]]
    draw_data.append(beta_transition['beta'].rename('fit_final'))
    draw_data.append(beta_transition['beta_pred'].rename('pred_start'))
    draw_data.append((beta_transition['beta'] /
                      beta_transition['beta_pred']).rename('scale_init'))

    # Compute the beta residual mean for our parameterization and hang on
    # to some ancillary information that may be useful for plotting/debugging.
    rs = np.random.RandomState(draw_id)
    a = rs.randint(1, beta_scaling['average_over_min'])
    b = rs.randint(a + 7, beta_scaling['average_over_max'])

    draw_data.append(
        pd.Series(a, index=total_deaths.index, name='history_days_start'))
    draw_data.append(
        pd.Series(b, index=total_deaths.index, name='history_days_end'))

    beta_past = (beta_regression_df
                 .loc[beta_regression_df['date'] <= transition_date.loc[idx]]
                 .reset_index()
                 .set_index(['location_id', 'date'])
                 .sort_index())

    log_beta_resid_mean = (np.log(beta_past['beta'] / beta_past['beta_pred'])
                           .groupby(level='location_id')
                           .apply(lambda x: x.iloc[-b:-a].mean())
                           .rename('log_beta_residual_mean'))
    draw_data.append(log_beta_resid_mean)
    draw_data.append(pd.Series(draw_id, index=total_deaths.index, name='draw'))

    return pd.concat(draw_data, axis=1)
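
# A minimal, illustrative sketch (not part of the pipeline; assumes the
# module-level numpy/pandas imports) of the windowed residual-mean
# computation above: per location, average log(beta / beta_pred) over the
# window running from b to a days before the last past day. All values below
# are made up.
def _demo_windowed_residual_mean() -> pd.Series:
    idx = pd.MultiIndex.from_product(
        [[101, 102], pd.date_range('2020-05-01', periods=10)],
        names=['location_id', 'date'])
    rs = np.random.RandomState(0)
    beta_past = pd.DataFrame({
        'beta': rs.uniform(0.2, 0.4, size=len(idx)),
        'beta_pred': rs.uniform(0.2, 0.4, size=len(idx)),
    }, index=idx)
    a, b = 2, 6  # Four-day window: rows -6 through -3 of each location.
    return (np.log(beta_past['beta'] / beta_past['beta_pred'])
            .groupby(level='location_id')
            .apply(lambda x: x.iloc[-b:-a].mean())
            .rename('log_beta_residual_mean'))
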


def load_elastispliner_outputs(data_interface: ForecastDataInterface,
                               noisy: bool):
    es_noisy, es_smoothed = data_interface.load_elastispliner_outputs()
    es_outputs = es_noisy if noisy else es_smoothed
    es_outputs = es_outputs.set_index(['location_id', 'date', 'observed'])
    n_draws = data_interface.get_n_draws()
    es_outputs = es_outputs.rename(
        columns={f'draw_{i}': i
                 for i in range(n_draws)})
    es_outputs = es_outputs.groupby(
        level='location_id').apply(lambda x: x - x.shift(fill_value=0))
    return es_outputs
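
# An illustrative sketch (not part of the pipeline; assumes the module-level
# pandas import): the groupby/shift above differences cumulative draws into
# daily values within each location, with fill_value=0 so the first day keeps
# its full cumulative count. Toy numbers only.
def _demo_cumulative_to_daily() -> pd.DataFrame:
    idx = pd.MultiIndex.from_tuples(
        [(101, 1), (101, 2), (101, 3), (102, 1), (102, 2)],
        names=['location_id', 'day'])
    cumulative = pd.DataFrame({0: [1, 3, 6, 2, 5]}, index=idx)
    # Each location is differenced separately; values become [1, 2, 3, 2, 3].
    return cumulative.groupby(level='location_id').apply(
        lambda x: x - x.shift(fill_value=0))
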


def postprocess_measure(data_interface: ForecastDataInterface,
                        resampling_map: Dict[int, Dict[str, List[int]]],
                        scenario_name: str, measure: str) -> None:
    measure_config = MEASURES[measure]
    logger.info(f'Loading {measure}.')
    measure_data = measure_config.loader(scenario_name, data_interface)
    if isinstance(measure_data, (list, tuple)):
        logger.info(f'Concatenating {measure}.')
        measure_data = pd.concat(measure_data, axis=1)
    logger.info(f'Resampling {measure}.')
    measure_data = pp.resample_draws(measure_data, resampling_map)

    if measure_config.aggregator is not None:
        hierarchy = pp.load_modeled_hierarchy(data_interface)
        population = pp.load_populations(data_interface)
        measure_data = measure_config.aggregator(measure_data, hierarchy, population)

    logger.info(f'Saving draws and summaries for {measure}.')
    data_interface.save_output_draws(measure_data.reset_index(), scenario_name, measure_config.label)
    summarized = pp.summarize(measure_data)
    data_interface.save_output_summaries(summarized.reset_index(), scenario_name, measure_config.label)

    if measure_config.calculate_cumulative:
        logger.info(f'Saving cumulative draws and summaries for {measure}.')
        cumulative_measure_data = measure_data.groupby(level='location_id').cumsum()
        data_interface.save_output_draws(cumulative_measure_data.reset_index(), scenario_name,
                                         measure_config.cumulative_label)
        summarized = pp.summarize(cumulative_measure_data)
        data_interface.save_output_summaries(summarized.reset_index(), scenario_name,
                                             measure_config.cumulative_label)


def do_beta_forecast(app_metadata: cli_tools.Metadata,
                     forecast_specification: ForecastSpecification,
                     preprocess_only: bool):
    logger.debug('Starting beta forecast.')

    data_interface = ForecastDataInterface.from_specification(forecast_specification)

    # Check that the scenario covariates are the same as the regression
    # covariates and that the covariate data versions match.
    covariates = data_interface.check_covariates(forecast_specification.scenarios)

    data_interface.make_dirs()
    # Fixme: Inconsistent data writing interfaces
    forecast_specification.dump(data_interface.forecast_paths.forecast_specification)

    if not preprocess_only:
        forecast_wf = ForecastWorkflow(forecast_specification.data.output_root)
        n_draws = data_interface.get_n_draws()

        forecast_wf.attach_tasks(n_draws=n_draws,
                                 scenarios=forecast_specification.scenarios,
                                 covariates=covariates)
        try:
            forecast_wf.run()
        except WorkflowAlreadyComplete:
            logger.info('Workflow already complete')


def load_populations(data_interface: ForecastDataInterface):
    metadata = data_interface.get_infectionator_metadata()
    model_inputs_path = Path(
        metadata['death']['metadata']['model_inputs_metadata']['output_path'])
    population_path = model_inputs_path / 'output_measures' / 'population' / 'all_populations.csv'
    populations = pd.read_csv(population_path)
    return populations


def load_hierarchy(data_interface: ForecastDataInterface):
    metadata = data_interface.get_infectionator_metadata()
    model_inputs_path = Path(
        metadata['death']['metadata']['model_inputs_metadata']['output_path'])
    hierarchy_path = model_inputs_path / 'locations' / 'modeling_hierarchy.csv'
    hierarchy = pd.read_csv(hierarchy_path)
    return hierarchy


def load_coefficients(scenario: str, data_interface: ForecastDataInterface):
    _runner = functools.partial(load_coefficients_by_draw,
                                data_interface=data_interface)
    draws = range(data_interface.get_n_draws())
    with multiprocessing.Pool(FORECAST_SCALING_CORES) as pool:
        outputs = pool.map(_runner, draws)
    return outputs


def load_beta_residuals_by_draw(
        draw_id: int, data_interface: ForecastDataInterface) -> pd.Series:
    beta_regression = data_interface.load_beta_regression(draw_id)
    beta_regression = (beta_regression.set_index(
        ['location_id', 'date']).sort_index()[['beta', 'beta_pred']])
    beta_residual = np.log(beta_regression['beta'] /
                           beta_regression['beta_pred']).rename(draw_id)
    return beta_residual


def load_coefficients_by_draw(
        draw_id: int, data_interface: ForecastDataInterface) -> pd.Series:
    coefficients = data_interface.load_regression_coefficients(draw_id)
    coefficients = coefficients.set_index('location_id').stack().reset_index()
    coefficients.columns = ['location_id', 'covariate', draw_id]
    coefficients = coefficients.set_index(['location_id',
                                           'covariate'])[draw_id]
    return coefficients
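
# An illustrative sketch (not part of the pipeline; assumes the module-level
# pandas import) of the stack/relabel pattern shared by
# load_coefficients_by_draw and load_scaling_parameters_by_draw: a wide
# per-location table becomes a Series indexed by (location, column) and named
# after the draw. Toy values only.
def _demo_stack_to_long(draw_id: int = 4) -> pd.Series:
    wide = pd.DataFrame({
        'location_id': [101, 102],
        'intercept': [0.10, 0.20],
        'mobility': [0.01, 0.02],
    })
    long = wide.set_index('location_id').stack().reset_index()
    long.columns = ['location_id', 'covariate', draw_id]
    return long.set_index(['location_id', 'covariate'])[draw_id]
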
def run_resample_map(forecast_version: str) -> None:
    forecast_spec = ForecastSpecification.from_path(
        Path(forecast_version) / static_vars.FORECAST_SPECIFICATION_FILE)
    resampling_params = forecast_spec.postprocessing.resampling
    data_interface = ForecastDataInterface.from_specification(forecast_spec)
    deaths, *_ = pp.load_output_data(resampling_params['reference_scenario'],
                                     data_interface)
    deaths = pd.concat(deaths, axis=1)
    resampling_map = pp.build_resampling_map(deaths, resampling_params)
    data_interface.save_resampling_map(resampling_map)


def load_output_data_by_draw(
    draw_id: int, scenario: str, data_interface: ForecastDataInterface
) -> Tuple[pd.Series, pd.Series, pd.Series]:
    draw_df = data_interface.load_raw_outputs(scenario, draw_id)
    draw_df = draw_df.set_index(['location_id', 'date']).sort_index()
    deaths = draw_df.reset_index().set_index(
        ['location_id', 'date', 'observed'])['deaths'].rename(draw_id)
    infections = draw_df['infections'].rename(draw_id)
    r_effective = draw_df['r_effective'].rename(draw_id)
    return deaths, infections, r_effective


def load_scaling_parameters_by_draw(
        draw_id: int, scenario: str,
        data_interface: ForecastDataInterface) -> pd.Series:
    scaling_parameters = data_interface.load_beta_scales(scenario, draw_id)
    scaling_parameters = scaling_parameters.set_index(
        'location_id').stack().reset_index()
    scaling_parameters.columns = ['location_id', 'scaling_parameter', draw_id]
    scaling_parameters = scaling_parameters.set_index(
        ['location_id', 'scaling_parameter'])[draw_id]
    return scaling_parameters


def load_covariate_by_draw(draw_id: int, covariate: str, time_varying: bool,
                           scenario: str,
                           data_interface: ForecastDataInterface) -> pd.Series:
    covariate_df = data_interface.load_raw_covariates(scenario, draw_id)
    covariate_df = covariate_df.set_index(['location_id', 'date']).sort_index()
    if time_varying:
        covariate_data = covariate_df[covariate].rename(draw_id)
    else:
        covariate_data = covariate_df.groupby(
            level='location_id')[covariate].max().rename(draw_id)
    return covariate_data


def load_beta_residuals(
        scenario: str,
        data_interface: ForecastDataInterface) -> List[pd.Series]:
    _runner = functools.partial(
        load_beta_residuals_by_draw,
        data_interface=data_interface,
    )
    draws = range(data_interface.get_n_draws())
    with multiprocessing.Pool(FORECAST_SCALING_CORES) as pool:
        beta_residuals = pool.map(_runner, draws)
    return beta_residuals


def get_locations_modeled_and_missing(data_interface: ForecastDataInterface):
    hierarchy = load_hierarchy(data_interface)
    modeled_locations = data_interface.load_location_ids()
    most_detailed_locs = hierarchy.loc[hierarchy.most_detailed == 1,
                                       'location_id'].unique().tolist()
    missing_locations = list(
        set(most_detailed_locs).difference(modeled_locations))
    locations_modeled_and_missing = {
        'modeled': modeled_locations,
        'missing': missing_locations
    }
    return locations_modeled_and_missing


def load_output_data(scenario: str, data_interface: ForecastDataInterface):
    _runner = functools.partial(
        load_output_data_by_draw,
        scenario=scenario,
        data_interface=data_interface,
    )
    draws = range(data_interface.get_n_draws())
    with multiprocessing.Pool(FORECAST_SCALING_CORES) as pool:
        outputs = pool.map(_runner, draws)
    deaths, infections, r_effective = zip(*outputs)

    return deaths, infections, r_effective


def postprocess_miscellaneous(data_interface: ForecastDataInterface,
                              scenario_name: str, measure: str):
    miscellaneous_config = MISCELLANEOUS[measure]
    logger.info(f'Loading {measure}.')
    miscellaneous_data = miscellaneous_config.loader(data_interface)

    if miscellaneous_config.aggregator is not None:
        hierarchy = pp.load_modeled_hierarchy(data_interface)
        population = pp.load_populations(data_interface)
        miscellaneous_data = miscellaneous_config.aggregator(miscellaneous_data, hierarchy, population)

    logger.info(f'Saving {measure} data.')
    if miscellaneous_config.is_table:
        data_interface.save_output_miscellaneous(miscellaneous_data.reset_index(), scenario_name,
                                                 miscellaneous_config.label)
    else:
        # FIXME: yuck
        miscellaneous_dir = data_interface.forecast_paths.scenario_paths[scenario_name].output_miscellaneous
        measure_path = miscellaneous_dir / f'{miscellaneous_config.label}.yaml'
        with measure_path.open('w') as f:
            yaml.dump(miscellaneous_data, f)


def load_full_data(data_interface: ForecastDataInterface) -> pd.DataFrame:
    full_data = data_interface.load_full_data()
    full_data = full_data.set_index(['location_id', 'date'])
    full_data = full_data.rename(
        columns={
            'Deaths': 'cumulative_deaths',
            'Confirmed': 'cumulative_cases',
            'Hospitalizations': 'cumulative_hospitalizations',
        })
    full_data = full_data[[
        'cumulative_cases', 'cumulative_deaths', 'cumulative_hospitalizations'
    ]]
    return full_data


def build_version_map(data_interface: ForecastDataInterface) -> pd.Series:
    version_map = {
        'forecast_version': data_interface.forecast_paths.root_dir.name,
        'regression_version': data_interface.regression_paths.root_dir.name,
        'covariate_version': data_interface.covariate_paths.root_dir.name,
    }

    # FIXME: infectionator doesn't do metadata the right way.
    inf_metadata = data_interface.get_infectionator_metadata()
    inf_output_dir = inf_metadata['wrapped_R_call'][-1].split()[1].strip("'")
    version_map['infectionator_version'] = Path(inf_output_dir).name

    death_metadata = inf_metadata['death']['metadata']
    version_map['elastispliner_version'] = Path(
        death_metadata['output_path']).name

    model_inputs_metadata = death_metadata['model_inputs_metadata']
    version_map['model_inputs_version'] = Path(
        model_inputs_metadata['output_path']).name

    snapshot_metadata = model_inputs_metadata['snapshot_metadata']
    version_map['snapshot_version'] = Path(
        snapshot_metadata['output_path']).name
    jhu_snapshot_metadata = model_inputs_metadata['jhu_snapshot_metadata']
    version_map['jhu_snapshot_version'] = Path(
        jhu_snapshot_metadata['output_path']).name
    try:
        # There is a typo in the process that generates this key.
        # Protect ourselves in case they fix it without warning.
        webscrape_metadata = model_inputs_metadata['webcrape_metadata']
    except KeyError:
        webscrape_metadata = model_inputs_metadata['webscrape_metadata']
    version_map['webscrape_version'] = Path(
        webscrape_metadata['output_path']).name

    version_map['location_set_version_id'] = model_inputs_metadata[
        'run_arguments']['lsvid']
    try:
        version_map['location_set_version_id'] = int(
            version_map['location_set_version_id'])
    except (TypeError, ValueError):
        pass
    version_map['data_date'] = Path(
        snapshot_metadata['output_path']).name.split('.')[0].replace('_', '-')

    version_map = pd.Series(version_map)
    version_map = version_map.reset_index()
    version_map.columns = ['name', 'version']
    return version_map


def load_covariate(covariate: str, time_varying: bool, scenario: str,
                   data_interface: ForecastDataInterface) -> List[pd.Series]:
    _runner = functools.partial(
        load_covariate_by_draw,
        covariate=covariate,
        time_varying=time_varying,
        scenario=scenario,
        data_interface=data_interface,
    )
    draws = range(data_interface.get_n_draws())
    with multiprocessing.Pool(FORECAST_SCALING_CORES) as pool:
        outputs = pool.map(_runner, draws)

    return outputs


def compute_initial_beta_scaling_parameters(
        total_deaths: pd.Series, beta_scaling: dict,
        data_interface: ForecastDataInterface) -> List[pd.DataFrame]:
    # Serialization is our bottleneck, so we parallelize draw-level data
    # ingestion and computation across multiple processes.
    _runner = functools.partial(
        compute_initial_beta_scaling_parameters_by_draw,
        total_deaths=total_deaths,
        beta_scaling=beta_scaling,
        data_interface=data_interface)
    draws = list(range(data_interface.get_n_draws()))
    with multiprocessing.Pool(FORECAST_SCALING_CORES) as pool:
        scaling_data = list(pool.imap(_runner, draws))
    return scaling_data
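
# An illustrative sketch (not part of the pipeline) of the worker-pool
# pattern used throughout this module: bind the shared arguments with
# functools.partial, then map the draw ids across processes. _demo_draw_worker
# is a hypothetical stand-in for a per-draw load/compute step; the functools,
# multiprocessing, and typing imports match those already used above.
def _demo_draw_worker(draw_id: int, offset: int) -> int:
    return draw_id ** 2 + offset


def _demo_parallel_draws(n_draws: int = 8, n_cores: int = 2) -> List[int]:
    _runner = functools.partial(_demo_draw_worker, offset=1)
    with multiprocessing.Pool(n_cores) as pool:
        return list(pool.imap(_runner, range(n_draws)))
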
def run_mean_level_mandate_reimposition(forecast_version: str,
                                        scenario_name: str,
                                        reimposition_number: int):
    logger.info(
        f"Initiating SEIIR mean level mandate reimposition {reimposition_number} "
        f"for scenario {scenario_name}.")
    forecast_spec: ForecastSpecification = ForecastSpecification.from_path(
        Path(forecast_version) / static_vars.FORECAST_SPECIFICATION_FILE)
    scenario_spec = forecast_spec.scenarios[scenario_name]
    data_interface = ForecastDataInterface.from_specification(forecast_spec)

    resampling_map = data_interface.load_resampling_map()
    deaths = pp.load_deaths(scenario_name, data_interface)
    deaths = pd.concat(deaths, axis=1)
    deaths = pp.resample_draws(deaths, resampling_map)
    deaths = pp.summarize(deaths)
    deaths = deaths['mean'].rename('deaths').reset_index()
    deaths['date'] = pd.to_datetime(deaths['date'])

    modeled_locations = deaths['location_id'].unique().tolist()
    deaths = deaths.set_index(['location_id', 'date'])

    population = pp.load_populations(data_interface)
    population = population.loc[
        population.location_id.isin(modeled_locations)
        & (population.age_group_id == 22)
        & (population.sex_id == 3)]
    population = population.set_index('location_id')['population']

    min_wait, days_on, reimposition_threshold = model.unpack_parameters(
        scenario_spec.algorithm_params)

    previous_dates = pd.Series(pd.NaT, index=population.index)
    for previous_reimposition in range(reimposition_number - 1, 0, -1):
        these_dates = data_interface.load_reimposition_dates(
            scenario=scenario_name, reimposition_number=previous_reimposition)
        these_dates = pd.to_datetime(
            these_dates.set_index('location_id')['reimposition_date'])
        these_dates = these_dates.reindex(previous_dates.index)
        this_reimposition = previous_dates.isnull() & these_dates.notnull()
        previous_dates.loc[this_reimposition] = these_dates.loc[
            this_reimposition]
    last_reimposition_end_date = previous_dates + days_on
    reimposition_date = model.compute_reimposition_date(
        deaths, population, reimposition_threshold, min_wait,
        last_reimposition_end_date)
    data_interface.save_reimposition_dates(
        reimposition_date.reset_index(),
        scenario=scenario_name,
        reimposition_number=reimposition_number)
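
# An illustrative sketch (not part of the pipeline; assumes the module-level
# pandas import and that days_on is a pd.Timedelta): pd.NaT propagates
# through the date arithmetic above, so a location that has never reimposed
# keeps a NaT end date rather than acquiring a spurious one. Toy values only.
def _demo_nat_propagation() -> pd.Series:
    previous_dates = pd.Series(
        [pd.Timestamp('2020-07-01'), pd.NaT], index=[101, 102])
    days_on = pd.Timedelta(days=45)
    return previous_dates + days_on  # [2020-08-15, NaT]
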


def load_elastispliner_inputs(
        data_interface: ForecastDataInterface) -> pd.DataFrame:
    es_inputs = data_interface.load_elastispliner_inputs()
    es_inputs = es_inputs.set_index(['location_id', 'date'])
    cumulative_cases = (es_inputs['Confirmed case rate'] *
                        es_inputs['population']).rename('cumulative_cases')
    cumulative_deaths = (es_inputs['Death rate'] *
                         es_inputs['population']).rename('cumulative_deaths')
    cumulative_hospitalizations = (es_inputs['Hospitalization rate'] *
                                   es_inputs['population'])
    cumulative_hospitalizations = cumulative_hospitalizations.rename(
        'cumulative_hospitalizations')
    es_inputs = pd.concat(
        [cumulative_cases, cumulative_deaths, cumulative_hospitalizations],
        axis=1)
    return es_inputs


def run_seir_postprocessing(forecast_version: str, scenario_name: str, measure: str) -> None:
    logger.info(f'Starting postprocessing for forecast version {forecast_version}, scenario {scenario_name}.')
    forecast_spec = ForecastSpecification.from_path(
        Path(forecast_version) / static_vars.FORECAST_SPECIFICATION_FILE
    )
    scenario_spec = forecast_spec.scenarios[scenario_name]
    data_interface = ForecastDataInterface.from_specification(forecast_spec)
    resampling_map = data_interface.load_resampling_map()

    if measure in MEASURES:
        postprocess_measure(data_interface, resampling_map, scenario_name, measure)
    elif measure in COVARIATES:
        postprocess_covariate(data_interface, resampling_map, scenario_spec, scenario_name, measure)
    elif measure in MISCELLANEOUS:
        postprocess_miscellaneous(data_interface, scenario_name, measure)
    else:
        raise NotImplementedError(f'Unknown measure {measure}.')

    logger.info('**DONE**')


def postprocess_covariate(data_interface: ForecastDataInterface,
                          resampling_map: Dict[int, Dict[str, List[int]]],
                          scenario_spec: ScenarioSpecification,
                          scenario_name: str, covariate: str) -> None:
    covariate_config = COVARIATES[covariate]
    logger.info(f'Loading {covariate}.')
    covariate_data = covariate_config.loader(covariate, covariate_config.time_varying, scenario_name, data_interface)
    logger.info(f'Concatenating and resampling {covariate}.')
    covariate_data = pd.concat(covariate_data, axis=1)
    covariate_data = pp.resample_draws(covariate_data, resampling_map)

    if covariate_config.aggregator is not None:
        hierarchy = pp.load_modeled_hierarchy(data_interface)
        population = pp.load_populations(data_interface)
        covariate_data = covariate_config.aggregator(covariate_data, hierarchy, population)

    covariate_version = scenario_spec.covariates[covariate]
    location_ids = data_interface.load_location_ids()
    n_draws = data_interface.get_n_draws()

    logger.info(f'Loading and processing input data for {covariate}.')
    input_covariate_data = data_interface.load_covariate(covariate, covariate_version, location_ids, with_observed=True)
    covariate_observed = input_covariate_data.reset_index(level='observed')
    covariate_data = covariate_data.merge(covariate_observed, left_index=True,
                                          right_index=True, how='outer').reset_index()
    draw_cols = [f'draw_{i}' for i in range(n_draws)]
    if 'date' in covariate_data.columns:
        index_cols = ['location_id', 'date', 'observed']
    else:
        index_cols = ['location_id', 'observed']

    covariate_data = covariate_data.set_index(index_cols)[draw_cols]
    covariate_data['modeled'] = covariate_data.notnull().all(axis=1).astype(int)

    input_covariate = pd.concat([input_covariate_data.reorder_levels(index_cols)] * n_draws, axis=1)
    input_covariate.columns = draw_cols
    covariate_data = covariate_data.combine_first(input_covariate).set_index('modeled', append=True)

    logger.info(f'Saving data for {covariate}.')
    if covariate_config.draw_level:
        data_interface.save_output_draws(covariate_data.reset_index(), scenario_name, covariate_config.label)

    summarized_data = pp.summarize(covariate_data)
    data_interface.save_output_summaries(summarized_data.reset_index(), scenario_name, covariate_config.label)
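
# An illustrative sketch (not part of the pipeline; assumes the module-level
# pandas import): combine_first above keeps the modeled draw values wherever
# they exist and fills the remaining holes from the observed input covariate.
# Toy frames only.
def _demo_combine_first() -> pd.DataFrame:
    modeled = pd.DataFrame({'draw_0': [float('nan'), 2.0]}, index=[101, 102])
    observed = pd.DataFrame({'draw_0': [1.0, 9.0]}, index=[101, 102])
    return modeled.combine_first(observed)  # draw_0 == [1.0, 2.0]
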


    def test_forecast_io(self, tmpdir, components, beta_scales,
                         forecast_outputs):
        forecast_paths = ForecastPaths(
            root_dir=Path(tmpdir),
            scenarios=['happy'],
        )
        di = ForecastDataInterface(
            forecast_paths=None,
            regression_paths=None,
            covariate_paths=None,
            regression_marshall=None,
            forecast_marshall=CSVMarshall.from_paths(forecast_paths),
        )

        # Step 1: save files
        di.save_components(components, scenario="happy", draw_id=4)
        di.save_beta_scales(beta_scales, scenario="happy", draw_id=4)
        di.save_raw_outputs(forecast_outputs, scenario="happy", draw_id=4)

        # Step 2: test save location
        # this is sort of cheating, but it ensures that scenario things are
        # nicely nested as they should be
        assert (Path(tmpdir) / "happy" / "component_draws" /
                "draw_4.csv").exists()
        assert (Path(tmpdir) / "happy" / "beta_scaling" /
                "draw_4.csv").exists()
        assert (Path(tmpdir) / "happy" / "raw_outputs" / "draw_4.csv").exists()

        # Step 3: load those files
        loaded_components = di.load_components(scenario="happy", draw_id=4)
        # load_components now does some formatting, which broke the tests.
        # Back out those changes here.
        loaded_components = loaded_components.reset_index()
        loaded_components['date'] = loaded_components['date'].astype(str)
        loaded_components = loaded_components[
            components.columns]  # Use the same sort order.

        loaded_beta_scales = di.load_beta_scales(scenario="happy", draw_id=4)
        loaded_forecast_outputs = di.load_raw_outputs(scenario="happy",
                                                      draw_id=4)

        # Step 4: test files
        pandas.testing.assert_frame_equal(components, loaded_components)
        pandas.testing.assert_frame_equal(beta_scales, loaded_beta_scales)
        pandas.testing.assert_frame_equal(forecast_outputs,
                                          loaded_forecast_outputs)


    def test_regression_io(self, tmpdir, coefficients, dates, regression_beta,
                           location_data, parameters):
        """
        Test I/O relating to the regression stage.

        This only includes loading files, as they are all saved by the
        RegressionDataInterface.
        """
        regress_paths = RegressionPaths(Path(tmpdir))
        rdi = RegressionDataInterface(
            infection_paths=None,
            regression_paths=regress_paths,
            covariate_paths=None,
            regression_marshall=CSVMarshall(regress_paths.root_dir),
        )

        fdi = ForecastDataInterface(
            forecast_paths=None,
            regression_paths=None,
            covariate_paths=None,
            regression_marshall=CSVMarshall.from_paths(regress_paths),
            forecast_marshall=None,
        )

        # Step 1: save files (normally done in regression)
        rdi.save_regression_coefficients(coefficients, draw_id=4)
        rdi.save_beta_param_file(parameters, draw_id=4)
        rdi.save_date_file(dates, draw_id=4)
        rdi.save_regression_betas(regression_beta, draw_id=4)
        rdi.save_location_data(location_data, draw_id=4)

        # Step 2: load files as they would be loaded in forecast
        loaded_coefficients = fdi.load_regression_coefficients(draw_id=4)
        loaded_parameters = fdi.load_beta_params(draw_id=4)
        loaded_transition_dates = fdi.load_transition_date(draw_id=4)
        loaded_regression_beta = fdi.load_beta_regression(draw_id=4)
        loaded_location_data = fdi.load_infection_data(draw_id=4)

        # Step 3: test files
        pandas.testing.assert_frame_equal(coefficients, loaded_coefficients)
        # some load methods do pandas.to_datetime conversion on columns
        transition_dates = (dates.set_index('location_id')
                            .sort_index()['end_date']
                            .rename('date')
                            .reset_index())
        loaded_transition_dates = loaded_transition_dates.reset_index()
        assert_equal_after_date_conversion(transition_dates,
                                           loaded_transition_dates,
                                           date_cols=['date'])
        assert_equal_after_date_conversion(regression_beta,
                                           loaded_regression_beta,
                                           date_cols=['date'])
        assert_equal_after_date_conversion(location_data,
                                           loaded_location_data,
                                           date_cols=['date'])

        # load_beta_params returns a dict rather than a DataFrame. In
        # addition, some rounding error occurs in the save/load from CSV.
        expected_parameters = parameters.set_index(
            'params')['values'].to_dict()
        try:
            assert expected_parameters == loaded_parameters
        except AssertionError:
            # assert keys are identical
            assert set(expected_parameters) == set(loaded_parameters)
            # assert each value is accurate to 15 decimal places
            for k, expected in expected_parameters.items():
                loaded = loaded_parameters[k]
                numpy.testing.assert_almost_equal(loaded, expected, decimal=15)
            warnings.warn(
                "beta fit parameters accurate only to 15 decimal places after save/load cycle"
            )


def run_beta_forecast(draw_id: int, forecast_version: str, scenario_name: str,
                      **kwargs):
    logger.info(
        f"Initiating SEIIR beta forecasting for scenario {scenario_name}, draw {draw_id}."
    )
    forecast_spec: ForecastSpecification = ForecastSpecification.from_path(
        Path(forecast_version) / static_vars.FORECAST_SPECIFICATION_FILE)
    scenario_spec = forecast_spec.scenarios[scenario_name]
    data_interface = ForecastDataInterface.from_specification(forecast_spec)

    logger.info('Loading input data.')
    location_ids = data_interface.load_location_ids()
    # Thetas are parameters generated from assumptions or out-of-sample
    # predictive validity testing to curtail some of the bad behavior of
    # the model.
    thetas = data_interface.load_thetas(scenario_spec.theta)
    # Grab the last day of data in the model by location id.  This will
    # correspond to the initial condition for the projection.
    transition_date = data_interface.load_transition_date(draw_id)

    # We'll use the beta and SEIR compartments from this data set to get
    # the ODE initial condition.
    beta_regression_df = data_interface.load_beta_regression(
        draw_id).set_index('location_id').sort_index()
    past_components = beta_regression_df[['date', 'beta'] +
                                         static_vars.SEIIR_COMPARTMENTS]

    # Select out the initial condition using the day of transition.
    transition_day = past_components['date'] == transition_date.loc[
        past_components.index]
    initial_condition = past_components.loc[transition_day,
                                            static_vars.SEIIR_COMPARTMENTS]
    before_model = past_components['date'] < transition_date.loc[
        past_components.index]
    past_components = past_components[before_model]

    # Covariates, coefficients, and scaling parameters are used to compute
    # beta hat in the future.
    covariates = data_interface.load_covariates(scenario_spec, location_ids)
    coefficients = data_interface.load_regression_coefficients(draw_id)

    # Grab the projection of the covariates into the future, keeping the
    # day of transition from past model to future model.
    covariates = covariates.set_index('location_id').sort_index()
    the_future = covariates['date'] >= transition_date.loc[covariates.index]
    covariate_pred = covariates.loc[the_future].reset_index()

    beta_scales = data_interface.load_beta_scales(scenario=scenario_name,
                                                  draw_id=draw_id)

    # We'll use the same params in the ODE forecast as we did in the fit.
    beta_params = data_interface.load_beta_params(draw_id=draw_id)

    # We'll need this to compute deaths and to splice with the forecasts.
    infection_data = data_interface.load_infection_data(draw_id)

    if ((thetas < -1) | (1 < thetas)).any():
        raise ValueError('Theta must be between -1 and 1.')
    if (beta_params['sigma'] - thetas >= 1).any():
        raise ValueError('Sigma - theta must be smaller than 1')

    # Modeling starts
    logger.info('Forecasting beta and components.')
    betas = model.forecast_beta(covariate_pred, coefficients, beta_scales)
    future_components = model.run_normal_ode_model_by_location(
        initial_condition, beta_params, betas, thetas, location_ids,
        scenario_spec.solver, scenario_spec.system)
    logger.info('Processing ODE results and computing deaths and infections.')
    components, infections, deaths, r_effective = model.compute_output_metrics(
        infection_data, past_components, future_components, thetas,
        beta_params, scenario_spec.system)

    if scenario_spec.algorithm == 'draw_level_mandate_reimposition':
        logger.info('Entering mandate reimposition.')
        # Covariate info specific to mandate reimposition.
        percent_mandates = data_interface.load_covariate_info(
            'mobility', 'mandate_lift', location_ids)
        mandate_effect = data_interface.load_covariate_info(
            'mobility', 'effect', location_ids)
        min_wait, days_on, reimposition_threshold = model.unpack_parameters(
            scenario_spec.algorithm_params)

        population = (components[static_vars.SEIIR_COMPARTMENTS].sum(
            axis=1).rename('population').groupby('location_id').max())
        logger.info('Loading mandate reimposition data.')

        reimposition_count = 0
        reimposition_dates = {}
        last_reimposition_end_date = pd.Series(pd.NaT, index=population.index)
        reimposition_date = model.compute_reimposition_date(
            deaths, population, reimposition_threshold, min_wait,
            last_reimposition_end_date)

        while len(reimposition_date):  # any place reimposes mandates.
            logger.info(
                f'On mandate reimposition {reimposition_count + 1}. {len(reimposition_date)} locations '
                f'are reimposing mandates.')
            mobility = covariates[['date',
                                   'mobility']].reset_index().set_index(
                                       ['location_id', 'date'])['mobility']
            mobility_lower_bound = model.compute_mobility_lower_bound(
                mobility, mandate_effect)

            new_mobility = model.compute_new_mobility(mobility,
                                                      reimposition_date,
                                                      mobility_lower_bound,
                                                      percent_mandates,
                                                      days_on)

            covariates = covariates.reset_index().set_index(
                ['location_id', 'date'])
            covariates['mobility'] = new_mobility
            covariates = covariates.reset_index(level='date')
            covariate_pred = covariates.loc[the_future].reset_index()

            logger.info('Forecasting beta and components.')
            betas = model.forecast_beta(covariate_pred, coefficients,
                                        beta_scales)
            future_components = model.run_normal_ode_model_by_location(
                initial_condition, beta_params, betas, thetas, location_ids,
                scenario_spec.solver, scenario_spec.system)
            logger.info(
                'Processing ODE results and computing deaths and infections.')
            components, infections, deaths, r_effective = model.compute_output_metrics(
                infection_data, past_components, future_components, thetas,
                beta_params, scenario_spec.system)

            reimposition_count += 1
            reimposition_dates[reimposition_count] = reimposition_date
            last_reimposition_end_date.loc[
                reimposition_date.index] = reimposition_date + days_on
            reimposition_date = model.compute_reimposition_date(
                deaths, population, reimposition_threshold, min_wait,
                last_reimposition_end_date)

    logger.info('Writing outputs.')
    components = components.reset_index()
    covariates = covariates.reset_index()
    outputs = pd.concat([infections, deaths, r_effective],
                        axis=1).reset_index()

    data_interface.save_components(components, scenario_name, draw_id)
    data_interface.save_raw_covariates(covariates, scenario_name, draw_id)
    data_interface.save_raw_outputs(outputs, scenario_name, draw_id)
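
# An illustrative sketch (not part of the pipeline; assumes the module-level
# pandas import) of the index-aligned comparisons above, e.g.
# covariates['date'] >= transition_date.loc[covariates.index]: indexing the
# per-location dates by the frame's (repeated) location index lines each row
# up with its own location's transition date. Toy values only.
def _demo_aligned_date_comparison() -> pd.Series:
    frame = pd.DataFrame(
        {'date': pd.to_datetime(
            ['2020-05-01', '2020-05-02', '2020-05-01', '2020-05-02'])},
        index=pd.Index([101, 101, 102, 102], name='location_id'))
    transition_date = pd.Series(
        pd.to_datetime(['2020-05-02', '2020-05-01']),
        index=pd.Index([101, 102], name='location_id'))
    # Rows on/after their own location's transition date:
    # [False, True, True, True].
    return frame['date'] >= transition_date.loc[frame.index]
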


def load_betas_by_draw(draw_id: int, scenario: str,
                       data_interface: ForecastDataInterface) -> pd.Series:
    components = data_interface.load_components(scenario, draw_id)
    draw_betas = (components.sort_index()['beta'].rename(draw_id))
    return draw_betas