示例#1
0
def postprocess_measure(data_interface: ForecastDataInterface,
                        resampling_map: Dict[int, Dict[str, List[int]]],
                        scenario_name: str, measure: str) -> None:
    """Load, resample, optionally aggregate, and save a single measure.

    The measure's configuration is looked up in ``MEASURES``. Its loader is
    called for the scenario; a list/tuple result is concatenated column-wise.
    The draws are resampled with ``pp.resample_draws`` and, when the
    configuration supplies an aggregator, passed through it together with the
    modeled hierarchy and populations. Draws and their summaries are then
    written through ``data_interface``. When the configuration requests it,
    a per-``location_id`` cumulative sum is written the same way under the
    cumulative label.

    Args:
        data_interface: Interface handed to the loader and ``pp`` helpers and
            used to save output draws and summaries.
        resampling_map: Mapping forwarded unchanged to ``pp.resample_draws``.
        scenario_name: Scenario the measure is loaded for and saved under.
        measure: Key into ``MEASURES`` selecting the measure configuration.
    """
    config = MEASURES[measure]

    def _write_draws_and_summaries(frame, label):
        # Draws are written first, then summarized, then the summaries written.
        data_interface.save_output_draws(frame.reset_index(), scenario_name, label)
        summary = pp.summarize(frame)
        data_interface.save_output_summaries(summary.reset_index(), scenario_name, label)

    logger.info(f'Loading {measure}.')
    draws = config.loader(scenario_name, data_interface)
    if isinstance(draws, (list, tuple)):
        # Loader returned several pieces; stitch them together column-wise.
        logger.info(f'Concatenating {measure}.')
        draws = pd.concat(draws, axis=1)

    logger.info(f'Resampling {measure}.')
    draws = pp.resample_draws(draws, resampling_map)

    if config.aggregator is not None:
        hierarchy = pp.load_modeled_hierarchy(data_interface)
        population = pp.load_populations(data_interface)
        draws = config.aggregator(draws, hierarchy, population)

    logger.info(f'Saving draws and summaries for {measure}.')
    _write_draws_and_summaries(draws, config.label)

    if not config.calculate_cumulative:
        return

    logger.info(f'Saving cumulative draws and summaries for {measure}.')
    # Running total within each location, in the frame's existing row order.
    cumulative_draws = draws.groupby(level='location_id').cumsum()
    _write_draws_and_summaries(cumulative_draws, config.cumulative_label)
示例#2
0
def postprocess_covariate(data_interface: ForecastDataInterface,
                          resampling_map: Dict[int, Dict[str, List[int]]],
                          scenario_spec: ScenarioSpecification,
                          scenario_name: str, covariate: str) -> None:
    """Load, resample, and save a covariate, back-filled with its input data.

    The covariate's configuration is looked up in ``COVARIATES``. The loader
    output is concatenated column-wise, resampled with ``pp.resample_draws``
    and optionally aggregated. The result is outer-merged on its index with
    the input covariate data (loaded with ``with_observed=True``). Rows in
    which every draw column is non-null are flagged ``modeled = 1``, others
    ``0``. Missing draw values are then filled from the input data, which is
    repeated across all draw columns. Summaries are always saved; draws are
    saved only when the configuration's ``draw_level`` is truthy.

    Args:
        data_interface: Interface handed to the loader and ``pp`` helpers and
            used to load input data and save outputs.
        resampling_map: Mapping forwarded unchanged to ``pp.resample_draws``.
        scenario_spec: Scenario specification; its ``covariates`` mapping
            provides the covariate version to load.
        scenario_name: Scenario the covariate is loaded for and saved under.
        covariate: Key into ``COVARIATES`` and ``scenario_spec.covariates``.
    """
    config = COVARIATES[covariate]

    logger.info(f'Loading {covariate}.')
    loaded_pieces = config.loader(covariate, config.time_varying, scenario_name, data_interface)

    logger.info(f'Concatenating and resampling {covariate}.')
    combined = pd.concat(loaded_pieces, axis=1)
    covariate_draws = pp.resample_draws(combined, resampling_map)

    if config.aggregator is not None:
        hierarchy = pp.load_modeled_hierarchy(data_interface)
        population = pp.load_populations(data_interface)
        covariate_draws = config.aggregator(covariate_draws, hierarchy, population)

    version = scenario_spec.covariates[covariate]
    location_ids = data_interface.load_location_ids()
    n_draws = data_interface.get_n_draws()

    logger.info(f'Loading and processing input data for {covariate}.')
    raw_input = data_interface.load_covariate(covariate, version, location_ids, with_observed=True)

    # Move 'observed' out of the input index so it survives the merge as a column.
    observed_as_column = raw_input.reset_index(level='observed')
    merged = covariate_draws.merge(observed_as_column, left_index=True,
                                   right_index=True, how='outer')
    merged = merged.reset_index()

    draw_cols = [f'draw_{draw}' for draw in range(n_draws)]
    # A 'date' column is only present for some covariates; index on it when it is.
    index_cols = (['location_id', 'date', 'observed']
                  if 'date' in merged.columns
                  else ['location_id', 'observed'])

    # Kept as one chained expression so the intermediate re-indexed frame is
    # not held by a local name before the column assignment below.
    covariate_draws = merged.set_index(index_cols)[draw_cols]
    fully_populated = covariate_draws.notnull().all(axis=1)
    covariate_draws['modeled'] = fully_populated.astype(int)

    # Repeat the input data once per draw so it can fill gaps in any draw column.
    reordered_input = raw_input.reorder_levels(index_cols)
    input_as_draws = pd.concat([reordered_input for _ in range(n_draws)], axis=1)
    input_as_draws.columns = draw_cols

    filled = covariate_draws.combine_first(input_as_draws)
    covariate_draws = filled.set_index('modeled', append=True)

    logger.info(f'Saving data for {covariate}.')
    if config.draw_level:
        data_interface.save_output_draws(covariate_draws.reset_index(), scenario_name, config.label)

    summary = pp.summarize(covariate_draws)
    data_interface.save_output_summaries(summary.reset_index(), scenario_name, config.label)