示例#1
0
def get_locations_modeled_and_missing(data_interface: ForecastDataInterface):
    """Split the most-detailed hierarchy locations into modeled and missing.

    Parameters
    ----------
    data_interface
        Interface used to load the location hierarchy (via ``load_hierarchy``)
        and the modeled location ids (via ``load_location_ids``).

    Returns
    -------
    dict
        ``'modeled'`` holds the location ids exactly as returned by
        ``data_interface.load_location_ids()``; ``'missing'`` is a list of
        the ``location_id`` values flagged ``most_detailed == 1`` in the
        hierarchy that are not among the modeled ids. The list is built from
        a set, so its order is not meaningful.
    """
    hierarchy = load_hierarchy(data_interface)
    modeled = data_interface.load_location_ids()

    # Unique ids of the hierarchy rows flagged as most detailed.
    is_most_detailed = hierarchy.most_detailed == 1
    detailed_ids = hierarchy.loc[is_most_detailed, 'location_id'].unique().tolist()

    # Anything most-detailed that was not modeled counts as missing.
    missing = list(set(detailed_ids).difference(modeled))

    return {'modeled': modeled, 'missing': missing}
示例#2
0
def postprocess_covariate(data_interface: ForecastDataInterface,
                          resampling_map: Dict[int, Dict[str, List[int]]],
                          scenario_spec: ScenarioSpecification,
                          scenario_name: str, covariate: str) -> None:
    """Post-process one covariate for a scenario and save the results.

    The covariate's draws are loaded with the loader configured in
    ``COVARIATES``, concatenated column-wise, resampled, and (when an
    aggregator is configured) aggregated. They are then outer-merged with the
    input covariate data, flagged with a ``modeled`` indicator, and
    gap-filled from the input data. Summaries are always saved; draws are
    saved only when the covariate's config has ``draw_level`` set.

    Parameters
    ----------
    data_interface
        Interface used for all loading and saving.
    resampling_map
        Passed through to ``pp.resample_draws``.
    scenario_spec
        Scenario specification; ``scenario_spec.covariates[covariate]``
        supplies the input covariate version.
    scenario_name
        Name of the scenario being post-processed.
    covariate
        Key into ``COVARIATES`` naming the covariate to process.
    """
    config = COVARIATES[covariate]

    logger.info(f'Loading {covariate}.')
    draws = config.loader(covariate, config.time_varying, scenario_name, data_interface)

    logger.info(f'Concatenating and resampling {covariate}.')
    draws = pd.concat(draws, axis=1)
    draws = pp.resample_draws(draws, resampling_map)

    if config.aggregator is not None:
        hierarchy = pp.load_modeled_hierarchy(data_interface)
        population = pp.load_populations(data_interface)
        draws = config.aggregator(draws, hierarchy, population)

    version = scenario_spec.covariates[covariate]
    location_ids = data_interface.load_location_ids()
    n_draws = data_interface.get_n_draws()
    draw_cols = [f'draw_{draw}' for draw in range(n_draws)]

    logger.info(f'Loading and processing input data for {covariate}.')
    input_data = data_interface.load_covariate(covariate, version, location_ids, with_observed=True)

    # Move 'observed' out of the index so the outer merge aligns on the
    # remaining index levels and carries 'observed' along as a column.
    observed = input_data.reset_index(level='observed')
    merged = draws.merge(observed, how='outer', left_index=True, right_index=True)
    merged = merged.reset_index()

    has_date = 'date' in merged.columns
    index_cols = ['location_id', 'date', 'observed'] if has_date else ['location_id', 'observed']

    result = merged.set_index(index_cols)[draw_cols]
    # A row counts as modeled only when every draw column has a value.
    result['modeled'] = result.notnull().all(axis=1).astype(int)

    # Repeat the input data once per draw so it can fill gaps in the draws.
    # NOTE(review): assumes the input data contributes exactly one value
    # column, otherwise the column relabeling below would fail -- confirm.
    input_as_draws = pd.concat([input_data.reorder_levels(index_cols)] * n_draws, axis=1)
    input_as_draws.columns = draw_cols
    result = result.combine_first(input_as_draws)
    result = result.set_index('modeled', append=True)

    logger.info(f'Saving data for {covariate}.')
    if config.draw_level:
        data_interface.save_output_draws(result.reset_index(), scenario_name, config.label)

    summary = pp.summarize(result)
    data_interface.save_output_summaries(summary.reset_index(), scenario_name, config.label)