def get_locations_modeled_and_missing(data_interface: ForecastDataInterface):
    """Report which locations were modeled and which most-detailed ones were not.

    Parameters
    ----------
    data_interface
        Interface used to load the location hierarchy (via ``load_hierarchy``)
        and the modeled location ids (via ``load_location_ids``).

    Returns
    -------
    dict
        ``'modeled'`` maps to the ids returned by
        ``data_interface.load_location_ids()``, unchanged. ``'missing'`` maps
        to a list of the hierarchy ``location_id`` values flagged
        ``most_detailed == 1`` that are absent from the modeled ids. The list
        comes from a set, so its order is arbitrary.
    """
    hierarchy = load_hierarchy(data_interface)
    modeled = data_interface.load_location_ids()

    # Unique location ids of the hierarchy rows flagged as most detailed.
    is_most_detailed = hierarchy.most_detailed == 1
    most_detailed_ids = hierarchy.loc[is_most_detailed, 'location_id'].unique().tolist()

    # Most-detailed ids that do not appear among the modeled ids.
    missing = list(set(most_detailed_ids).difference(modeled))

    return {
        'modeled': modeled,
        'missing': missing,
    }
def postprocess_covariate(data_interface: ForecastDataInterface,
                          resampling_map: Dict[int, Dict[str, List[int]]],
                          scenario_spec: ScenarioSpecification,
                          scenario_name: str,
                          covariate: str) -> None:
    """Resample one covariate, combine it with its input data, and save outputs.

    The covariate is loaded with the loader configured in ``COVARIATES``,
    concatenated column-wise, resampled with ``resampling_map`` and, when an
    aggregator is configured, aggregated using the modeled hierarchy and
    populations. The result is merged with the input covariate data, gaps in
    the draws are filled from that input, and draws and/or summaries are
    written through ``data_interface``.

    Parameters
    ----------
    data_interface
        Interface used for all loading and saving.
    resampling_map
        Passed straight to ``pp.resample_draws``.
    scenario_spec
        Supplies the covariate version via ``scenario_spec.covariates``.
    scenario_name
        Scenario whose covariate is loaded and under which outputs are saved.
    covariate
        Key into ``COVARIATES`` and ``scenario_spec.covariates``.

    Returns
    -------
    None
        Results are written through ``data_interface``.
    """
    config = COVARIATES[covariate]

    logger.info(f'Loading {covariate}.')
    loaded = config.loader(covariate, config.time_varying, scenario_name, data_interface)

    logger.info(f'Concatenating and resampling {covariate}.')
    draws = pp.resample_draws(pd.concat(loaded, axis=1), resampling_map)

    aggregate = config.aggregator
    if aggregate is not None:
        # Arguments evaluate left to right: hierarchy first, then populations.
        draws = aggregate(
            draws,
            pp.load_modeled_hierarchy(data_interface),
            pp.load_populations(data_interface),
        )

    version = scenario_spec.covariates[covariate]
    modeled_location_ids = data_interface.load_location_ids()
    n_draws = data_interface.get_n_draws()

    logger.info(f'Loading and processing input data for {covariate}.')
    input_data = data_interface.load_covariate(
        covariate, version, modeled_location_ids, with_observed=True,
    )

    # Move 'observed' out of the index so it comes along as a column in the
    # outer merge with the modeled draws.
    observed_as_column = input_data.reset_index(level='observed')
    merged = draws.merge(
        observed_as_column,
        left_index=True,
        right_index=True,
        how='outer',
    ).reset_index()

    # A 'date' column is only included in the index when the merged data has one.
    if 'date' in merged.columns:
        index_cols = ['location_id', 'date', 'observed']
    else:
        index_cols = ['location_id', 'observed']
    draw_cols = [f'draw_{draw}' for draw in range(n_draws)]

    # Keep this as one chained expression: the intermediate frame must not
    # outlive the statement, otherwise pandas may warn on the assignment below.
    result = merged.set_index(index_cols)[draw_cols]
    # 1 where every draw column in the row is non-null, 0 otherwise.
    result['modeled'] = result.notnull().all(axis=1).astype(int)

    # Repeat the input data once per draw so it lines up with the draw columns.
    input_reordered = input_data.reorder_levels(index_cols)
    input_as_draws = pd.concat([input_reordered] * n_draws, axis=1)
    input_as_draws.columns = draw_cols

    # Null draw values are filled from the replicated input data.
    result = result.combine_first(input_as_draws)
    result = result.set_index('modeled', append=True)

    logger.info(f'Saving data for {covariate}.')
    output_label = config.label
    if config.draw_level:
        data_interface.save_output_draws(result.reset_index(), scenario_name, output_label)

    summary = pp.summarize(result)
    data_interface.save_output_summaries(summary.reset_index(), scenario_name, output_label)