def save_mortality(ecode, year_id, sex_id, locs, ages, version):
    """Pull best CODEm draws for an e-code, divide by population, write NetCDF.

    Draws are fetched with ``gd.get_draws`` for the cause mapped from
    ``ecode``; every draw column is divided by the ``pop`` column, the
    bookkeeping columns are dropped, and the frame is converted with
    ``etl.df_to_xr`` and written with ``to_netcdf``.

    Args:
        ecode: E-code resolved to a cause ID through ``help.get_cause``.
        year_id: Year filter for ``get_draws``; also passed to the file-name
            template.
        sex_id: Sex filter for ``get_draws``; also passed to the file-name
            template.
        locs: Location IDs passed to ``get_draws``.
        ages: Age group IDs passed to ``get_draws``.
        version: Not used by this function; kept so existing callers work.

    Raises:
        OSError: If the output folder cannot be created for a reason other
            than it already existing.
    """
    # ``os.errno`` was removed in Python 3.7; use the errno module directly.
    import errno

    cause_id = help.get_cause(ecode)
    draws = gd.get_draws(
        gbd_id_type="cause_id",
        gbd_id=cause_id,
        location_id=locs,
        year_id=year_id,
        sex_id=sex_id,
        age_group_id=ages,
        status="best",
        source="codem",
        gbd_round_id=help.GBD_ROUND
    )

    # Divide each draw column row-wise by the population column.
    draws[help.drawcols()] = draws[help.drawcols()].divide(draws['pop'], axis=0)
    draws.drop(['pop', 'envelope', 'cause_id', 'sex_name', 'measure_id', 'metric_id'],
               axis=1, inplace=True)
    draws.set_index(['location_id', 'year_id', 'sex_id', 'age_group_id'], inplace=True)
    mort = etl.df_to_xr(draws, wide_dim_name='draw', fill_value=np.nan)

    # The template below has no ``{}`` placeholders, so the format arguments
    # do not appear in the resulting name.
    filename = "FILEPATH.nc".format(str(year_id), str(sex_id))
    folder = os.path.join("FILEPATH")
    if not os.path.exists(folder):
        try:
            os.makedirs(folder)
        except OSError as e:
            # Tolerate the folder appearing between the check and makedirs.
            if e.errno != errno.EEXIST:
                raise

    filepath = os.path.join(folder, filename)
    print("Writing mortality")
    mort.to_netcdf(filepath)
def import_square(self, meid, source, filler=None, **kwargs):
    """Get draws for the specified modelable entity by dimensions.

    Args:
        meid: Modelable entity ID passed to ``get_draws``.
        source: ``source`` argument passed to ``get_draws``.
        filler: Value used to fill missing cells after aligning the draws
            with ``self.index_df``. Defaults to 0 when ``None``.
        **kwargs: Extra filters forwarded to ``get_draws``. When none are
            given, a copy of ``self.idx_dmnsns`` is used.

    Returns:
        ``self.index_df`` concatenated column-wise with the draw columns
        (``self.draw_cols``), with missing values replaced by ``filler``.
    """
    if not kwargs:
        kwargs = self.idx_dmnsns.copy()
    if filler is None:
        filler = 0

    # The former key-renaming loop replaced every key with itself while
    # iterating over ``kwargs.keys()``; it had no effect and mutated the dict
    # during iteration, so it has been removed.
    df = get_draws('modelable_entity_id', meid, source=source,
                   status='latest', decomp_step='step1', **kwargs)

    # Materialise the dimension names once; a plain list is what
    # ``set_index`` expects (a dict view is not a valid key container).
    index_cols = list(self.idx_dmnsns)
    for c in index_cols:
        df[c] = pd.to_numeric(df[c])
    df = df.set_index(index_cols)
    df = df[self.draw_cols]
    df = pd.concat([self.index_df, df], axis=1)
    df.fillna(value=filler, inplace=True)
    return df
def get_measures_get_draws(me_id, locs, years, sexes, ages, inc_id, rms_id, emr_id):
    """Fetch best epi draws for three measures and split them by measure.

    Args:
        me_id: Modelable entity ID to pull.
        locs, years, sexes, ages: Demographic filters passed to ``get_draws``.
        inc_id, rms_id, emr_id: Measure IDs returned under the keys
            ``'incidence'``, ``'remission'`` and ``'emr'`` respectively.

    Returns:
        dict mapping ``'incidence'``, ``'remission'`` and ``'emr'`` to frames
        indexed by location/year/age/sex with ``measure_id`` removed.
    """
    pulled = gd.get_draws(
        gbd_id_type="modelable_entity_id",
        gbd_id=me_id,
        location_id=locs,
        year_id=years,
        sex_id=sexes,
        age_group_id=ages,
        status="best",
        source="epi",
        measure_id=[inc_id, rms_id, emr_id],
        gbd_round_id=help.GBD_ROUND)

    pulled = pulled.drop(['modelable_entity_id', 'model_version_id', 'metric_id'], axis=1)
    pulled = pulled.set_index(['location_id', 'year_id', 'age_group_id', 'sex_id'])

    def _for_measure(measure):
        # Rows for one measure, without the now-constant measure_id column.
        return pulled.loc[pulled['measure_id'] == measure].drop('measure_id', axis=1)

    result = {}
    for label, measure in (('incidence', inc_id), ('remission', rms_id), ('emr', emr_id)):
        result[label] = _for_measure(measure)
    return result
def read_model_draws(cause_id, model_version_id, location_ids,
                     decomp_step_id, gbd_round_id):
    """
    Reads draws using get_draws.

    :param cause_id:
        cause ID for the model (cast to int before the call)
    :param model_version_id:
        model_version_id to be read from
    :param location_ids: list of ints
        list of location ids to filter the draws by
    :param decomp_step_id: int
        decomposition step ID; converted with
        ``decomp_step_from_decomp_step_id`` before the call
    :param gbd_round_id: int
        GBD round ID (cast to int before the call)
    :return: dataframe
        pandas dataframe of the draws for a given list of locations
    """
    # The two literals are concatenated, so the separating space must live
    # inside the first one; it was previously missing.
    logger.info("Reading draws with get_draws for cause ID {}, "
                "model_version_id {}.".format(cause_id, model_version_id))
    df = get_draws(gbd_id_type='cause_id',
                   gbd_id=int(cause_id),
                   source='codem',
                   version_id=model_version_id,
                   location_id=location_ids,
                   decomp_step=decomp_step_from_decomp_step_id(decomp_step_id),
                   gbd_round_id=int(gbd_round_id))
    return df
def get_measures_get_draws(ecode, locs, years, sexes, ages, inc_id, rms_id, emr_id, decomp):
    """Fetch best epi draws for an e-code's ME and split them by measure.

    Args:
        ecode: E-code resolved to a modelable entity ID via ``help.get_me``.
        locs, years, sexes, ages: Demographic filters passed to ``get_draws``.
        inc_id, rms_id, emr_id: Measure IDs returned under the keys
            ``'incidence'``, ``'remission'`` and ``'emr'`` respectively.
        decomp: Decomp step passed to both the version lookup and ``get_draws``.

    Returns:
        dict with keys ``'incidence'``, ``'remission'`` and ``'emr'``; the
        ``'emr'`` frame is reindexed onto the incidence frame's index.
    """
    me_id = help.get_me(ecode)

    # The lookup result is not used below; the call is retained so that any
    # error it raises still surfaces here.
    db.get_best_model_versions(entity="modelable_entity",
                               ids=me_id,
                               status="best",
                               decomp_step=decomp,
                               gbd_round_id=help.GBD_ROUND)

    pulled = gd.get_draws(
        gbd_id_type="modelable_entity_id",
        gbd_id=me_id,
        location_id=locs,
        year_id=years,
        sex_id=sexes,
        age_group_id=ages,
        status="best",
        source="epi",
        measure_id=[inc_id, rms_id, emr_id],
        gbd_round_id=help.GBD_ROUND,
        decomp_step=decomp
    )
    pulled = pulled.drop(['modelable_entity_id', 'model_version_id', 'metric_id'], axis=1)
    pulled = pulled.set_index(['location_id', 'year_id', 'age_group_id', 'sex_id'])

    def _for_measure(measure):
        # Rows for one measure, without the now-constant measure_id column.
        return pulled.loc[pulled['measure_id'] == measure].drop('measure_id', axis=1)

    incidence = _for_measure(inc_id)
    remission = _for_measure(rms_id)
    # Align EMR rows to the incidence index.
    emr = _for_measure(emr_id).reindex(incidence.index)

    return {'incidence': incidence, 'remission': remission, 'emr': emr}
def _dummy_draw_call(source_meid, source_mvid, gbd_round_id, decomp_step):
    """
    Pull a deliberately small slice of epi draws (one location, one year, one
    age group, both sexes) so callers can inspect it to infer n_draws and
    sampling arguments.

    Arguments:
        source_meid (int): the parent me_id
        source_mvid (int): the model version to read for source_meid
        gbd_round_id (int)
        decomp_step (str)

    Returns:
        pd.DataFrame
    """
    # Fixed, minimal demographic slice for the probe query.
    small_slice = dict(location_id=1, sex_id=[1, 2], year_id=2000, age_group_id=22)
    return get_draws(
        source='epi',
        gbd_id_type='modelable_entity_id',
        gbd_id=source_meid,
        version_id=source_mvid,
        gbd_round_id=gbd_round_id,
        decomp_step=decomp_step,
        **small_slice
    )
def get_como_draws(entity_id: int, location_id: int, entity_type: str = 'cause') -> pd.DataFrame:
    """Pull COMO draws for one cause or sequela in one location.

    ``entity_type == 'cause'`` queries by ``cause_id``; any other value
    queries by ``sequela_id``. Both sexes, the age groups from
    ``get_age_group_id()`` and the estimation years for the project's GBD
    round are requested.
    """
    # FIXME: Should submit a ticket to IT to determine if we need to specify
    #        an output_version_id or a model_version_id to ensure we're
    #        getting the correct results.
    if entity_type == 'cause':
        id_type = 'cause_id'
    else:
        id_type = 'sequela_id'

    publication_ids = get_publication_ids_for_round(project_globals.GBD_ROUND_ID)
    # NOTE: Per the original author (J.C 11/20), this lookup currently
    #       returns nothing because the DB tables have not been built yet;
    #       get_draws accepts that, and the value will populate once the
    #       tables are in place.
    como_version = get_gbd_tool_version(publication_ids, 'como')

    both_sexes = [vi_globals.SEXES['Male'], vi_globals.SEXES['Female']]
    return get_draws(
        gbd_id_type=id_type,
        gbd_id=entity_id,
        source="como",
        location_id=location_id,
        sex_id=both_sexes,
        age_group_id=get_age_group_id(),
        version_id=como_version,
        year_id=get_estimation_years(project_globals.GBD_ROUND_ID),
        gbd_round_id=project_globals.GBD_ROUND_ID)
def import_square(self, gopher_what, source, filler=None, **kwargs):
    """Get draws for the specified entities by dimensions.

    Args:
        gopher_what: Mapping of GBD ID type to GBD ID; its keys and values
            are passed to ``get_draws`` as ``gbd_id_type`` and ``gbd_id``.
        source: ``source`` argument passed to ``get_draws``.
        filler: Value used to fill missing cells after aligning the draws
            with ``self.index_df``. Defaults to 0 when ``None``.
        **kwargs: Demographic filters. When none are given, a copy of
            ``self.idx_dmnsns`` is used. Must contain ``measure_id``,
            ``location_id``, ``year_id``, ``age_group_id`` and ``sex_id``.

    Returns:
        ``self.index_df`` concatenated column-wise with the draw columns
        (``self.draw_cols``), with missing values replaced by ``filler``.

    Raises:
        KeyError: If one of the required filters is missing from ``kwargs``.
    """
    if not kwargs:
        kwargs = self.idx_dmnsns.copy()
    if filler is None:
        filler = 0

    # Pass real lists rather than dict views: on Python 3 ``.keys()`` and
    # ``.values()`` are views, not lists.
    df = get_draws(gbd_id_type=list(gopher_what.keys()),
                   gbd_id=list(gopher_what.values()),
                   source=source,
                   measure_id=kwargs['measure_id'],
                   location_id=kwargs['location_id'],
                   year_id=kwargs['year_id'],
                   age_group_id=kwargs['age_group_id'],
                   sex_id=kwargs['sex_id'],
                   gbd_round_id=5)

    # A plain list is what ``set_index`` expects; a dict view is not a valid
    # key container.
    index_cols = list(self.idx_dmnsns)
    for c in index_cols:
        df[c] = pd.to_numeric(df[c])
    df = df.set_index(index_cols)
    df = df[self.draw_cols]
    df = pd.concat([self.index_df, df], axis=1)
    df.fillna(value=filler, inplace=True)
    return df
def make_new_hgb_mean_file():
    '''
    Write a flat HDF file of the draws for modelable entity 10487 together
    with their row-wise mean (stored under both ``hgb_mean`` and
    ``mean_hgb``), with the draw columns renamed from ``draw_N`` to ``hgb_N``.

    Per the original author's note: this only works with many slots, and
    should be run prior to anemia CA any time the mean hemoglobin model has
    been updated.
    '''
    print("Operation starting")
    draw_ids = list(range(1000))
    draw_names = ['draw_%s' % d for d in draw_ids]

    hgb = get_draws('modelable_entity_id', 10487, 'epi',
                    gbd_round_id=6, decomp_step='step4')

    # Row-wise mean over the 1000 draws, kept under two column names.
    hgb["hgb_mean"] = hgb[draw_names].mean(axis=1)
    hgb["mean_hgb"] = hgb["hgb_mean"]

    hgb = hgb.drop(['measure_id', 'modelable_entity_id', 'model_version_id'], axis=1)
    hgb = hgb.rename(columns={'draw_%s' % d: 'hgb_%s' % d for d in draw_ids})

    print("Writing File")
    hgb.to_hdf(
        'FILEPATH',
        key='draws',
        mode='w',
        format='table',
        data_columns=['location_id', 'age_group_id', 'year_id', 'sex_id'])
def save_mortality(ecode, year_id, sex_id, locs, ages, decomp, version):
    """Pull best CODEm draws for an e-code, divide by population, write NetCDF.

    Draws are fetched with ``gd.get_draws`` for the cause mapped from
    ``ecode``; every draw column is divided by the ``pop`` column, the
    bookkeeping columns are dropped, and the frame is converted with
    ``etl.df_to_xr`` and written to
    ``<DATA_DIR>/<decomp>/<parent ecode>/<version>/mortality_for_shocks/``
    as ``mort_<year_id>_<sex_id>.nc``.

    Args:
        ecode: E-code resolved to a cause ID through ``help.get_cause`` and
            to a parent via ``inj_info.ECODE_PARENT``.
        year_id: Year filter for ``get_draws``; part of the file name.
        sex_id: Sex filter for ``get_draws``; part of the file name.
        locs: Location IDs passed to ``get_draws``.
        ages: Age group IDs passed to ``get_draws``.
        decomp: Decomp step passed to ``get_draws`` and used as a path
            component.
        version: Version string used as a path component; trailing
            whitespace is stripped first.

    Raises:
        OSError: If the output folder cannot be created for a reason other
            than it already existing.
    """
    # ``os.errno`` was removed in Python 3.7; use the errno module directly.
    import errno

    cause_id = help.get_cause(ecode)
    draws = gd.get_draws(
        gbd_id_type="cause_id",
        gbd_id=cause_id,
        location_id=locs,
        year_id=year_id,
        sex_id=sex_id,
        age_group_id=ages,
        status="best",
        source="codem",
        gbd_round_id=help.GBD_ROUND,
        decomp_step=decomp
    )

    # Divide each draw column row-wise by the population column.
    draws[help.drawcols()] = draws[help.drawcols()].divide(draws['pop'], axis=0)
    draws.drop(['pop', 'envelope', 'cause_id', 'sex_name', 'measure_id', 'metric_id'],
               axis=1, inplace=True)
    draws.set_index(['location_id', 'year_id', 'sex_id', 'age_group_id'], inplace=True)
    mort = etl.df_to_xr(draws, wide_dim_name='draw', fill_value=np.nan)

    filename = "mort_{}_{}.nc".format(str(year_id), str(sex_id))
    version = version.rstrip()
    folder = os.path.join(paths.DATA_DIR, decomp, inj_info.ECODE_PARENT[ecode],
                          version, 'mortality_for_shocks')
    if not os.path.exists(folder):
        try:
            os.makedirs(folder)
        except OSError as e:
            # Tolerate the folder appearing between the check and makedirs.
            if e.errno != errno.EEXIST:
                raise

    filepath = os.path.join(folder, filename)
    mort.to_netcdf(filepath)
def make_shiftprev_draws(location, year_id, gbd_round_id, decomp_step, norm_df):
    """Multiply report-group prevalence draws by ``norm_df - mean_hgb``.

    ``mean_hgb`` holds the epi draws for modelable entity 10487, sorted and
    indexed by location/year/age/sex. The 1000 ``draw_N`` columns of the
    frame from ``compile_report_group_totals`` are multiplied by
    ``norm_df - mean_hgb`` along the index, and the result is returned with
    its index reset.

    NOTE(review): alignment relies on ``prevdf`` and ``norm_df`` sharing the
    same index/columns as ``mean_hgb`` - confirm against the callers.
    """
    demographic_cols = ['location_id', 'year_id', 'age_group_id', 'sex_id']
    draw_names = ['draw_%s' % d for d in range(1000)]

    prevdf = compile_report_group_totals(location, year_id, gbd_round_id, decomp_step)

    mean_hgb = get_draws('modelable_entity_id', 10487, 'epi',
                         location_id=location, year_id=year_id,
                         gbd_round_id=gbd_round_id, decomp_step=decomp_step)
    mean_hgb = mean_hgb.sort_values(by=demographic_cols)
    mean_hgb = mean_hgb.set_index(demographic_cols)

    prev_draws = prevdf[draw_names]
    hgb_shift = norm_df - mean_hgb
    shifted = prev_draws.multiply(hgb_shift, axis="index")
    return shifted.reset_index()
def grab_prevalence_draws(me_id, year_id, gbd_round_id, decomp_step) -> pd.DataFrame:
    """Pull measure-5 epi draws for ``me_id`` and ``year_id``.

    Locations, age groups and sexes come from ``get_demographics("epi", ...)``
    for the given GBD round. The age groups and ``me_id`` are printed before
    the pull.
    """
    demographics = get_demographics("epi", gbd_round_id=gbd_round_id)
    age_groups = demographics['age_group_id']
    print(age_groups, me_id)

    return get_draws(
        'modelable_entity_id', me_id,
        source='epi',
        measure_id=5,
        location_id=demographics['location_id'],
        year_id=year_id,
        age_group_id=age_groups,
        sex_id=demographics['sex_id'],
        gbd_round_id=gbd_round_id,
        decomp_step=decomp_step)
def fetch_draws(meid, measures, location_id):
    """Return epi draws for one modelable entity.

    Pulls from GBD round 6, decomp step ``"iterative"``, pinned to the model
    version held in ``ADDRESS``, filtered to the given measures and
    location(s).
    """
    query = {
        'source': "epi",
        'gbd_id_type': "modelable_entity_id",
        'gbd_id': meid,
        'measure_id': measures,
        'location_id': location_id,
        'gbd_round_id': 6,
        'decomp_step': "iterative",
        'version_id': ADDRESS,
    }
    return get_draws(**query)
def get_ylds(
        hale_version: int,
        como_version: int,
        location_id: int,
        year_ids: List[int],
        age_group_ids: List[int],
        under_one_age_group_ids: List[int],
        draws: int,
        gbd_round_id: int,
        decomp_step_id: int
) -> pd.DataFrame:
    """
    Pull all-cause YLD draws from COMO for one location and a set of years.

    Steps:
        - read the cached population for (hale_version, location_id)
        - read COMO draws with get_draws, downsampled to ``draws`` draws
        - hand both to ``_aggregate`` together with the under-one age groups
        - keep only the demographic and draw columns

    NOTE(review): ``age_group_ids`` is not referenced in this function; any
    subsetting to HALE ages presumably happens inside ``_aggregate`` -
    confirm.
    """
    # Cached population for this HALE version and location.
    population_path = path_utils.get_population_path(hale_version, location_id)
    logging.info(f'Reading cached population from {population_path}')
    population_df = pd.read_feather(population_path)

    # COMO YLD draws for both sexes.
    logging.info('Pulling YLDs from COMO draws')
    yld_df = get_draws(
        gbd_id_type='cause_id',
        gbd_id=gbd.constants.cause.ALL_CAUSE,
        source='como',
        measure_id=gbd.constants.measures.YLD,
        location_id=location_id,
        year_id=year_ids,
        sex_id=[gbd.constants.sex.MALE, gbd.constants.sex.FEMALE],
        gbd_round_id=gbd_round_id,
        decomp_step=gbd.decomp_step.decomp_step_from_decomp_step_id(decomp_step_id),
        version_id=como_version,
        n_draws=draws,
        downsample=True,
    )
    logging.info('Pulled YLDs from COMO draws')

    # Aggregate, renumber rows, and trim to the output columns.
    draw_cols = [col for col in yld_df.columns if 'draw' in col]
    aggregated = _aggregate(yld_df, population_df, draw_cols, under_one_age_group_ids)
    aggregated = aggregated.reset_index(drop=True)
    return aggregated.loc[:, columns.DEMOGRAPHICS + draw_cols]
def load_procedure_proportions(procedure_me_id, location_id):
    ''' Return the best epi draws of measure 18 for ``procedure_me_id`` in
        ``location_id``.

        Per the original description, these are estimates of the proportion
        of the cancer population that receives a given procedure.
    '''
    print("    loading procedure proportions...")
    return get_draws(
        gbd_id_type='modelable_entity_id',
        source='epi',
        measure_id=18,
        gbd_id=procedure_me_id,
        location_id=location_id,
        status="best",
    )
def get_modelable_entity_draws(me_id: int, location_id: int) -> pd.DataFrame:
    """Pull epi draws for one modelable entity in one location.

    The model version is resolved with ``get_dismod_model_version`` from the
    publication IDs of the project's GBD round; both sexes and the age groups
    from ``get_age_group_id()`` are requested.
    """
    round_publications = get_publication_ids_for_round(project_globals.GBD_ROUND_ID)
    dismod_version = get_dismod_model_version(me_id, round_publications)
    both_sexes = [vi_globals.SEXES['Male'], vi_globals.SEXES['Female']]

    return get_draws(
        gbd_id_type='modelable_entity_id',
        gbd_id=me_id,
        source="epi",
        location_id=location_id,
        sex_id=both_sexes,
        age_group_id=get_age_group_id(),
        version_id=dismod_version,
        gbd_round_id=project_globals.GBD_ROUND_ID)
def pullDeaths(geo):
    """Pull CodCorrect measure-1 draws for causes 495, 496 and 497.

    Reads version 86 from GBD round 5 for location(s) ``geo`` and the
    module-level ``year``, keeps rows whose sex is in the module-level
    ``sexes`` and whose measure is 1, then passes the frame through
    ``checkAges``.
    """
    cause_ids = [495, 496, 497]
    deaths = get_draws(['cause_id', 'cause_id', 'cause_id'], cause_ids, 'codcorrect',
                       location_id=geo, year_id=year, measure_id=1,
                       gbd_round_id=5, version_id=86, num_workers=2)

    wanted_sex = deaths.sex_id.isin(sexes)
    deaths = deaths[wanted_sex]
    is_measure_one = deaths['measure_id'] == 1
    deaths = deaths[is_measure_one]

    return checkAges(deaths)
def get_shock_mort(ecode, pops, locs, ages, year_id, sex_id, decomp):
    """Pull CODEm measure-1 draws for a shock e-code and divide by population.

    For ``'inj_war_execution'`` any rows for age groups 2 and 3 are replaced
    by copies of the age-group-4 rows with every draw column set to zero.

    Args:
        ecode: E-code resolved to a cause ID via ``help.get_cause``; also the
            key into the module-level ``model_versions``.
        pops: Object whose ``'population'`` entry the converted draws are
            divided by.
        locs: Location IDs passed to ``get_draws``.
        ages: Age group IDs passed to ``get_draws``.
        year_id: Year filter passed to ``get_draws``.
        sex_id: Selects the model version (``model_versions[ecode][sex_id]``);
            it is not passed to ``get_draws`` as a filter.
        decomp: Not used; the decomp step is read from
            ``model_versions['decomp']``. NOTE(review): confirm this is
            intended.

    Returns:
        The result of ``etl.df_to_xr`` on the indexed draws, divided by
        ``pops['population']``.
    """
    # ``DataFrame.append`` was removed in pandas 2.0; ``pd.concat`` is the
    # equivalent. Imported locally because this block did not previously
    # depend on ``pd``.
    import pandas as pd

    cause_id = help.get_cause(ecode)
    draws = gd.get_draws(gbd_id_type="cause_id",
                         gbd_id=cause_id,
                         version_id=model_versions[ecode][sex_id],
                         location_id=locs,
                         year_id=year_id,
                         age_group_id=ages,
                         measure_id=1,
                         source="codem",
                         gbd_round_id=help.GBD_ROUND,
                         decomp_step=model_versions['decomp'])

    if ecode == 'inj_war_execution':
        # Drop age groups 2 and 3, then rebuild them as all-zero copies of
        # the age-group-4 rows.
        draws = draws.loc[~draws['age_group_id'].isin([2, 3])]
        template = draws.loc[draws['age_group_id'] == 4].copy()
        draw_cols = [col for col in template.columns if 'draw_' in col]
        template.loc[:, draw_cols] = 0
        zero_rows = [template.assign(age_group_id=age_id) for age_id in (2, 3)]
        draws = pd.concat([draws] + zero_rows)

    draws = draws.drop(['cause_id', 'measure_id', 'metric_id', 'sex_name'], axis=1)
    draws = draws.set_index(['location_id', 'year_id', 'sex_id', 'age_group_id'])
    mort = etl.df_to_xr(draws, wide_dim_name='draw', fill_value=np.nan)
    mort = mort / pops['population']  # gets it into rate space
    return mort
def get_prevalence_draws(location, meid, year_id, gbd_round_id, decomp_step):
    """Return epi draws for ``meid`` indexed by location/year/age/sex.

    The measure, metric, model-version and modelable-entity columns are
    removed before indexing. No measure filter is passed to ``get_draws``.
    """
    bookkeeping_cols = ['measure_id', 'metric_id',
                        'model_version_id', 'modelable_entity_id']
    demographic_cols = ['location_id', 'year_id', 'age_group_id', 'sex_id']

    pulled = get_draws('modelable_entity_id', meid, 'epi',
                       location_id=location, year_id=year_id,
                       gbd_round_id=gbd_round_id, decomp_step=decomp_step)
    pulled = pulled.drop(bookkeeping_cols, axis=1)
    return pulled.set_index(demographic_cols)
def grab_prevalence_draws(me_id, year, locations):
    """Grab measure-5 epi draws for the given year, me_id and locations.

    Sexes 1 and 2 and age groups 2-20 plus 30, 31, 32 and 235 are requested.

    NOTE(review): the original defined ``gbd_round = 5`` but never passed it
    to ``get_draws``, so the pull uses whatever round ``get_draws`` defaults
    to. The unused local was removed; confirm whether ``gbd_round_id=5``
    should be passed.
    """
    measure = 5
    sexes = [1, 2]
    # ``range`` is not a list on Python 3, so it must be materialised before
    # concatenating.
    ages = list(range(2, 21)) + [30, 31, 32, 235]

    df = get_draws('modelable_entity_id', me_id, source='epi',
                   location_id=locations, year_id=year, measure_id=measure,
                   age_group_id=ages, sex_id=sexes)
    return df
def _get_draws(source_cause_id, gbd_round_id, decomp_step, sex_ids):
    """Read CODEm draws of the source cause for every sex and stack them.

    For each sex in ``sex_ids`` the version is looked up with
    ``best_versions`` and the draws for that version are pulled; the
    per-sex frames are concatenated in the order given.
    """
    def _pull_for(sex):
        # Resolve the version first, then read that version's draws.
        version_id = best_versions(source_cause_id, gbd_round_id, sex, decomp_step)
        return get_draws(gbd_id_type='cause_id',
                         gbd_id=source_cause_id,
                         source='codem',
                         gbd_round_id=gbd_round_id,
                         decomp_step=decomp_step,
                         version_id=version_id)

    per_sex = [_pull_for(sex) for sex in sex_ids]
    return pd.concat(per_sex)
def calcSurvInc(params):
    """Compute incidence draws multiplied by ``1 - emr / (12 + emr)``.

    Args:
        params: Sequence laid out as ``((me, mv), geo, sex)``. ``me`` is the
            modelable entity ID, ``geo`` the location and ``sex`` the sex
            filter. ``mv`` is not used by this function.

    Returns:
        DataFrame indexed by ``age_group_id`` holding the adjusted incidence
        columns plus ``location_id``, ``sex_id`` and ``modelable_entity_id``.

    Relies on the module-level names ``year``, ``ages1``, ``ages2`` and
    ``columns``. NOTE(review): ``columns`` is assumed to hold
    ``age_group_id`` plus the draw columns - confirm.
    """
    me = params[0][0]
    geo = params[1]
    sex = params[2]

    # print() with one pre-formatted argument behaves the same on Python 2
    # and 3; the former print statements were a SyntaxError on Python 3.
    print('Preparing to get_draws for %s' % me)
    draws = get_draws('modelable_entity_id', me, 'epi', location_id=geo,
                      year_id=year, sex_id=sex, gbd_round_id=5)
    print('Finish get_draws, starting data processing')

    d_ages = draws.age_group_id.unique()
    if 235 in d_ages:
        # Copy so the recode below edits an independent frame, not a slice.
        draws = draws[draws.age_group_id.isin(ages2)].copy()
        draws['age_group_id'] = draws['age_group_id'].replace(to_replace=235, value=33)
    elif 33 in d_ages:
        draws = draws[draws.age_group_id.isin(ages1)]

    # pull out incidence (measure 6) and EMR (measure 9) into separate dfs,
    # keeping only age_group_id and the draw columns, indexed by age group
    incidence = draws[draws.measure_id == 6][columns].set_index('age_group_id')
    emr = draws[draws.measure_id == 9][columns].set_index('age_group_id')

    # get 28 day survivorship (per the original author's comment)
    # NOTE(review): the constant 12 is not explained in this block.
    fatality = emr / (12 + emr)
    survivorship = 1 - fatality

    # multiply incidence by 28 day survivorship
    final_incidence = incidence * survivorship

    # add back on identifying columns
    final_incidence['location_id'] = geo
    final_incidence['sex_id'] = sex
    final_incidence['modelable_entity_id'] = me
    return final_incidence
def calcGlobalDeaths(params):
    """Pull measure-15 epi draws for one modelable entity and sex.

    Args:
        params: Sequence laid out as ``(me, sex)``.

    Returns:
        The measure-15 rows after ``checkAges``, restricted to the
        module-level ``all_cols``. Locations and year come from the
        module-level ``locations`` and ``year``.
    """
    me = params[0]
    sex = params[1]

    pulled = get_draws('modelable_entity_id', me, 'epi', location_id=locations,
                       year_id=year, sex_id=sex, gbd_round_id=5)
    measure_rows = pulled[pulled['measure_id'] == 15]
    checked = checkAges(measure_rows)
    return checked[all_cols]
def load_procedure_proportions(procedure_me_id, location_id):
    ''' Return epi draws of measure 18 for ``procedure_me_id`` in
        ``location_id``, using the GBD round and decomp step read from
        ``utils.get_gbd_parameter``.

        Per the original description, these are estimates of the proportion
        of the cancer population that receives a given procedure.
    '''
    print("    loading procedure proportions...")
    # Current decomp step and GBD round from the shared parameter store.
    current_step = utils.get_gbd_parameter('current_decomp_step')
    current_round = utils.get_gbd_parameter('current_gbd_round')
    return get_draws(
        gbd_id_type='modelable_entity_id',
        source='epi',
        measure_id=18,
        gbd_id=procedure_me_id,
        location_id=location_id,
        gbd_round_id=current_round,
        decomp_step=current_step,
    )
def get_the_draws(meid, location):
    """Return round-5 epi draws (measure 5, metric 3) for one location.

    Bookkeeping columns are dropped and the frame is sorted and indexed by
    location/year/age/sex.
    """
    demographic_cols = ['location_id', 'year_id', 'age_group_id', 'sex_id']
    bookkeeping_cols = ['measure_id', 'metric_id',
                        'modelable_entity_id', 'model_version_id']

    pulled = get_draws('modelable_entity_id', meid, 'epi',
                       location_id=location, measure_id=5, metric_id=3,
                       gbd_round_id=5)
    pulled = pulled.drop(bookkeeping_cols, axis=1)
    pulled = pulled.sort_values(by=demographic_cols)
    return pulled.set_index(demographic_cols)
def compute_global_ratios(cause_id, year_id, gbd_round_id, decomp_step_id, n_draws):
    """Compute YLL ratios between each input cause and its ratio causes.

    CodCorrect YLL draws are pulled for location 1 and both sexes,
    downsampled to ``n_draws``. For each ``input_cause_id`` group in the
    module-level ``RKEY``, draws are summed by age group and year and
    averaged across draws for (a) the input cause and (b) the group's
    ``ratio_cause_id`` causes; the ratio is (a) / (b). Infinite and missing
    ratios are set to 0.

    Args:
        cause_id: Cause ID(s) passed to ``get_draws``.
        year_id: Year filter passed to ``get_draws``.
        gbd_round_id: GBD round passed to ``get_draws``.
        decomp_step_id: Converted with ``decomp_step_from_decomp_step_id``.
        n_draws: Number of draws requested; also defines the ``draw_N``
            columns that are aggregated.

    Returns:
        DataFrame of ratios by age group, year and cause, duplicated once
        with ``sex_id`` male and once with ``sex_id`` female.
    """
    ylls = get_draws(
        "cause_id",
        cause_id,
        source="codcorrect",
        location_id=1,
        year_id=year_id,
        sex_id=[sex.MALE, sex.FEMALE],
        measure_id=measures.YLL,
        gbd_round_id=gbd_round_id,
        decomp_step=decomp_step_from_decomp_step_id(decomp_step_id),
        n_draws=n_draws,
        downsample=True)

    drawcols = [f'draw_{d}' for d in range(n_draws)]
    group_cols = ['age_group_id', 'year_id']
    ratios = []
    for resid_cid, yldmap in RKEY.groupby('input_cause_id'):
        # get the ylls
        these_ylls = ylls[ylls.cause_id == resid_cid]
        ratio_ylls = ylls[ylls.cause_id.isin(yldmap.ratio_cause_id.unique())]

        # aggregate the inputs to the appropriate level: sum within each
        # age/year cell, then average across draws
        these_ylls = these_ylls.groupby(group_cols)[drawcols].sum().mean(axis=1)
        ratio_ylls = ratio_ylls.groupby(group_cols)[drawcols].sum().mean(axis=1)

        # compute the ratio; a zero denominator gives inf or NaN, which are
        # both mapped to 0
        ratio = these_ylls / ratio_ylls
        ratio = ratio.reset_index()
        ratio = ratio.replace(np.inf, 0)
        # ``np.NaN`` was removed in NumPy 2.0; ``np.nan`` is the portable
        # spelling.
        ratio = ratio.replace(np.nan, 0)

        ratio["cause_id"] = resid_cid
        ratios.append(ratio)

    df = pd.concat(ratios)
    df_male = df.copy()
    df_male["sex_id"] = sex.MALE
    df_female = df.copy()
    df_female["sex_id"] = sex.FEMALE

    # ``DataFrame.append`` was removed in pandas 2.0; ``pd.concat`` is the
    # equivalent.
    return pd.concat([df_male, df_female])
def get_draws(self, measure_id=6, age_group_list=None):
    '''Pull epi draws of this instance's input ME (sex_id 2) and zero the
    draw columns for every age group outside ``age_group_list``.

    ``age_group_list`` defaults to age groups 7 through 15.
    '''
    if age_group_list is None:
        age_group_list = [7, 8, 9, 10, 11, 12, 13, 14, 15]

    pulled = get_draws(gbd_id_type='modelable_entity_id',
                       gbd_id=self.input_me,
                       source='epi',
                       measure_id=measure_id,
                       location_id=self.most_detailed_locs,
                       year_id=self.year_id,
                       age_group_id=self.most_detailed_ages,
                       sex_id=2,
                       gbd_round_id=self.gbd_round)

    # Only the draw column names are needed from keep_cols().
    _, _, draw_cols = self.keep_cols()
    outside_list = ~pulled.age_group_id.isin(age_group_list)
    pulled.loc[outside_list, draw_cols] = 0.
    return pulled
def pull_codcorrect_draws(self):
    """Pull CodCorrect measure-1 draws for this instance's cause.

    Rows are kept when their age group is in ``self.age_group_ids`` or is a
    key of ``self.aggregated_age_group_ids``. The result holds
    ``self.index_cols``, ``cause_id`` and ``self.draw_cols``.
    """
    logger.info("Pulling codcorrect draws...")
    pulled = get_draws(
        'cause_id',
        self.cause_id,
        year_id=self.year_id,
        source='codcorrect',
        sex_id=self.sex_id,
        measure_id=[1],
        location_id=self.location_ids,
        version_id=self.cod_process_v,
        decomp_step=self.decomp_step,
        gbd_round_id=self.gbd_round_id)
    logger.info("Successfully pulled codcorrect draws.")

    wanted_ages = list(self.age_group_ids) + list(self.aggregated_age_group_ids.keys())
    in_scope = pulled.loc[pulled.age_group_id.isin(wanted_ages), :]
    output_cols = self.index_cols + ['cause_id'] + self.draw_cols
    return in_scope[output_cols]
def get_the_draws(modelable_entity_id, location_id, gbd_round_id, decomp_step) -> pd.DataFrame:
    """Return epi draws (measure 5, metric 3) for one location.

    Bookkeeping columns are dropped and the frame is sorted and indexed by
    location/year/age/sex. Per the original note, callers parallelize this
    by location.
    """
    demographic_cols = ['location_id', 'year_id', 'age_group_id', 'sex_id']
    bookkeeping_cols = ['measure_id', 'metric_id',
                        'modelable_entity_id', 'model_version_id']

    pulled = get_draws('modelable_entity_id', modelable_entity_id, 'epi',
                       location_id=location_id, measure_id=5, metric_id=3,
                       gbd_round_id=gbd_round_id, decomp_step=decomp_step)
    pulled = pulled.drop(bookkeeping_cols, axis=1)
    pulled = pulled.sort_values(by=demographic_cols)
    return pulled.set_index(demographic_cols)
def load_lri_birth_prevalence_from_meid(_, location):
    """Load LRI birth-prevalence draws for ``location``.

    The first argument is ignored; it exists so the function fits the
    get_data call signature. Draws are pulled for the configured ME, source,
    age group and GBD round, filtered to the prevalence measure, and passed
    through the ``utilities`` normalize / reshape / scrub / split-interval /
    sort steps. ``age_group_id`` is removed from the retained demographic
    columns.
    """
    prevalence_measure = vi_globals.MEASURES['Prevalence']
    location_id = utility_data.get_location_id(location)

    draws = get_draws('modelable_entity_id', project_globals.LRI_BIRTH_PREVALENCE_MEID,
                      source=project_globals.LRI_BIRTH_PREVALENCE_DRAW_SOURCE,
                      age_group_id=project_globals.LRI_BIRTH_PREVALENCE_AGE_ID,
                      measure_id=prevalence_measure,
                      gbd_round_id=project_globals.LRI_BIRTH_PREVALENCE_GBD_ROUND,
                      location_id=location_id)
    draws = draws[draws.measure_id == prevalence_measure]
    draws = utilities.normalize(draws, fill_value=0)

    # Keep every demographic column except age_group_id, plus the draws.
    keep_cols = [col for col in vi_globals.DEMOGRAPHIC_COLUMNS]
    keep_cols.remove('age_group_id')
    draws = draws.filter(keep_cols + vi_globals.DRAW_COLUMNS)

    draws = utilities.reshape(draws)
    draws = utilities.scrub_gbd_conventions(draws, location)
    draws = utilities.split_interval(draws, interval_column='year',
                                     split_column_prefix='year')
    return utilities.sort_hierarchical_data(draws)