def redist_excess(excess_id, redist_map, copy_map, year_id, out_dir):
    """Run the excess redistribution for one year and save each result to HDF5.

    The redistribution is run with ``measure_id`` [5] and ``sex_id`` [2].
    Incidence draws (``measure_id`` 6) are then imported for every source
    modelable entity in ``copy_map`` and appended to the redistributed output
    of the mapped target entity before it is written out.

    Args:
        excess_id: forwarded unchanged to ``RedistExcess``.
        redist_map: forwarded to ``RedistExcess``; its values are the
            modelable entity ids whose results are written to disk.
        copy_map: dict of ``source me_id -> target me_id``; incidence draws
            imported for the source are appended to the target's output.
        year_id: list-like of year ids; ``year_id[0]`` names the output file.
        out_dir: root output directory. Files are written to
            ``<out_dir>/<me_id>/<year_id[0]>.h5`` (the per-ME sub-directory is
            not created here -- presumably it must already exist).
    """
    # Work on a private copy: default_idx_dmnsns is read off the class, so
    # assigning into it directly would leak this call's year/measure/sex
    # selection into every later user of the defaults.
    dim = copy.deepcopy(RedistExcess.default_idx_dmnsns)
    dim["year_id"] = year_id
    dim["measure_id"] = [5]
    dim["sex_id"] = [2]

    # run redistribution
    redistributer = RedistExcess(excess_id, redist_map, idx_dmnsns=dim)
    redistributer.redistribute_excess()

    # reuse the redistributer's dimensions, switching only the measure, so the
    # incidence draws line up with the redistributed output
    dim_inc = copy.deepcopy(redistributer.idx_dmnsns)
    dim_inc["measure_id"] = [6]  # incidence
    inc = draws.SquareImport(idx_dmnsns=dim_inc)
    inc_dict = {}
    for src_me_id, trg_me_id in copy_map.items():
        inc_dict[trg_me_id] = inc.import_square(
            gopher_what={"modelable_entity_id": src_me_id}, source="epi")

    # save to disk
    fname = str(year_id[0]) + ".h5"
    # a real list, not a dict view, so the call behaves the same on Python 3
    data_columns = list(dim.keys())
    for me_id in redist_map.values():
        out_df = redistributer.me_dict[me_id].reset_index()
        # inc_dict is keyed by exactly the values of copy_map
        if me_id in inc_dict:
            inc_df = inc_dict[me_id].reset_index()
            out_df = pd.concat([out_df, inc_df])
        out_df.to_hdf(
            os.path.join(out_dir, str(me_id), fname),
            key="draws",
            format="table",
            data_columns=data_columns)
def import_sex_prop(self, modelable_entity_id, sex_id):
    """Import one sex's proportion draws and broadcast them over the base index.

    Draws are imported for ``measure_id`` 18 (proportion) and the single given
    ``sex_id``, then left-merged onto the full product of the base importer's
    dimensions (minus ``sex_id``). The merge ignores ``measure_id``, so the
    imported draws are repeated for every measure in the base index.

    Args:
        modelable_entity_id: modelable entity whose draws are imported.
        sex_id: the single sex id to import.

    Returns:
        DataFrame indexed by the base dimensions without ``sex_id``, holding
        the importer's draw columns; rows with no imported match are 0.
    """
    # desired index shape: every base dimension except sex_id
    broadcast_idx_dmnsns = self.base_importer.idx_dmnsns.copy()
    broadcast_idx_dmnsns.pop("sex_id")
    # Materialise keys/values as lists: on Python 3 they are views that support
    # neither .remove() nor list concatenation.
    index_names = list(broadcast_idx_dmnsns.keys())
    broadcast_idx = pd.MultiIndex.from_product(
        list(broadcast_idx_dmnsns.values()), names=index_names)
    broadcast_df = pd.DataFrame(index=broadcast_idx)

    # import shape: same dimensions, one sex, proportion measure
    gopher_idx_dmnsns = broadcast_idx_dmnsns.copy()
    gopher_idx_dmnsns["sex_id"] = [sex_id]
    gopher_idx_dmnsns["measure_id"] = [18]

    # import using specified dimensions
    importer = draws.SquareImport(idx_dmnsns=gopher_idx_dmnsns)
    tmp_df = importer.import_square(
        {"modelable_entity_id": modelable_entity_id}, source="epi")

    # broadcast to preferred shape: join on everything but measure_id
    broadcast_over = list(index_names)
    broadcast_over.remove("measure_id")
    tmp_df = tmp_df.reset_index()[broadcast_over + list(importer.draw_cols)]
    df = pd.merge(
        broadcast_df.reset_index(),
        tmp_df,
        on=broadcast_over,
        how="left").set_index(index_names)
    df.fillna(value=0, inplace=True)
    return df
def import_couples(self, modelable_entity_id):
    """Import draws for ``sex_id`` 2 and broadcast them across the base index.

    The draws are imported from the "dismod" source with the base importer's
    dimensions restricted to ``sex_id`` [2], then left-merged onto the base
    importer's full index. The merge ignores ``sex_id``, so the imported
    values are repeated for every sex present in the base index.

    Args:
        modelable_entity_id: modelable entity whose draws are imported.

    Returns:
        DataFrame indexed by all base dimensions, holding the importer's draw
        columns; rows with no imported match are 0.
    """
    # desired index shape: the base importer's full set of dimensions
    broadcast_idx_dmnsns = self.base_importer.idx_dmnsns.copy()
    # a real list: on Python 3 dict.keys() is a view with no .remove() and no +
    index_names = list(broadcast_idx_dmnsns.keys())
    broadcast_df = self.base_importer.get_index_df()

    # import shape: same dimensions, restricted to sex_id 2
    gopher_idx_dmnsns = broadcast_idx_dmnsns.copy()
    gopher_idx_dmnsns["sex_id"] = [2]

    # import using specified dimensions
    importer = draws.SquareImport(idx_dmnsns=gopher_idx_dmnsns)
    tmp_df = importer.import_square(
        {"modelable_entity_ids": [modelable_entity_id]}, source="dismod")

    # broadcast to preferred shape: join on everything but sex_id
    broadcast_over = list(index_names)
    broadcast_over.remove("sex_id")
    tmp_df = tmp_df.reset_index()[broadcast_over + list(importer.draw_cols)]
    df = pd.merge(
        broadcast_df.reset_index(),
        tmp_df,
        on=broadcast_over,
        how="left").set_index(index_names)
    df.fillna(value=0, inplace=True)
    return df
def __init__(self, me_map, **kwargs):
    """Initialise the parent importer and load every source listed in ``me_map``.

    Each entry of ``me_map`` may carry a ``"srcs"`` dict of
    ``source key -> modelable entity id``. Draws for every such id are
    imported from the "epi" source and stored in ``self.me_dict`` keyed by
    the id. The ``"profound_prop"`` source is imported with ``measure_id``
    18 (proportion) through a dedicated importer; every other source goes
    through this instance's own ``import_square``.

    Args:
        me_map: dict of mapper dicts, kept on ``self.me_map``.
        **kwargs: forwarded to the parent class initialiser.
    """
    super(Split, self).__init__(**kwargs)

    self.me_map = me_map
    # imported dataframes, keyed by modelable entity id
    self.me_dict = {}

    for mapper in me_map.values():
        sources = mapper.get("srcs", {})
        for src_key, me_id in sources.items():
            if src_key != "profound_prop":
                print("Inside else, retrieving meid {}".format(me_id))
                self.me_dict[me_id] = self.import_square(
                    gopher_what={"modelable_entity_id": me_id},
                    source="epi")
                continue

            print("Inside prop, retrieving meid {}".format(me_id))
            # proportions live under a different measure, so they need an
            # importer built on a modified copy of this instance's dimensions
            prop_dims = copy.deepcopy(self.idx_dmnsns)
            prop_dims["measure_id"] = [18]  # proportion
            prop_importer = draws.SquareImport(idx_dmnsns=prop_dims)
            self.me_dict[me_id] = prop_importer.import_square(
                gopher_what={"modelable_entity_id": me_id},
                source="epi")
def __init__(self, male_prop_id, female_prop_id, exp_id, env_id, year_id):
    """Build the base importer and load all inputs for the given year(s).

    Args:
        male_prop_id: modelable entity id imported as the sex-1 proportion.
        female_prop_id: modelable entity id imported as the sex-2 proportion.
        exp_id: modelable entity id passed to ``import_exposure``.
        env_id: modelable entity id passed to ``import_couples``.
        year_id: year id selection placed in the base importer's dimensions.
    """
    # function-scope import so this block does not depend on the module header
    import copy

    # Base dimensions importer. Copy the defaults first: they are read off the
    # SquareImport class itself, so editing them in place would change the
    # defaults seen by every other importer created afterwards.
    dim = copy.deepcopy(draws.SquareImport.default_idx_dmnsns)
    dim["year_id"] = year_id
    dim["measure_id"] = [5]
    self.base_importer = draws.SquareImport(idx_dmnsns=dim)

    # import and scale sex proportions (sex_id 1 = male, 2 = female)
    self.male_prop = self.import_sex_prop(male_prop_id, 1)
    self.female_prop = self.import_sex_prop(female_prop_id, 2)
    self.scale_props()

    # import exposure and env
    self.exposure = self.import_exposure(exp_id)
    self.envelope = self.import_couples(env_id)
def __init__(self, me_map, cause_name, **kwargs):
    """Initialise the parent importer and load every source draw set.

    Sources are read from the ``"srcs"`` dict of each ``me_map`` entry and
    imported from the "epi" source, one modelable entity at a time:

    * ``"tot"``  -> ``self.csmr_dict``, imported with ``measure_id`` 15
      (cause-specific mortality rate); skipped for the ``"Other"`` entry.
    * ``"sqzd"`` -> ``self.sqz_dict``.
    * anything else -> ``self.hrtf_dict``.

    Args:
        me_map: dict of mapper dicts, kept on ``self.me_map``.
        cause_name: kept on ``self.cause_name``.
        **kwargs: forwarded to the parent class initialiser.
    """
    super(HrtFailImpCong, self).__init__(**kwargs)

    self.me_map = me_map
    self.cause_name = cause_name
    # per-kind stores of dataframes, each keyed by modelable entity id
    self.csmr_dict = {}
    self.hrtf_dict = {}
    self.sqz_dict = {}
    self.out_dict = {}

    # import_square (from SquareImport) fetches draws for one me_id at a time
    # and shapes the dataframe for downstream manipulation.
    for mapper_key, mapper in me_map.items():
        sources = mapper.get("srcs", {})
        for key, me_id in sources.items():
            if key == "tot":
                if mapper_key == "Other":
                    # no CSMR is imported for the "Other" entry
                    continue
                # import_square shapes its result against the index built
                # from the dimensions its importer was created with, so
                # self.import_square cannot be reused for a different
                # measure_id. Build a separate importer on a modified copy
                # of the dimensions instead.
                csmr_dims = copy.deepcopy(self.idx_dmnsns)
                # 15=cause-specific mortality rate (csmr)
                csmr_dims["measure_id"] = [15]
                csmr_importer = draws.SquareImport(idx_dmnsns=csmr_dims)
                self.csmr_dict[me_id] = csmr_importer.import_square(
                    gopher_what={"modelable_entity_id": me_id},
                    source="epi")
            elif key == "sqzd":
                self.sqz_dict[me_id] = self.import_square(
                    gopher_what={"modelable_entity_id": me_id},
                    source="epi")
            else:
                self.hrtf_dict[me_id] = self.import_square(
                    gopher_what={"modelable_entity_id": me_id},
                    source="epi")
def calc_heart_failure(self):
    """Split heart-failure severity draws across sub-groups by CSMR share.

    Steps, all results being written into ``self.me_dict``:

    1. Import ``measure_id`` 15 (cause-specific mortality rate, CSMR) draws
       for every non-"bundle" source of each ``"sub_group"`` entry in
       ``self.me_map``.
    2. Turn each CSMR draw set into its proportion of the summed CSMR and
       relabel it as ``measure_id`` 5 so it joins with the other dataframes.
    3. For every sub-group and severity (mild / moderate / severe), multiply
       the heart-failure severity draws by the sub-group's CSMR proportion
       and store the product under the severity's target id.
    4. Store the sub-group's squeezed draws minus the summed severities
       (floored at zero) under its ``"none"`` target id.
    5. Copy the "Other" entry's squeezed draws to its ``"none"`` target id.

    Returns:
        None.

    Raises:
        AssertionError: if a proportion falls outside [0, 1], if a result
            contains nulls, or if a first-row sanity check fails.
    """
    # a real list, so groupby/set_index behave the same on Python 2 and 3
    idx_cols = list(self.idx_dmnsns.keys())
    severities = ["mild", "moderate", "severe"]

    # import_square shapes its output against the index built from the
    # dimensions its importer was created with, so self.import_square cannot
    # fetch a different measure_id. Build a dedicated importer on a modified
    # copy instead. (Original note: the code is parallelised by year, so
    # year_id should already be a single year here.)
    dim_hf = copy.deepcopy(self.idx_dmnsns)
    dim_hf["measure_id"] = [15]  # cause-specific mortality rate (csmr)
    heart_failure = draws.SquareImport(idx_dmnsns=dim_hf)

    # get draws for csmr measure_id and src me_ids
    csmr_dict = {}
    for sub_group, mapper in self.me_map.items():
        if mapper["type"] != "sub_group":
            continue
        for src_key, me_id in mapper.get("srcs", {}).items():
            if src_key != "bundle":
                csmr_dict[me_id] = heart_failure.import_square(
                    gopher_what={"modelable_entity_ids": [me_id]},
                    source="dismod")

    # aggregate csmr subgroups (measure_id = 15): stack once, then collapse
    # on the index keys
    csmr_sub_causes = pd.concat(list(csmr_dict.values()))
    sigma_csmr_sub_causes = csmr_sub_causes.groupby(level=idx_cols).sum()

    # calc proportions for csmr me_ids
    csmr_prop_dict = {}
    for me_id, csmr in csmr_dict.items():
        # division builds a new frame; csmr_dict[me_id] itself is untouched
        prop = csmr / sigma_csmr_sub_causes
        # NaN results (e.g. 0/0 where the summed CSMR is zero) become 0
        prop.fillna(0, inplace=True)
        assert not (prop[self.draw_cols] > 1.0).any().any()
        assert not (prop[self.draw_cols] < 0.0).any().any()
        assert not prop.isnull().values.any()

        # relabel measure_id 15 -> 5 so the proportions can be joined with
        # the other dataframes; every other index value stays the same
        prop = prop.reset_index()
        prop.loc[:, 'measure_id'] = 5
        prop = prop.set_index(idx_cols)
        prop = prop[self.draw_cols]
        prop = pd.concat([self.index_df, prop], axis=1)
        csmr_prop_dict[me_id] = prop

    # incorporate heart failure severities
    for sub_group, mapper in self.me_map.items():
        if mapper["type"] != "sub_group":
            continue
        trgs = mapper['trgs']
        csmr_prop = csmr_prop_dict[mapper['srcs']['tot']]

        for severity in severities:
            split_trg_id = trgs[severity]
            hf_src_id = self.me_map['heart failure']['srcs'][severity]
            # at draw level: heart failure severity x CSMR proportion,
            # saved to the target modelable entity id
            self.me_dict[split_trg_id] = self.me_dict[hf_src_id] * csmr_prop
            # first-row sanity check; the original compared against an
            # undefined name (csmr_df) and raised NameError here
            assert self.me_dict[split_trg_id]['draw_0'].iloc[0] == (
                self.me_dict[hf_src_id]['draw_0'].iloc[0] *
                csmr_prop['draw_0'].iloc[0])

        # aggregate congenital heart failure severity groups, collapsing on
        # the index keys
        severity_df = pd.concat(
            [self.me_dict[trgs[severity]] for severity in severities])
        sigma_severity_df = severity_df.groupby(level=idx_cols).sum()

        # calc non heart failure portion and save to the target me_id
        sqzd_id = trgs['sqzd']
        if str(sqzd_id) == '10937':
            sqzd_df = self.calc_vsd_asd_asymptomatic()
        else:
            sqzd_df = self.me_dict[sqzd_id].copy()
        residual = sqzd_df - sigma_severity_df
        residual[residual < 0] = 0
        assert not (residual < 0.0).any().any()
        assert not residual.isnull().values.any()
        # Compare against the *clipped* difference: negatives were just
        # floored to zero, so the raw difference would trip this check
        # whenever the first row was negative.
        expected_first = max(
            sqzd_df['draw_0'].iloc[0] - sigma_severity_df['draw_0'].iloc[0],
            0)
        assert residual['draw_0'].iloc[0] == expected_first
        non_hf_trg_id = trgs['none']
        self.me_dict[non_hf_trg_id] = residual

    # copy cong_heart sqzd other into "15756 Congenital heart disease due to
    # other congenital cardiovascular anomalies before ID severity split".
    # no other calculations are necessary at this point in time.
    other_none_id = self.me_map["Other"]['trgs']['none']
    other_sqzd_id = self.me_map["Other"]['trgs']['sqzd']
    self.me_dict[other_none_id] = self.me_dict[other_sqzd_id].copy()