def aggregate_injuries(self, df):
    """Aggregate injury draws over the cause tree, then over n-codes.

    Args:
        df: DataFrame holding ``self.index_cols`` and ``self.draw_cols``.
            It must also carry whatever columns are shared with
            ``self.como_version.ncode_hierarchy`` for the merge below
            (presumably ``rei_id`` -- confirm against the hierarchy table).

    Returns:
        DataFrame restricted to ``self.index_cols + self.draw_cols`` that
        contains the cause-aggregated rows plus the rows summed up to each
        n-code parent, with every index column cast to ``int``.
    """
    # Local import: DataFrame.append was removed in pandas 2.0, so the
    # stacking step below needs pd.concat.
    import pandas as pd

    ct = dbtrees.causetree(
        cause_set_version_id=self.como_version.cause_set_version_id)
    df = agg_hierarchy(
        tree=ct,
        df=df,
        index_cols=self.index_cols,
        data_cols=self.draw_cols,
        dimension="cause_id")
    df = df[self.index_cols + self.draw_cols]

    # aggregate ncodes: attach each row's parent_id, then sum the draws per
    # parent. The default (inner) merge keeps only rows present in the
    # hierarchy table.
    df = df.merge(self.como_version.ncode_hierarchy)
    df_agg = df.groupby([
        "age_group_id", "location_id", "year_id", "sex_id", "measure_id",
        "cause_id", "parent_id"
    ])[self.draw_cols].sum().reset_index()
    # The parent id becomes the rei_id of the aggregated rows.
    df_agg = df_agg.rename(columns={"parent_id": "rei_id"})

    # Stack the detailed rows and the parent-level aggregates.
    df = pd.concat([df, df_agg])
    df = df[self.index_cols + self.draw_cols]
    for col in self.index_cols:
        df[col] = df[col].astype(int)
    return df
def new_agg_cause_exceptions(self):
    """Store the (cause_id, parent_id) pairs under a fixed set of parents.

    Loads the cause tree for ``self.cause_set_version_id`` in its tabular
    form, keeps only rows whose ``parent_id`` is in the hard-coded list
    below, and saves the two-column result on ``self.agg_cause_exceptions``.
    """
    exception_parent_ids = [
        589, 400, 521, 426, 332, 298, 297, 510, 487, 587]

    cause_tree = dbtrees.causetree(
        cause_set_version_id=self.cause_set_version_id)
    hierarchy = cause_tree.db_format()
    # db_format() apparently labels the node id column "location_id";
    # relabel it for cause data -- TODO confirm against dbtrees.
    hierarchy = hierarchy.rename(columns={"location_id": "cause_id"})

    under_exception = hierarchy.parent_id.isin(exception_parent_ids)
    hierarchy = hierarchy[under_exception]
    self.agg_cause_exceptions = hierarchy[["cause_id", "parent_id"]]
def aggregate_causes(self):
    """Aggregate ``self.imp_df`` over the cause tree, in place on ``self``.

    The result of ``common.agg_hierarchy`` replaces ``self.imp_df``. Rows
    are indexed by ``self.index_cols`` plus ``cause_id`` and ``rei_id``.
    """
    # Work on a deep copy of the tree, presumably so a shared or cached
    # tree object is never mutated -- confirm against dbtrees.
    cause_tree = deepcopy(
        dbtrees.causetree(
            cause_set_version_id=self.como_version.cause_set_version_id))
    aggregation_index = self.index_cols + ["cause_id", "rei_id"]
    self.imp_df = common.agg_hierarchy(
        tree=cause_tree,
        df=self.imp_df,
        index_cols=aggregation_index,
        data_cols=self.draw_cols,
        dimension="cause_id")
def aggregate_cause(self, df, cause_set_version_id):
    """Aggregate draws over the cause tree of the given set version.

    Args:
        df: DataFrame holding ``self.index_cols`` and ``self.draw_cols``.
        cause_set_version_id: version of the cause set whose tree drives
            the aggregation.

    Returns:
        The aggregated DataFrame limited to index and draw columns, with
        every index column cast to ``int``.
    """
    cause_tree = dbtrees.causetree(cause_set_version_id=cause_set_version_id)
    aggregated = common.agg_hierarchy(
        tree=cause_tree,
        df=df,
        index_cols=self.index_cols,
        data_cols=self.draw_cols,
        dimension="cause_id")
    aggregated = aggregated[self.index_cols + self.draw_cols]
    for index_col in self.index_cols:
        aggregated[index_col] = aggregated[index_col].astype(int)
    return aggregated
def new_agg_cause_map(self):
    """Build the aggregate-cause to sequela map and store it on ``self``.

    For every node of the cause tree that has descendants and is not one
    of the parents listed in ``self.agg_cause_exceptions``, the rows of
    ``self.sequela_list`` belonging to the node's leaf causes are copied
    and re-labelled with the node's own id. The stacked result is saved as
    ``self.agg_cause_map``.

    Raises:
        ValueError: from ``pd.concat`` when no node qualifies.
    """
    ct = dbtrees.causetree(self.cause_set_version_id, None, None)
    # Loop-invariant: compute the excluded parents once instead of once
    # per node.
    exception_parent_ids = set(
        self.agg_cause_exceptions.parent_id.unique().tolist())

    acm = []
    for node in ct.nodes:
        if len(node.all_descendants()) == 0:
            continue
        if node.id in exception_parent_ids:
            continue
        leaf_ids = [leaf.id for leaf in node.leaves()]
        # Explicit copy so the assignment below writes to an independent
        # frame rather than to a slice of self.sequela_list.
        ac_seq = self.sequela_list[
            self.sequela_list.cause_id.isin(leaf_ids)].copy()
        ac_seq['cause_id'] = node.id
        acm.append(ac_seq)
    acm = pd.concat(acm)
    self.agg_cause_map = acm
def new_injuries_index(self):
    """Build the (cause_id, rei_id) index of injuries and store it on ``self``.

    Starts from the unique cause/rei pairs in ``self.injury_sequela``,
    passes them through ``propagate_hierarchy`` on the cause tree, adds the
    pairs obtained by replacing each ``rei_id`` with its ``parent_id`` from
    ``self.ncode_hierarchy``, and saves the unique tuples as
    ``self.injuries_index`` in the form ``{(keys...): [tuples...]}``.
    """
    # Local import: DataFrame.append was removed in pandas 2.0, so the
    # stacking step below needs pd.concat.
    import pandas as pd

    inj_keys = [u"cause_id", u"rei_id"]
    inj_pairs = self.injury_sequela[inj_keys].drop_duplicates()

    cause_tree = dbtrees.causetree(
        cause_set_version_id=self.cause_set_version_id)
    inj_pairs = propagate_hierarchy(cause_tree, inj_pairs, "cause_id")

    # Swap each rei_id for its parent in the n-code hierarchy.
    agg_pairs = inj_pairs.merge(self.ncode_hierarchy)
    agg_pairs = agg_pairs.drop("rei_id", axis=1)
    agg_pairs = agg_pairs.rename(columns={"parent_id": "rei_id"})

    inj_pairs = pd.concat([inj_pairs, agg_pairs[inj_keys]])
    inj_pairs = inj_pairs.drop_duplicates()

    inj_vals = list({tuple(pair) for pair in inj_pairs[inj_keys].values})
    self.injuries_index = {tuple(inj_keys): inj_vals}
def new_impairment_index(self):
    """Build the (rei_id, cause_id) index of impairments, stored on ``self``.

    Joins ``self.sequela_list`` to ``self.impairment_sequela``, reduces the
    result to unique rei/cause pairs, passes them through
    ``propagate_hierarchy`` for both the cause tree and the rei tree, drops
    rei_id 191, and saves the unique tuples as ``self.impairment_index`` in
    the form ``{(keys...): [tuples...]}``.
    """
    rei_keys = ["rei_id", "cause_id"]

    seq_to_cause = self.sequela_list[["sequela_id", "cause_id"]]
    pairs = seq_to_cause.merge(self.impairment_sequela)[rei_keys]
    pairs = pairs[~pairs.duplicated()]

    cause_tree = dbtrees.causetree(
        cause_set_version_id=self.cause_set_version_id)
    pairs = propagate_hierarchy(cause_tree, pairs, "cause_id")

    rei_tree = dbtrees.reitree(rei_set_id=4)
    pairs = propagate_hierarchy(rei_tree, pairs, "rei_id")

    # rei_id 191 is excluded from the index.
    pairs = pairs[pairs.rei_id != 191]

    unique_pairs = {tuple(row) for row in pairs[rei_keys].values}
    self.impairment_index = {tuple(rei_keys): list(unique_pairs)}
def generate_agg_cause_map(self):
    """Build the aggregate-cause to sequela map, write it to CSV, return it.

    For every cause-tree node that has descendants, the rows of
    ``self.seq_map`` belonging to the node's leaf causes are copied and
    re-labelled with the node's own id. Nodes that end up with no rows are
    reported on stdout but still contribute their (empty) frame.

    Returns:
        The stacked DataFrame, which is also written to
        ``<root_dir>/info/agg_cause_map.csv``.

    Raises:
        ValueError: from ``pd.concat`` when no node has descendants.
    """
    ct = dbtrees.causetree(None, 9)
    acm = []
    for node in ct.nodes:
        if len(node.all_descendants()) == 0:
            continue
        leaf_ids = [leaf.id for leaf in node.leaves()]
        # Explicit copy so the assignment below writes to an independent
        # frame rather than to a slice of self.seq_map.
        ac_seq = self.seq_map[self.seq_map.cause_id.isin(leaf_ids)].copy()
        ac_seq['cause_id'] = node.id
        if len(ac_seq) == 0:
            # print() call form: valid on Python 3 and prints the same
            # value on Python 2.
            print(node.info)
        acm.append(ac_seq)
    acm = pd.concat(acm)
    acm.to_csv("{rd}/info/agg_cause_map.csv".format(rd=self.root_dir),
               index=False)
    return acm
def _cache_cause_hierarchy(self) -> None:
    """Load the cause trees for every configured cause set and pickle them.

    Collects the trees returned by ``dbtrees.causetree(..., return_many=True)``
    for each id in ``self.cause_set_ids`` into one flat list and dumps that
    list to the cache file.
    """
    logger.debug("Starting to load cause_hierarchy cache")
    tree_list = []
    for cause_set_id in self.cause_set_ids:
        tree_list.extend(
            dbtrees.causetree(
                cause_set_id=cause_set_id,
                gbd_round_id=self.gbd_round_id,
                return_many=True))
    # NOTE(review): the template has no placeholder, so self.cache_dir is
    # ignored here; "FILEPATH" looks like a redacted path -- confirm.
    cache_file = "FILEPATH".format(
        self.cache_dir)
    # Context manager guarantees the handle is flushed and closed even if
    # pickling fails.
    with open(cache_file, "wb") as cache_handle:
        pickle.dump(tree_list, cache_handle)
    logger.debug("Cached cause_hierarchies in {}".format(
        cache_file))
def new_impairment_index(self):
    """Build the (rei_id, cause_id) index of impairments, stored on ``self``.

    Joins ``self.sequela_list`` to ``self.impairment_sequela``, reduces the
    result to unique rei/cause pairs, passes them through
    ``propagate_hierarchy`` for both the cause tree and the round-specific
    rei tree, drops the overall impairment aggregate, and saves the unique
    tuples as ``self.impairment_index`` in the form
    ``{(keys...): [tuples...]}``.
    """
    rei_keys = ["rei_id", "cause_id"]

    seq_to_cause = self.sequela_list[["sequela_id", "cause_id"]]
    pairs = seq_to_cause.merge(self.impairment_sequela)[rei_keys]
    pairs = pairs[~pairs.duplicated()]

    cause_tree = dbtrees.causetree(
        cause_set_version_id=self.cause_set_version_id)
    pairs = propagate_hierarchy(cause_tree, pairs, "cause_id")

    # rei_set_id 4 - GBD Reporting Impairments
    rei_tree = dbtrees.reitree(rei_set_id=4,
                               gbd_round_id=self.gbd_round_id)
    pairs = propagate_hierarchy(rei_tree, pairs, "rei_id")

    # rei_id 191 - Impairments (this is the aggregate)
    pairs = pairs[pairs.rei_id != 191]

    unique_pairs = {tuple(row) for row in pairs[rei_keys].values}
    self.impairment_index = {tuple(rei_keys): list(unique_pairs)}
def aggregate_cause(self, df, cause_set_version_id):
    """Aggregate draws over the given cause tree and the reporting trees.

    First aggregates ``df`` over the tree for ``cause_set_version_id``.
    Then rebuilds one tree per root of the reporting hierarchy
    (``self.como_version.reporting_cause_set_version_id``), aggregates the
    first result over each of them, and keeps only the causes the first
    pass did not already produce.

    Args:
        df: DataFrame holding ``self.index_cols`` and ``self.draw_cols``.
        cause_set_version_id: version of the cause set used for the first
            aggregation pass.

    Returns:
        The first-pass rows followed by the reporting-only rows, limited
        to index and draw columns, index columns cast to ``int``, with a
        fresh 0..n-1 row index.
    """
    # Pass 1: aggregate over the requested cause set.
    base_tree = dbtrees.causetree(cause_set_version_id=cause_set_version_id)
    df = common.agg_hierarchy(
        tree=base_tree,
        df=df,
        index_cols=self.index_cols,
        data_cols=self.draw_cols,
        dimension="cause_id")
    df = df[self.index_cols + self.draw_cols]
    for index_col in self.index_cols:
        df[index_col] = df[index_col].astype(int)
    already_aggregated = df.cause_id.unique().tolist()

    # Rebuild the reporting hierarchy as one tree per root; a root is a
    # row that is its own parent.
    hierarchy = view_cause_hierarchy_history(
        self.como_version.reporting_cause_set_version_id)
    hierarchy = hierarchy[[
        'cause_id', 'path_to_top_parent', 'parent_id', 'cause_name',
        'acause', 'cause_outline'
    ]]
    is_root = hierarchy.cause_id == hierarchy.parent_id
    root_ids = hierarchy[is_root].parent_id
    # NOTE(review): a bare prefix match would also select paths of any
    # root whose id merely begins with the same digits -- confirm that
    # root ids cannot be prefixes of one another.
    reporting_trees = [
        tree.parent_child_to_tree(
            hierarchy[
                hierarchy.path_to_top_parent.str.startswith(str(root_id))],
            'parent_id',
            'cause_id',
            info_cols=['cause_name', 'acause', 'cause_outline'])
        for root_id in root_ids
    ]

    # Pass 2: aggregate the pass-1 result over every reporting tree.
    reporting_frames = [
        common.agg_hierarchy(
            tree=reporting_tree,
            df=df.copy(deep=True),
            index_cols=self.index_cols,
            data_cols=self.draw_cols,
            dimension="cause_id")
        for reporting_tree in reporting_trees
    ]
    reporting_df = pd.concat(reporting_frames)

    # Keep only causes that pass 1 did not already cover.
    is_new_cause = ~reporting_df["cause_id"].isin(already_aggregated)
    reporting_df = reporting_df[is_new_cause]
    reporting_df = reporting_df[self.index_cols + self.draw_cols]
    for index_col in self.index_cols:
        reporting_df[index_col] = reporting_df[index_col].astype(int)

    combined = pd.concat([df, reporting_df])
    return combined.reset_index(drop=True)
def new_agg_cause_exceptions(self):
    """Store the (cause_id, parent_id) pairs of the additive parent causes.

    Loads the cause tree for ``self.cause_set_version_id`` in its tabular
    form, keeps only rows whose ``parent_id`` is one of the exception
    causes listed below, and saves the two-column result on
    ``self.agg_cause_exceptions``.
    """
    # For these causes, the sum of the underlying causes is equal to the
    # parent cause. This is an exception, as prevalence is not usually
    # additive at the cause level, just at the sequela level.
    exception_parent_ids = [
        589,  # Chronic kidney disease
        400,  # Acute hepatitis
        521,  # Cirrhosis and other chronic liver diseases
        417,  # Liver cancer
        298,  # HIV/AIDS
        297,  # Tuberculosis
        510,  # Pneumoconiosis
        487,  # Leukemia
        587,  # Diabetes mellitus
    ]

    cause_tree = dbtrees.causetree(
        cause_set_version_id=self.cause_set_version_id)
    hierarchy = cause_tree.db_format()
    # db_format() apparently labels the node id column "location_id";
    # relabel it for cause data -- TODO confirm against dbtrees.
    hierarchy = hierarchy.rename(columns={"location_id": "cause_id"})

    under_exception = hierarchy.parent_id.isin(exception_parent_ids)
    hierarchy = hierarchy[under_exception]
    self.agg_cause_exceptions = hierarchy[["cause_id", "parent_id"]]
def agg_cause_hierarchy(df):
    """Sum a frame's values up the cause tree, level by level.

    Walks the tree from the level just above the deepest one up to the
    root. At each level, every node that has children gets new rows equal
    to the sum of its children's rows per index combination. Finally all
    rows sharing the same index combination and ``cause_id`` are summed.

    Args:
        df: DataFrame with ``location_id``, ``year_id``, ``age_group_id``,
            ``sex_id`` and ``cause_id`` columns, optionally ``rei_id``
            (used as an extra index column when present). It is not
            modified; the work happens on a copy.

    Returns:
        DataFrame with one row per index combination and ``cause_id``.
    """
    result = df.copy()
    cause_tree = dbtrees.causetree(None, 9)

    idx_cols = ['location_id', 'year_id', 'age_group_id', 'sex_id']
    if 'rei_id' in result.columns:
        idx_cols.append('rei_id')

    # Bottom-up: parents of the deepest level first, the root level last,
    # so each level sees the aggregates already added for the level below.
    for depth in range(cause_tree.max_depth() - 1, -1, -1):
        level_aggs = []
        for parent in cause_tree.level_n_descendants(depth):
            child_ids = [child.id for child in parent.children]
            if not child_ids:
                continue
            child_rows = result[result.cause_id.isin(child_ids)]
            summed = child_rows.groupby(idx_cols).sum().reset_index()
            # The grouped sum also added up the children's cause_id
            # values; overwrite that column with the parent's id.
            summed['cause_id'] = parent.id
            level_aggs.append(summed)
        result = pd.concat([result, pd.concat(level_aggs)])

    return result.groupby(idx_cols + ['cause_id']).sum().reset_index()
def get_cause_tree(self):
    """Return a deep copy of the cause tree for this como version.

    The tree is looked up by ``self.como_version.cause_set_version_id``;
    the caller receives an independent copy of what ``dbtrees`` returned.
    """
    version_id = self.como_version.cause_set_version_id
    cause_tree = dbtrees.causetree(version_id, None, None)
    return deepcopy(cause_tree)
def get_cause_tree(cv):
    """Return a deep copy of the cause tree for ``cv.cause_set_version_id``.

    Args:
        cv: object exposing a ``cause_set_version_id`` attribute.

    Returns:
        An independent copy of the tree that ``dbtrees.causetree`` returned.
    """
    cause_tree = dbtrees.causetree(cv.cause_set_version_id, None, None)
    return deepcopy(cause_tree)
def tree(self) -> hierarchies.tree.Tree:
    """Return the cause tree, loading it on first access.

    The tree is fetched with ``self.set_version_id`` and
    ``self.gbd_round_id`` the first time it is needed and kept in
    ``self._tree``; later calls return the stored object.
    """
    if self._tree is not None:
        return self._tree
    self._tree = causetree(
        cause_set_version_id=self.set_version_id,
        gbd_round_id=self.gbd_round_id)
    return self._tree
def new_cause_index(self):
    """Store the ids of every cause-tree node as ``self.cause_index``.

    The index has the form ``{"cause_id": [id, ...]}``, with ids in the
    order of the tree's ``nodes``.
    """
    cause_tree = dbtrees.causetree(
        cause_set_version_id=self.cause_set_version_id)
    node_ids = []
    for node in cause_tree.nodes:
        node_ids.append(node.id)
    self.cause_index = {"cause_id": node_ids}