示例#1
0
    def aggregate_injuries(self, df):
        """Aggregate injury draws over the cause tree and the ncode hierarchy.

        The frame is first aggregated along ``cause_id`` using the cause tree
        of ``self.como_version``. It is then merged with
        ``self.como_version.ncode_hierarchy`` and the draw columns are summed
        per ``parent_id``; those sums are appended as additional rows whose
        ``rei_id`` is the former ``parent_id``.

        Args:
            df: DataFrame containing ``self.index_cols`` and
                ``self.draw_cols``.

        Returns:
            DataFrame restricted to ``self.index_cols + self.draw_cols`` with
            every index column cast to ``int``.
        """
        # Local import keeps this method self-contained; DataFrame.append was
        # removed in pandas 2.0, so pd.concat is used to stack the frames.
        import pandas as pd

        cause_tree = dbtrees.causetree(
            cause_set_version_id=self.como_version.cause_set_version_id)
        df = agg_hierarchy(tree=cause_tree,
                           df=df,
                           index_cols=self.index_cols,
                           data_cols=self.draw_cols,
                           dimension="cause_id")
        df = df[self.index_cols + self.draw_cols]

        # aggregate ncodes: sum draws of every child up to its parent
        df = df.merge(self.como_version.ncode_hierarchy)
        group_cols = [
            "age_group_id", "location_id", "year_id", "sex_id", "measure_id",
            "cause_id", "parent_id"
        ]
        # groupby does not mutate its input, so no defensive copy is needed
        ncode_agg = df.groupby(group_cols)[self.draw_cols].sum().reset_index()
        ncode_agg = ncode_agg.rename(columns={"parent_id": "rei_id"})

        # stack detailed rows and parent-level aggregates
        df = pd.concat([df, ncode_agg])
        df = df[self.index_cols + self.draw_cols]
        for col in self.index_cols:
            df[col] = df[col].astype(int)
        return df
示例#2
0
 def new_agg_cause_exceptions(self):
     """Store the (cause_id, parent_id) pairs of the exception parents.

     The cause tree is flattened with ``db_format()``, its ``location_id``
     column is relabelled ``cause_id``, and only rows whose ``parent_id``
     is one of the hard-coded parent ids are kept. The result is assigned
     to ``self.agg_cause_exceptions``.
     """
     exception_parent_ids = [589, 400, 521, 426, 332, 298, 297, 510, 487,
                             587]
     cause_tree = dbtrees.causetree(
         cause_set_version_id=self.cause_set_version_id)
     flat = cause_tree.db_format()
     flat = flat.rename(columns={"location_id": "cause_id"})
     is_exception_child = flat.parent_id.isin(exception_parent_ids)
     flat = flat[is_exception_child]
     self.agg_cause_exceptions = flat[["cause_id", "parent_id"]]
示例#3
0
 def aggregate_causes(self):
     """Aggregate ``self.imp_df`` along ``cause_id`` and store it back.

     A deep copy of the cause tree for the como version is handed to
     ``common.agg_hierarchy``; the index columns are extended with
     ``cause_id`` and ``rei_id`` for the aggregation.
     """
     cause_tree = deepcopy(dbtrees.causetree(
         cause_set_version_id=self.como_version.cause_set_version_id))
     agg_index_cols = self.index_cols + ["cause_id", "rei_id"]
     self.imp_df = common.agg_hierarchy(
         tree=cause_tree,
         df=self.imp_df,
         index_cols=agg_index_cols,
         data_cols=self.draw_cols,
         dimension="cause_id")
示例#4
0
 def aggregate_cause(self, df, cause_set_version_id):
     """Aggregate draws along ``cause_id`` for the given cause set version.

     Args:
         df: DataFrame holding ``self.index_cols`` and ``self.draw_cols``.
         cause_set_version_id: version passed to ``dbtrees.causetree``.

     Returns:
         The aggregated frame limited to index and draw columns, with each
         index column cast to ``int``.
     """
     cause_tree = dbtrees.causetree(
         cause_set_version_id=cause_set_version_id)
     aggregated = common.agg_hierarchy(
         tree=cause_tree,
         df=df,
         index_cols=self.index_cols,
         data_cols=self.draw_cols,
         dimension="cause_id")
     aggregated = aggregated[self.index_cols + self.draw_cols]
     for index_col in self.index_cols:
         aggregated[index_col] = aggregated[index_col].astype(int)
     return aggregated
示例#5
0
 def new_agg_cause_map(self):
     """Build the aggregate-cause to sequela map in ``self.agg_cause_map``.

     For every node of the cause tree that has descendants and is not a
     parent listed in ``self.agg_cause_exceptions``, the rows of
     ``self.sequela_list`` belonging to the node's leaf causes are copied
     and relabelled with the node's own id as ``cause_id``. All such
     frames are concatenated.

     Raises:
         ValueError: from ``pd.concat`` when no node qualifies.
     """
     ct = dbtrees.causetree(self.cause_set_version_id, None, None)
     # Loop-invariant: compute the excluded parents once, not per node.
     excluded_parents = set(
         self.agg_cause_exceptions.parent_id.unique().tolist())
     acm = []
     for node in ct.nodes:
         if len(node.all_descendants()) > 0 and (
                 node.id not in excluded_parents):
             leaf_ids = [leaf.id for leaf in node.leaves()]
             # Explicit copy so the assignment below never targets a slice
             # of self.sequela_list (avoids SettingWithCopy issues).
             ac_seq = self.sequela_list[
                 self.sequela_list.cause_id.isin(leaf_ids)].copy()
             ac_seq['cause_id'] = node.id
             acm.append(ac_seq)
     self.agg_cause_map = pd.concat(acm)
示例#6
0
 def new_injuries_index(self):
     """Compute the unique (cause_id, rei_id) pairs for injuries.

     Distinct pairs from ``self.injury_sequela`` are propagated up the
     cause tree, then joined to ``self.ncode_hierarchy`` so that each
     pair is also recorded against its ``parent_id`` (stored as
     ``rei_id``). The result is saved in ``self.injuries_index`` as a
     one-entry dict keyed by the tuple of key names.
     """
     # Local import keeps this method self-contained; DataFrame.append was
     # removed in pandas 2.0, so pd.concat is used to stack the frames.
     import pandas as pd

     inj_keys = [u"cause_id", u"rei_id"]
     inj_pairs = self.injury_sequela[inj_keys].drop_duplicates()
     cause_tree = dbtrees.causetree(
         cause_set_version_id=self.cause_set_version_id)
     inj_pairs = propagate_hierarchy(cause_tree, inj_pairs, "cause_id")

     # Replace each rei_id by its parent from the ncode hierarchy.
     agg_pairs = inj_pairs.merge(self.ncode_hierarchy)
     agg_pairs = agg_pairs.drop("rei_id", axis=1)
     agg_pairs = agg_pairs.rename(columns={"parent_id": "rei_id"})

     inj_pairs = pd.concat([inj_pairs, agg_pairs[inj_keys]])
     inj_pairs = inj_pairs.drop_duplicates()
     inj_vals = list(set(tuple(x) for x in inj_pairs[inj_keys].values))
     self.injuries_index = {tuple(inj_keys): inj_vals}
示例#7
0
 def new_impairment_index(self):
     """Compute the unique (rei_id, cause_id) pairs for impairments.

     Sequela/cause pairs are joined to ``self.impairment_sequela``,
     de-duplicated, and propagated up both the cause tree and the rei
     tree (rei set 4). Rows with ``rei_id`` 191 are dropped before the
     pairs are stored in ``self.impairment_index``.
     """
     rei_keys = ["rei_id", "cause_id"]
     merged = self.sequela_list[["sequela_id", "cause_id"]].merge(
         self.impairment_sequela)
     imp_pairs = merged[rei_keys].drop_duplicates()

     cause_tree = dbtrees.causetree(
         cause_set_version_id=self.cause_set_version_id)
     imp_pairs = propagate_hierarchy(cause_tree, imp_pairs, "cause_id")

     rei_tree = dbtrees.reitree(rei_set_id=4)
     imp_pairs = propagate_hierarchy(rei_tree, imp_pairs, "rei_id")

     not_rei_191 = imp_pairs.rei_id != 191
     imp_pairs = imp_pairs[not_rei_191]
     rei_vals = list(set(tuple(row) for row in imp_pairs[rei_keys].values))
     self.impairment_index = {tuple(rei_keys): rei_vals}
示例#8
0
 def generate_agg_cause_map(self):
     """Build, persist and return the aggregate-cause to sequela map.

     For each node of the cause tree that has descendants, the rows of
     ``self.seq_map`` belonging to the node's leaf causes are copied and
     relabelled with the node's id as ``cause_id``. Nodes that end up
     with no rows have their ``info`` printed. The concatenated map is
     written to ``<root_dir>/info/agg_cause_map.csv`` without the index.

     Returns:
         The concatenated DataFrame.

     Raises:
         ValueError: from ``pd.concat`` when no node has descendants.
     """
     # NOTE(review): positional arguments (None, 9) are passed through
     # unchanged; their meaning depends on dbtrees.causetree's signature.
     ct = dbtrees.causetree(None, 9)
     acm = []
     for node in ct.nodes:
         if len(node.all_descendants()) > 0:
             leaf_ids = [leaf.id for leaf in node.leaves()]
             # Explicit copy so the assignment below never targets a slice
             # of self.seq_map (avoids SettingWithCopy issues).
             ac_seq = self.seq_map[
                 self.seq_map.cause_id.isin(leaf_ids)].copy()
             ac_seq['cause_id'] = node.id
             if len(ac_seq) == 0:
                 # Function-call form works on both Python 2 and Python 3.
                 print(node.info)
             acm.append(ac_seq)
     acm = pd.concat(acm)
     acm.to_csv("{rd}/info/agg_cause_map.csv".format(rd=self.root_dir),
                index=False)
     return acm
示例#9
0
 def _cache_cause_hierarchy(self) -> None:
     """Pickle the cause trees of every configured cause set to disk.

     For each id in ``self.cause_set_ids`` the trees returned by
     ``dbtrees.causetree(..., return_many=True)`` are collected into one
     flat list, which is pickled to the cache file derived from
     ``self.cache_dir``.
     """
     logger.debug("Starting to load cause_hierarchy cache")
     tree_list = []
     for cause_set_id in self.cause_set_ids:
         tree_list.extend(dbtrees.causetree(
             cause_set_id=cause_set_id,
             gbd_round_id=self.gbd_round_id,
             return_many=True))
     cache_file = "FILEPATH".format(
         self.cache_dir)
     # Context manager guarantees the handle is flushed and closed even if
     # pickling raises.
     with open(cache_file, "wb") as cache_handle:
         pickle.dump(tree_list, cache_handle)
     logger.debug("Cached cause_hierarchies in {}".format(
         cache_file))
示例#10
0
 def new_impairment_index(self):
     """Compute the unique (rei_id, cause_id) pairs for impairments.

     Sequela/cause pairs are joined to ``self.impairment_sequela``,
     de-duplicated, and propagated up both the cause tree and the rei
     tree for ``self.gbd_round_id``. The aggregate rei is removed before
     the pairs are stored in ``self.impairment_index``.
     """
     rei_keys = ["rei_id", "cause_id"]
     merged = self.sequela_list[["sequela_id", "cause_id"]].merge(
         self.impairment_sequela)
     imp_pairs = merged[rei_keys].drop_duplicates()

     cause_tree = dbtrees.causetree(
         cause_set_version_id=self.cause_set_version_id)
     imp_pairs = propagate_hierarchy(cause_tree, imp_pairs, "cause_id")

     # rei_set_id 4 - GBD Reporting Impairments
     rei_tree = dbtrees.reitree(rei_set_id=4,
                                gbd_round_id=self.gbd_round_id)
     imp_pairs = propagate_hierarchy(rei_tree, imp_pairs, "rei_id")

     # rei_id 191 - Impairments (the aggregate), excluded from the index
     not_aggregate = imp_pairs.rei_id != 191
     imp_pairs = imp_pairs[not_aggregate]
     rei_vals = list(set(tuple(row) for row in imp_pairs[rei_keys].values))
     self.impairment_index = {tuple(rei_keys): rei_vals}
示例#11
0
    def aggregate_cause(self, df, cause_set_version_id):
        """Aggregate draws over the given cause set and the reporting set.

        The frame is aggregated along ``cause_id`` with the tree for
        ``cause_set_version_id``. One tree per root of the reporting cause
        hierarchy is then built, the aggregated frame is aggregated again
        over each of them, and only causes not already present are kept
        from those reporting results.

        Args:
            df: DataFrame holding ``self.index_cols`` and ``self.draw_cols``.
            cause_set_version_id: version passed to ``dbtrees.causetree``.

        Returns:
            The first aggregation followed by the reporting-only causes,
            with a fresh 0..n-1 index.
        """
        # Pass 1: aggregate over the requested cause set version.
        computation_tree = dbtrees.causetree(
            cause_set_version_id=cause_set_version_id)
        df = common.agg_hierarchy(
            tree=computation_tree,
            df=df,
            index_cols=self.index_cols,
            data_cols=self.draw_cols,
            dimension="cause_id")
        df = df[self.index_cols + self.draw_cols]
        for index_col in self.index_cols:
            df[index_col] = df[index_col].astype(int)
        all_done = df.cause_id.unique().tolist()

        # Build one tree per root of the reporting hierarchy.
        hierarchy = view_cause_hierarchy_history(
            self.como_version.reporting_cause_set_version_id)
        hierarchy = hierarchy[[
            'cause_id', 'path_to_top_parent', 'parent_id', 'cause_name',
            'acause', 'cause_outline'
        ]]
        is_root = hierarchy.cause_id == hierarchy.parent_id
        root_ids = hierarchy[is_root].parent_id
        reporting_trees = []
        for root_id in root_ids:
            # NOTE(review): plain string-prefix match; a root id that is a
            # textual prefix of another root id would also match - confirm
            # against the format of path_to_top_parent.
            under_root = hierarchy.path_to_top_parent.str.startswith(
                str(root_id))
            reporting_trees.append(tree.parent_child_to_tree(
                hierarchy[under_root],
                'parent_id',
                'cause_id',
                info_cols=['cause_name', 'acause', 'cause_outline']))

        # Pass 2: aggregate the pass-1 result over every reporting tree.
        reporting_df = pd.concat([
            common.agg_hierarchy(tree=reporting_tree,
                                 df=df.copy(deep=True),
                                 index_cols=self.index_cols,
                                 data_cols=self.draw_cols,
                                 dimension="cause_id")
            for reporting_tree in reporting_trees
        ])

        # Keep only causes that pass 1 did not already produce.
        already_done = reporting_df["cause_id"].isin(all_done)
        reporting_df = reporting_df[~already_done]
        reporting_df = reporting_df[self.index_cols + self.draw_cols]
        for index_col in self.index_cols:
            reporting_df[index_col] = reporting_df[index_col].astype(int)

        combined = pd.concat([df, reporting_df])
        return combined.reset_index(drop=True)
示例#12
0
 def new_agg_cause_exceptions(self):
     """Store the (cause_id, parent_id) pairs of the exception parents.

     The cause tree is flattened with ``db_format()``, its ``location_id``
     column is relabelled ``cause_id``, and only children of the listed
     parent causes are kept in ``self.agg_cause_exceptions``.
     """
     # For these causes, the sum of the underlying causes equals the parent
     # cause. This is an exception, as prevalence is not usually additive
     # at the cause level, only at the sequela level.
     additive_parent_ids = [
         589,  # Chronic kidney disease
         400,  # Acute hepatitis
         521,  # Cirrhosis and other chronic liver diseases
         417,  # Liver cancer
         298,  # HIV/AIDS
         297,  # Tuberculosis
         510,  # Pneumoconiosis
         487,  # Leukemia
         587,  # Diabetes mellitus
     ]
     cause_tree = dbtrees.causetree(
         cause_set_version_id=self.cause_set_version_id)
     flat = cause_tree.db_format()
     flat = flat.rename(columns={"location_id": "cause_id"})
     is_exception_child = flat.parent_id.isin(additive_parent_ids)
     flat = flat[is_exception_child]
     self.agg_cause_exceptions = flat[["cause_id", "parent_id"]]
示例#13
0
def agg_cause_hierarchy(df):
    """Sum a frame up the cause tree, one level at a time from the bottom.

    Starting one level above the deepest level and moving to the root,
    every cause with children receives the sum of its children's rows
    (grouped by the index columns). ``rei_id`` is part of the index when
    the column is present. A final groupby collapses any duplicate
    index/cause combinations.

    Args:
        df: DataFrame with ``location_id``, ``year_id``, ``age_group_id``,
            ``sex_id``, ``cause_id`` and value columns; it is not modified.

    Returns:
        A new DataFrame containing the original and the aggregated causes.
    """
    result = df.copy()
    # NOTE(review): positional arguments (None, 9) are passed through
    # unchanged; their meaning depends on dbtrees.causetree's signature.
    cause_tree = dbtrees.causetree(None, 9)
    idx_cols = ['location_id', 'year_id', 'age_group_id', 'sex_id']
    if 'rei_id' in result.columns:
        idx_cols.append('rei_id')

    # Walk upward so each parent sees its children's aggregated values.
    for level in range(cause_tree.max_depth() - 1, -1, -1):
        level_aggs = []
        for cause in cause_tree.level_n_descendants(level):
            child_ids = [child.id for child in cause.children]
            if len(child_ids) == 0:
                continue
            child_rows = result[result.cause_id.isin(child_ids)]
            summed = child_rows.groupby(idx_cols).sum().reset_index()
            summed['cause_id'] = cause.id
            level_aggs.append(summed)
        result = pd.concat([result, pd.concat(level_aggs)])

    return result.groupby(idx_cols + ['cause_id']).sum().reset_index()
示例#14
0
 def get_cause_tree(self):
     """Return a deep copy of the cause tree for the como version."""
     version_id = self.como_version.cause_set_version_id
     cause_tree = dbtrees.causetree(version_id, None, None)
     return deepcopy(cause_tree)
示例#15
0
def get_cause_tree(cv):
    """Return a deep copy of the cause tree for ``cv.cause_set_version_id``.

    Args:
        cv: object exposing a ``cause_set_version_id`` attribute.
    """
    version_id = cv.cause_set_version_id
    cause_tree = dbtrees.causetree(version_id, None, None)
    return deepcopy(cause_tree)
示例#16
0
 def tree(self) -> hierarchies.tree.Tree:
     """Return the cause tree, loading it on first access.

     The tree is fetched with ``causetree`` for ``self.set_version_id``
     and ``self.gbd_round_id`` and kept in ``self._tree`` so later calls
     reuse it.
     """
     if self._tree is not None:
         return self._tree
     self._tree = causetree(cause_set_version_id=self.set_version_id,
                            gbd_round_id=self.gbd_round_id)
     return self._tree
示例#17
0
 def new_cause_index(self):
     """Store the ids of all cause-tree nodes in ``self.cause_index``."""
     cause_tree = dbtrees.causetree(
         cause_set_version_id=self.cause_set_version_id)
     node_ids = []
     for node in cause_tree.nodes:
         node_ids.append(node.id)
     self.cause_index = {"cause_id": node_ids}