Example #1
    def _agg_age_std_ages(self):
        age_tree = agetree(age.AGE_STANDARDIZED)

        # make the source and sink
        source = self.gen_draw_source()
        source.add_transform(
            fill_square,
            index_cols=[
                col for col in self.dimensions.index_names
                if col != "age_group_id"
            ],
            square_col="age_group_id",
            square_col_vals=[node.id for node in age_tree.leaves()])
        sink = self.gen_draw_sink()

        # construct aggregator object
        operator = WtdSum(
            index_cols=[
                col for col in self.dimensions.index_names
                if col != "age_group_id"
            ],
            value_cols=self.dimensions.data_list(),
            weight_df=self.std_age_weights,
            weight_name="age_group_weight_value",
            merge_cols=["age_group_id"])
        aggregator = AggSynchronous(draw_source=source,
                                    draw_sink=sink,
                                    index_cols=[
                                        col
                                        for col in self.dimensions.index_names
                                        if col != "age_group_id"
                                    ],
                                    aggregate_col="age_group_id",
                                    operator=operator)

        # run the tree
        aggregator.run(age_tree)
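For reference, the WtdSum operator above presumably multiplies each draw column by the matching age weight and sums over the aggregated column, which is what makes the result age-standardized. A minimal pandas-only sketch of that computation, with made-up weights standing in for self.std_age_weights, might look like this:

import pandas as pd

# Hypothetical detailed-age draws and age-standard weights (illustrative values only).
draws = pd.DataFrame({
    "location_id": [101, 101, 101],
    "age_group_id": [10, 11, 12],
    "draw_0": [0.010, 0.020, 0.040],
})
weights = pd.DataFrame({
    "age_group_id": [10, 11, 12],
    "age_group_weight_value": [0.5, 0.3, 0.2],
})

# Weighted sum over the child age groups gives the age-standardized value.
merged = draws.merge(weights, on="age_group_id")
merged["draw_0"] *= merged["age_group_weight_value"]
age_std = merged.groupby("location_id", as_index=False)["draw_0"].sum()
print(age_std)  # draw_0 == 0.010*0.5 + 0.020*0.3 + 0.040*0.2 == 0.019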
Example #2
    def _agg_pop_wtd_ages_birth(self, age_group_id):
        age_tree = agetree(age_group_id)
        age_tree.add_node(age.BIRTH, {}, age_tree.root.id)

        # make the source and sink
        source = self.gen_draw_source()
        source.add_transform(convert_to_counts, self.population,
                             self.dimensions.data_list())
        sink = self.gen_draw_sink()
        sink.add_transform(convert_to_rates, self.population,
                           self.dimensions.data_list())

        # construct aggregator object
        operator = Sum(
            index_cols=[
                col for col in self.dimensions.index_names
                if col != "age_group_id"
            ],
            value_cols=self.dimensions.data_list())
        aggregator = AggSynchronous(draw_source=source,
                                    draw_sink=sink,
                                    index_cols=[
                                        col
                                        for col in self.dimensions.index_names
                                        if col != "age_group_id"
                                    ],
                                    aggregate_col="age_group_id",
                                    operator=operator)

        aggregator.run(age_tree)
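The source and sink transforms above move the data between rate and count space so that the Sum operator can add child age groups; convert_to_counts presumably multiplies rates by population, and convert_to_rates divides the summed counts by the aggregate population. A small pandas sketch of that round trip, with illustrative numbers only:

import pandas as pd

# Illustrative rate draws for two detailed age groups plus their populations.
rates = pd.DataFrame({
    "age_group_id": [2, 3],
    "draw_0": [0.010, 0.002],
})
pop = pd.DataFrame({
    "age_group_id": [2, 3],
    "population": [1000.0, 4000.0],
})

# Rate -> count (convert_to_counts equivalent): rate * population.
counts = rates.merge(pop, on="age_group_id")
counts["draw_0"] *= counts["population"]

# Sum children into the aggregate, then count -> rate (convert_to_rates
# equivalent): divide by the aggregate population.
agg_count = counts["draw_0"].sum()    # 10 + 8 = 18 events
agg_pop = counts["population"].sum()  # 5000 people
print(agg_count / agg_pop)            # 0.0036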
Example #3
def _compute_age_aggregates(
        data: pd.DataFrame,
        gbd_round_id: int,
        groupby_cols: List[str] = Columns.INDEX
) -> pd.DataFrame:
    """
    Takes a dataframe in count space, calculates all aggregated ages from
    gbd.constants.GBD_COMPARE_AGES + ALL_AGES, and returns aggregates as a new
    dataframe

    Arguments:
        data (pd.DataFrame): dataframe in count space containing indices and
            draws to create age aggregates from.

        gbd_round_id (int): ID of the GBD round used to build the age trees.

        groupby_cols (List[str]): index columns to group by when summing the
            child age groups up to each aggregate.

    Returns:
        A new dataframe containing only the aggregated age groups.
    """
    compare_ages = list(
        set(gbd.GBD_COMPARE_AGES).union(set(Ages.END_OF_ROUND_AGE_GROUPS))
    )

    if gbd.age.ALL_AGES not in compare_ages:
        compare_ages.append(gbd.age.ALL_AGES)

    data = data[~data[Columns.AGE_GROUP_ID].isin(compare_ages)]
    # create age trees
    age_trees = []
    for age_group in compare_ages:
        tree = agetree(age_group_id=age_group, gbd_round_id=gbd_round_id)
        age_trees.append(tree)

    agg_ages = []
    # for each tree, identify the child age groups, groupby sum the children
    # to produce the parent estimates.
    for atree in age_trees:
        child_ids = list(map(lambda x: x.id, atree.root.children))
        child_data = data[data[Columns.AGE_GROUP_ID].isin(child_ids)].copy()
        child_data[Columns.AGE_GROUP_ID] = atree.root.id
        child_data = child_data.groupby(groupby_cols).sum().reset_index()
        agg_ages.append(child_data)
    aggregated_ages: pd.DataFrame = pd.concat(agg_ages).reset_index(drop=True)
    return aggregated_ages
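To make the per-tree loop concrete, here is a toy run of the same child-to-parent groupby-sum with hypothetical IDs (the real child IDs come from agetree; age group 22 is used here as the aggregate):

import pandas as pd

# Hypothetical aggregate age group 22 with detailed children 10 and 11.
parent_id, child_ids = 22, [10, 11]
groupby_cols = ["location_id", "age_group_id"]

data = pd.DataFrame({
    "location_id": [101, 101],
    "age_group_id": [10, 11],
    "draw_0": [5.0, 7.0],
})

child_data = data[data["age_group_id"].isin(child_ids)].copy()
child_data["age_group_id"] = parent_id
aggregate = child_data.groupby(groupby_cols).sum().reset_index()
print(aggregate)  # one row: age_group_id == 22, draw_0 == 12.0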
Example #4
def generate_aggregated_ages(df, index_columns, database='gbd'):
    """Takes in a dataframe in count space, calculates all aggregated ages, and
    adds to the dataframe.

    Arguments:
        df (pd.DataFrame): dataframe containing indices and draws to create
            age aggregates with.
        index_columns (str[]): list of strings represnting the data indices to
            aggregate over.

    Returns:
        The original dataset with all-ages added.
    """
    compare_ages = list(GBD.GBD_COMPARE_AGES)  # copy; avoid mutating the constant
    # remove 28 from our list of GBD compare ages. We don't compute under one.
    if 28 in compare_ages:
        compare_ages.remove(28)

    if 22 not in compare_ages:
        compare_ages.append(22)

    if (database == 'cod') and (21 in compare_ages):
        compare_ages.remove(21)

    df = df[~df['age_group_id'].isin(compare_ages)]
    # create age trees
    age_trees = []
    for age_group in compare_ages:
        tree = agetree(age_group_id=age_group, gbd_round_id=GBD.GBD_ROUND_ID)
        age_trees.append(tree)

    agg_ages = []
    for atree in age_trees:
        child_ids = list(map(lambda x: x.id, atree.root.children))
        temp = df[df['age_group_id'].isin(child_ids)].copy(deep=True)
        temp['age_group_id'] = atree.root.id
        temp = temp.groupby(index_columns).sum().reset_index()
        agg_ages.append(temp)
    agg_ages_df = pd.concat(agg_ages)
    df = pd.concat([df, agg_ages_df]).reset_index(drop=True)
    return df
Example #5
def get_age_group_map(gbd_round_id: int,
                      age_group_ids: List[int]) -> Dict[int, List[int]]:
    """Gets dictionary of age group ID to age group IDs in the aggregate.

    Replaces detailed age groups [2, 3, 4] with aggregate age group 28 since life tables
    are not produced for age groups [2, 3, 4].

    Sorts age groups by age start since the probability-of-death calculation relies on age
    groups being in sorted order.

    Most-detailed age groups are returned in the same format as aggregate age groups in order
    to provide the probability of death calculation with a consistent data structure. E.g:
    {
        6: [6],                # Most detailed
        21: [30, 31, 32, 235]  # Aggregate
    }

    Args:
        gbd_round_id: ID of the GBD round with which to build age trees.
        age_group_ids: IDs of age groups, both aggregate and most detailed.

    Returns:
        Dictionary of age group ID to age group IDs that comprise the aggregate.
    """
    age_groups_with_starts = ezfuncs.query(
        queries.GET_AGE_STARTS,
        conn_def="cod",
        parameters={"age_group_ids": age_group_ids})

    age_group_map: Dict[int, List[int]] = {}
    for age_group_id in age_group_ids:
        tree = dbtrees.agetree(age_group_id, gbd_round_id)
        detailed_ids = [node.id for node in tree.root.children]
        by_age_start = _sort_age_group_ids(detailed_ids,
                                           age_groups_with_starts)
        under_one_replaced = _replace_under_one(by_age_start)
        age_group_map[age_group_id] = under_one_replaced
    return age_group_map
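The helpers _sort_age_group_ids and _replace_under_one are not shown here. Based only on the docstring above, hypothetical implementations could look like the sketch below; the age_group_years_start column name and the exact return shape are assumptions, not the library's actual code.

from typing import List

import pandas as pd


def _sort_age_group_ids(age_group_ids: List[int],
                        age_groups_with_starts: pd.DataFrame) -> List[int]:
    """Sketch: order age group IDs by their (assumed) age_group_years_start."""
    starts = age_groups_with_starts.set_index("age_group_id")
    return sorted(age_group_ids,
                  key=lambda a: starts.loc[a, "age_group_years_start"])


def _replace_under_one(age_group_ids: List[int]) -> List[int]:
    """Sketch: collapse detailed under-one groups [2, 3, 4] into aggregate 28."""
    under_one = {2, 3, 4}
    if under_one.issubset(age_group_ids):
        # The under-one groups sort first by age start, so 28 leads the list.
        return [28] + [a for a in age_group_ids if a not in under_one]
    return age_group_ids


print(_replace_under_one([2, 3, 4, 5]))  # -> [28, 5]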
Example #6
    def __init__(
        self, cause_id, year_id,
        out_dir, cod_process_v,
        decomp_step, gbd_round_id,
        location_set_ids=mmr_constants.AGGREGATE_LOCATION_SET_IDS):

        self.cause_id = cause_id
        self.year_id = year_id
        self.out_dir = out_dir
        self.cod_process_v = cod_process_v
        self.decomp_step = decomp_step
        self.gbd_round_id = gbd_round_id

        self.sex_id = [2]
        self.age_group_ids = list(range(7, 16))
        self.location_set_ids = location_set_ids
        self.location_ids = self.get_location_ids()
        self.aggregated_age_group_ids: Dict[int, List[int]] = {
            ag_id: [_id.id for _id in dbtrees.agetree(ag_id).leaves()]
            for ag_id in mmr_constants.AGGREGATE_AGE_GROUP_IDS
        }
        self.draw_cols = ['draw_{}'.format(i) for i in range(1000)]
        self.index_cols = ['location_id', 'year_id', 'age_group_id', 'sex_id']
        self.live_birth_col = mmr_constants.Columns.LIVE_BIRTH_VALUE_COL
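The aggregated_age_group_ids attribute maps each aggregate age group to the leaf age groups beneath it. A small sketch of how such a map could be applied to draw-level data with pandas; the map contents and values below are made up for illustration and stand in for the instance attributes:

import pandas as pd

# Hypothetical leaf map, standing in for self.aggregated_age_group_ids.
aggregated_age_group_ids = {24: [8, 9, 10]}
index_cols = ['location_id', 'year_id', 'age_group_id', 'sex_id']

draws = pd.DataFrame({
    'location_id': [101, 101, 101],
    'year_id': [2020, 2020, 2020],
    'age_group_id': [8, 9, 10],
    'sex_id': [2, 2, 2],
    'draw_0': [1.0, 2.0, 3.0],
})

pieces = []
for agg_id, leaf_ids in aggregated_age_group_ids.items():
    piece = draws[draws['age_group_id'].isin(leaf_ids)].copy()
    piece['age_group_id'] = agg_id
    pieces.append(piece.groupby(index_cols, as_index=False).sum())
print(pd.concat(pieces, ignore_index=True))  # one row: age_group_id 24, draw_0 == 6.0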
Example #7
    def _agg_pop_wtd_ages(self, age_group_id):
        age_tree = agetree(age_group_id)

        # make the source and sink
        source = self.gen_draw_source()
        source.add_transform(
            fill_square,
            index_cols=[
                col for col in self.dimensions.index_names
                if col != "age_group_id"
            ],
            square_col="age_group_id",
            square_col_vals=[node.id for node in age_tree.leaves()])
        sink = self.gen_draw_sink()

        # construct aggregator object
        operator = WtdSum(
            index_cols=[
                col for col in self.dimensions.index_names
                if col != "age_group_id"
            ],
            value_cols=self.dimensions.data_list(),
            weight_df=self.population,
            weight_name="population",
            merge_cols=["location_id", "year_id", "age_group_id", "sex_id"])
        aggregator = AggSynchronous(draw_source=source,
                                    draw_sink=sink,
                                    index_cols=[
                                        col
                                        for col in self.dimensions.index_names
                                        if col != "age_group_id"
                                    ],
                                    aggregate_col="age_group_id",
                                    operator=operator)

        aggregator.run(age_tree)
Example #8
    def new_population(self, location_set_id, agg_loc_sets=()):
        dim = self.nonfatal_dimensions.get_simulation_dimensions(
            self.measure_id)
        df = get_population(
            age_group_id=(
                dim.index_dim.get_level("age_group_id") + [164]),
            location_id=dbtrees.loctree(location_set_id=location_set_id,
                                        gbd_round_id=self.gbd_round_id
                                        ).node_ids,
            sex_id=dim.index_dim.get_level("sex_id"),
            year_id=dim.index_dim.get_level("year_id"))
        index_cols = ["location_id", "year_id", "age_group_id", "sex_id"]
        data_cols = ["population"]

        io_mock = {}
        source = DrawSource({"draw_dict": io_mock, "name": "tmp"},
                            mem_read_func)
        sink = DrawSink({"draw_dict": io_mock, "name": "tmp"}, mem_write_func)
        sink.push(df[index_cols + data_cols])

        # aggregate locations up each requested location set
        for set_id in agg_loc_sets:
            loc_tree = dbtrees.loctree(
                location_set_id=set_id,
                gbd_round_id=self.gbd_round_id)
            operator = Sum(
                index_cols=[col for col in index_cols if col != "location_id"],
                value_cols=data_cols)
            aggregator = AggSynchronous(
                draw_source=source,
                draw_sink=sink,
                index_cols=[col for col in index_cols if col != "location_id"],
                aggregate_col="location_id",
                operator=operator)
            aggregator.run(loc_tree)

        # aggregate detailed ages into each GBD compare age group
        for age_group_id in ComoSummaries._gbd_compare_age_group_list:
            age_tree = dbtrees.agetree(age_group_id)
            operator = Sum(
                index_cols=[
                    col for col in index_cols if col != "age_group_id"],
                value_cols=data_cols)
            aggregator = AggSynchronous(
                draw_source=source,
                draw_sink=sink,
                index_cols=[
                    col for col in index_cols if col != "age_group_id"],
                aggregate_col="age_group_id",
                operator=operator)
            aggregator.run(age_tree)

        # aggregate sexes into the both-sexes total
        sex_tree = dbtrees.sextree()
        operator = Sum(
            index_cols=[col for col in index_cols if col != "sex_id"],
            value_cols=data_cols)
        aggregator = AggSynchronous(
            draw_source=source,
            draw_sink=sink,
            index_cols=[col for col in index_cols if col != "sex_id"],
            aggregate_col="sex_id",
            operator=operator)
        aggregator.run(sex_tree)
        df = source.content()
        df.to_hdf(
            "{}/info/population.h5".format(self.como_dir),
            'draws',
            mode='w',
            format='table',
            data_columns=["location_id", "year_id", "age_group_id", "sex_id"])
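The final block above sums male and female draws into a both-sexes total via dbtrees.sextree(). A pandas-only sketch of that last step, assuming the both-sexes node uses sex_id 3 (the usual GBD convention):

import pandas as pd

index_cols = ["location_id", "year_id", "age_group_id", "sex_id"]
pop = pd.DataFrame({
    "location_id": [101, 101],
    "year_id": [2020, 2020],
    "age_group_id": [22, 22],
    "sex_id": [1, 2],          # male, female
    "population": [480.0, 520.0],
})

both = pop.copy()
both["sex_id"] = 3             # assumed both-sexes ID at the root of sextree()
both = both.groupby(index_cols, as_index=False).sum()
print(pd.concat([pop, both], ignore_index=True))  # both-sexes population == 1000.0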