Exemplo n.º 1
0
def summarize_loc(source,
                  drawdir,
                  outdir,
                  location_id,
                  year_id,
                  rei_ids,
                  change_intervals=None,
                  gbd_round_id=5):
    '''Summarize every rei for a single location and write CSV outputs.

    Populates ``Globals.aw`` (age weights) and ``Globals.pop`` (population),
    runs ``summ_loc`` once per rei in a 10-worker pool, then writes
    ``single_year_<location_id>.csv`` and, when multi-year rows exist,
    ``multi_year_<location_id>.csv`` into ``outdir``.

    Arguments:
        source: forwarded unchanged to ``summ_loc``.
        drawdir: directory searched for ``population_*.csv`` files.
        outdir: directory the summary CSV files are written to.
        location_id: forwarded to ``summ_loc`` and used in output file names.
        year_id: forwarded unchanged to ``summ_loc``.
        rei_ids: iterable of rei ids; one ``summ_loc`` job is run per id.
        change_intervals: forwarded unchanged to ``summ_loc``.
        gbd_round_id: used to fetch age weights and forwarded to
            ``summ_loc``.

    Returns:
        None. Results are written to disk.
    '''
    # Set global age weights
    gbd_round_map = get_ids('gbd_round')
    # NOTE(review): gbd_round is never used below. The lookup is retained
    # because .item() raises unless exactly one row matches gbd_round_id,
    # so removing it could change how an unknown round id fails.
    gbd_round = gbd_round_map.loc[gbd_round_map.gbd_round_id ==
                                  gbd_round_id].gbd_round.item()
    Globals.aw = get_age_weights(gbd_round_id=int(gbd_round_id))

    # Set global population: stack every population file found in drawdir
    # and keep one row per demographic.
    popfiles = glob(os.path.join(drawdir, 'population_*.csv'))
    pops = pd.concat([pd.read_csv(popfile) for popfile in popfiles])
    pops = pops.drop_duplicates(
        subset=['location_id', 'age_group_id', 'year_id', 'sex_id'])
    Globals.pop = pops.rename(columns={'population': 'pop_scaled'})

    # Each job is an (args-tuple, empty dict) pair, one per rei.
    jobs = [((source, location_id, rei, year_id, change_intervals,
              gbd_round_id), {})
            for rei in rei_ids]
    pool = Pool(10)
    try:
        results = pool.map(summ_loc, jobs)
    finally:
        # Always release the worker processes, even when a job raises.
        pool.close()
        pool.join()

    # Only tuple results are kept; anything else a worker returned is
    # discarded (presumably a failure marker -- confirm against summ_loc).
    results = [res for res in results if isinstance(res, tuple)]
    # Transpose: results[0] holds single-year frames, results[1] multi-year.
    results = list(zip(*results))

    single_year = pd.concat([res for res in results[0] if res is not None])
    single_year = single_year[[
        'rei_id', 'location_id', 'year_id', 'age_group_id', 'sex_id',
        'measure_id', 'metric_id', 'val', 'lower', 'upper'
    ]]
    single_file = os.path.join(outdir,
                               'single_year_{}.csv'.format(location_id))
    single_year.to_csv(single_file, index=False)
    os.chmod(single_file, 0o775)

    multi_year = pd.concat(results[1])
    if len(multi_year) > 0:
        multi_year = multi_year[[
            'rei_id', 'location_id', 'year_start_id', 'year_end_id',
            'age_group_id', 'sex_id', 'measure_id', 'metric_id', 'val',
            'lower', 'upper'
        ]]
        # DataFrame.replace returns a new frame; the result must be kept,
        # otherwise infinite values survive the dropna below.
        multi_year = multi_year.replace([np.inf, -np.inf], np.nan).dropna()
        multi_file = os.path.join(outdir,
                                  'multi_year_{}.csv'.format(location_id))
        multi_year.to_csv(multi_file, index=False)
        os.chmod(multi_file, 0o775)
Exemplo n.º 2
0
 def _cache_age_weights(self) -> None:
     """Fetch the age weights for this GBD round and store them as HDF.

     The frame returned by ``get_age_weights`` is written under the
     ``age_weights`` key in table format with ``age_group_id`` as a data
     column.
     """
     logger.debug("Starting to load age_weights cache")
     round_id = int(self.gbd_round_id)
     # returns age_group_id, age_group_weight_value as a pandas df
     weights = get_age_weights(gbd_round_id=round_id)
     target = "FILEPATH".format(self.cache_dir)
     weights.to_hdf(
         target,
         "age_weights",
         data_columns=['age_group_id'],
         format="table",
     )
     logger.debug("Cached age_weights in {}".format(target))
Exemplo n.º 3
0
def summarize_loc(source,
                  drawdir,
                  outdir,
                  location_id,
                  year_id,
                  rei_ids,
                  change_intervals,
                  gbd_round_id):
    '''Summarize every rei for a single location and write CSV outputs.

    Loads age weights and this location's population, runs ``summ_loc``
    once per rei in a 10-worker pool, then writes
    ``single_year_<location_id>.csv`` and, when multi-year rows exist,
    ``multi_year_<location_id>.csv`` into ``outdir``.

    Arguments:
        source: forwarded unchanged to ``summ_loc``.
        drawdir: not used by this function; kept for interface
            compatibility.
        outdir: directory the summary CSV files are written to.
        location_id: filters the population table, is forwarded to
            ``summ_loc`` and is used in output file names.
        year_id: forwarded unchanged to ``summ_loc``.
        rei_ids: iterable of rei ids; one ``summ_loc`` job is run per id.
        change_intervals: forwarded unchanged to ``summ_loc``.
        gbd_round_id: used to fetch age weights and forwarded to
            ``summ_loc``.

    Returns:
        None. Results are written to disk.
    '''
    # Age weights and population are passed explicitly to every job.
    aw = get_age_weights(gbd_round_id=int(gbd_round_id))
    pops = pd.read_hdf("FILEPATH/population.h5")
    pops = pops.loc[pops.location_id == location_id]
    pop = pops.rename(columns={'population': 'pop_scaled'})

    # Each job is an (args-tuple, empty dict) pair, one per rei.
    jobs = [((source, location_id, rei, year_id, change_intervals,
              gbd_round_id, pop, aw), {})
            for rei in rei_ids]
    pool = Pool(10)
    try:
        results = pool.map(summ_loc, jobs)
    finally:
        # Always release the worker processes, even when a job raises.
        pool.close()
        pool.join()

    # Only tuple results are kept; anything else a worker returned is
    # discarded (presumably a failure marker -- confirm against summ_loc).
    results = [res for res in results if isinstance(res, tuple)]
    # Transpose: results[0] holds single-year frames, results[1] multi-year.
    results = list(zip(*results))

    single_year = pd.concat(
        [res for res in results[0] if res is not None], sort=True)
    single_year = single_year[
        ['rei_id', 'location_id', 'year_id', 'age_group_id', 'sex_id',
         'measure_id', 'metric_id', 'val', 'lower', 'upper']]
    single_file = os.path.join(
        outdir, 'single_year_{}.csv'.format(location_id))
    single_year.to_csv(single_file, index=False)
    os.chmod(single_file, 0o775)

    multi_year = pd.concat(results[1], sort=True)
    if len(multi_year) > 0:
        multi_year = multi_year[
            ['rei_id', 'location_id', 'year_start_id', 'year_end_id',
             'age_group_id', 'sex_id', 'measure_id', 'metric_id', 'val',
             'lower', 'upper']]
        # DataFrame.replace returns a new frame; the result must be kept,
        # otherwise infinite values survive the dropna below.
        multi_year = multi_year.replace([np.inf, -np.inf], np.nan).dropna()
        multi_file = os.path.join(
            outdir, 'multi_year_{}.csv'.format(location_id))
        multi_year.to_csv(multi_file, index=False)
        os.chmod(multi_file, 0o775)
Exemplo n.º 4
0
def _compute_age_standardized_rate(
        data: pd.DataFrame,
        gbd_round_id: int
) -> pd.DataFrame:
    """
    Build age-standardized rates from most-detailed age data.

    The input is left untouched; a new dataframe is returned.

    Arguments:
        data (pd.DataFrame): rows carrying the draw columns, a population
            column and an age group id column.
        gbd_round_id (int): round whose age weights are fetched.

    Returns:
        pd.DataFrame: one row per ``Columns.INDEX`` combination, with the
            age group set to age-standardized and the metric set to rate.
    """
    working: pd.DataFrame = data.copy()
    weights: pd.DataFrame = get_age_weights(gbd_round_id=gbd_round_id)

    # Attach the weight of each row's age group; the left join keeps every
    # input row.
    working = working.merge(weights, on=[Columns.AGE_GROUP_ID], how='left')

    # Convert draws to per-population values, then scale them by the age
    # weight of the row.
    population = working[Columns.POPULATION].values
    per_capita = working[Columns.DRAWS].div(population, axis='index')
    weight_values = working[Columns.AGE_WEIGHT_VALUE].values
    working[Columns.DRAWS] = per_capita.mul(weight_values, axis='index')

    # Relabel every row as age-standardized and discard the weight column
    # so it is not summed below.
    working[Columns.AGE_GROUP_ID] = gbd.age.AGE_STANDARDIZED
    working = working.drop(Columns.AGE_WEIGHT_VALUE, axis=1)

    # Summing within each index combination collapses the age groups into
    # the final age-standardized rates.
    summed = working.groupby(Columns.INDEX).sum()
    result = summed.reset_index()

    # These estimates are rates.
    result[Columns.METRIC_ID] = gbd.metrics.RATE
    return result
Exemplo n.º 5
0
 def std_age_weights(self):
     """Return the age weights for this GBD round, fetching them once.

     The first call stores the result of ``get_age_weights`` on the
     instance; later calls return the stored value.
     """
     if self._std_age_weights is not None:
         return self._std_age_weights
     self._std_age_weights = get_age_weights(
         gbd_round_id=self.gbd_round_id)
     return self._std_age_weights