Example #1
def cause_age_sex_agg(death_df, true_paf_df, amen_paf_df, draw_parameters,
                      uhc_version_dir):
    '''
    Aggregate PAFs over cause, age, and sex, and collapse deaths.
    '''
    # convert to count space
    true_paf_df = misc.draw_math([death_df, true_paf_df], specs.ID_COLS +
                                 ['age_group_id', 'sex_id', 'cause_id'],
                                 specs.DRAW_COLS, '*')
    amen_paf_df = misc.draw_math([death_df, amen_paf_df], specs.ID_COLS +
                                 ['age_group_id', 'sex_id', 'cause_id'],
                                 specs.DRAW_COLS, '*')

    # agg cause and sex
    true_paf_df = true_paf_df.groupby(specs.ID_COLS + ['age_group_id'],
                                      as_index=False)[specs.DRAW_COLS].sum()
    amen_paf_df = amen_paf_df.groupby(specs.ID_COLS + ['age_group_id'],
                                      as_index=False)[specs.DRAW_COLS].sum()
    death_df = death_df.groupby(specs.ID_COLS + ['age_group_id'],
                                as_index=False)[specs.DRAW_COLS].sum()

    # now that everything is in both-sex space, reassign sex_id
    true_paf_df['sex_id'] = 3
    amen_paf_df['sex_id'] = 3
    death_df['sex_id'] = 3

    # age-standardize
    # set counts=False even though we are passing in counts; we don't want to
    #   convert these to rates (yet)
    true_paf_df = misc.age_standardize(true_paf_df,
                                       specs.ID_COLS,
                                       specs.DRAW_COLS,
                                       draw_parameters,
                                       uhc_version_dir,
                                       counts=False)
    amen_paf_df = misc.age_standardize(amen_paf_df,
                                       specs.ID_COLS,
                                       specs.DRAW_COLS,
                                       draw_parameters,
                                       uhc_version_dir,
                                       counts=False)
    death_df = misc.age_standardize(death_df,
                                    specs.ID_COLS,
                                    specs.DRAW_COLS,
                                    draw_parameters,
                                    uhc_version_dir,
                                    counts=False)

    # convert back to PAF space
    true_paf_df = misc.draw_math([true_paf_df, death_df], specs.ID_COLS,
                                 specs.DRAW_COLS, '/')
    amen_paf_df = misc.draw_math([amen_paf_df, death_df], specs.ID_COLS,
                                 specs.DRAW_COLS, '/')

    return death_df, true_paf_df, amen_paf_df
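
The misc.draw_math helper recurs throughout these examples (including the ratio calculations below), but its implementation is not shown. From the call sites it appears to merge two draw-level DataFrames on the index columns and apply an element-wise operation across the draw columns. A minimal standalone sketch under that assumption (draw_math_sketch is a hypothetical name, not the real helper):

import pandas as pd


def draw_math_sketch(dfs, index_cols, draw_cols, operator):
    '''Merge two draw DataFrames on index_cols and combine draw_cols element-wise.'''
    left_df, right_df = dfs
    merged = left_df.merge(right_df, on=index_cols, suffixes=('', '_rhs'))
    rhs = merged[[col + '_rhs' for col in draw_cols]].to_numpy()
    if operator == '*':
        merged[draw_cols] = merged[draw_cols].to_numpy() * rhs
    elif operator == '/':
        merged[draw_cols] = merged[draw_cols].to_numpy() / rhs
    else:
        raise ValueError('unsupported operator: {}'.format(operator))
    return merged[index_cols + draw_cols]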
Example #2
def delete_risk(df, paf_df, index_cols, draw_cols):
    '''
    Remove effect of local risk exposure.
    '''
    # get local unattributable fraction
    paf_df[draw_cols] = 1 - paf_df[draw_cols]
    df = misc.draw_math([df, paf_df], index_cols, draw_cols, '*')

    return df
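
For concreteness, the complement step can be checked with plain numbers; the values below are illustrative only, not taken from the pipeline:

# 200 deaths and a local PAF of 0.25: multiplying by (1 - PAF) leaves the
# burden not attributable to the local risk exposure (illustrative values).
deaths, paf = 200.0, 0.25
unattributable_deaths = deaths * (1 - paf)   # 150.0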
Example #3
def draw_divide(num_df, denom_df):
    '''
    Get the weight value for each uhc_id.
    '''
    # print uhc_id for ease of debugging if loop breaks
    print(pd.unique(num_df['uhc_id']))
    df = misc.draw_math([num_df, denom_df], specs.ID_COLS, specs.DRAW_COLS,
                        '/')
    df['uhc_id'] = num_df['uhc_id'].tolist()

    return df
Example #4
def calc_counterfactual_burden():
    '''
    For a given service/population cell, get the risk-adjusted death rate for
    relevant indicators.
    '''
    parser = argparse.ArgumentParser()
    parser.add_argument('--uhc_id',
                        help='Indicates tracer-service_population',
                        type=int)
    parser.add_argument('--uhc_version',
                        help='Version number for run.',
                        type=int)
    parser.add_argument('--value_type', help='What are we storing.', type=str)
    args = parser.parse_args()

    # get tracers and service pop
    uhc_version_dir = FILEPATH

    # get efficacy tier
    uhc_df = pd.read_excel(FILEPATH)
    uhc_df = uhc_df.query("uhc_id == {}".format(args.uhc_id))
    efficacy = 1. - 0.2 * uhc_df['efficacy_tier'].values.item() + 0.1

    # read coverage and observed burden draws
    coverage_df = pd.read_hdf(FILEPATH)
    burden_df = pd.read_hdf(FILEPATH)

    # perform calculation:
    # counterfactual 0 is the burden we'd see if not for the intervention
    # counterfactual0 = observed / (1 - coverage * efficacy)
    # counterfactual 1 is the burden we'd see with 100% coverage of the intervention
    # counterfactual1 = counterfactual0 * (1 - 1 * efficacy)
    # the health gain weight is the difference
    # health gain weight = counterfactual0 - counterfactual1
    if len(coverage_df) + len(burden_df) == 0:
        summary_df = pd.DataFrame(columns=specs.ID_COLS +
                                  ['mean', 'lower', 'upper'])
    else:
        coverage_df[specs.DRAW_COLS] = (
            1 - coverage_df[specs.DRAW_COLS] * efficacy)
        burden_df = misc.draw_math([burden_df, coverage_df], specs.ID_COLS,
                                   specs.DRAW_COLS, '/')
        burden_df[specs.DRAW_COLS] = burden_df[specs.DRAW_COLS] - (
            burden_df[specs.DRAW_COLS] * (1 - efficacy))
        burden_df['efficacy'] = efficacy
        summary_df = misc.summarize(burden_df, specs.DRAW_COLS)

    # store
    burden_df.to_hdf(FILEPATH)
    summary_df.to_csv(FILEPATH)
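
The counterfactual arithmetic described in the comments can be sanity-checked with scalar values. The numbers below are invented for illustration; the else branch does the same algebra on draws, where the draw_math '/' call yields counterfactual0 and the subtraction leaves counterfactual0 * efficacy, the health gain weight.

# Illustrative scalars only: 100 observed deaths, 60% coverage, 70% efficacy.
observed, coverage, efficacy = 100.0, 0.6, 0.7

counterfactual0 = observed / (1 - coverage * efficacy)   # burden with no intervention (~172.4)
counterfactual1 = counterfactual0 * (1 - efficacy)       # burden with 100% coverage (~51.7)
health_gain_weight = counterfactual0 - counterfactual1   # ~120.7, equals counterfactual0 * efficacy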
Example #5
def fetch_ratio_draws(draw_parameters, uhc_version_dir, uhc_id, **kwargs):
    '''
    required kwargs:
        gbd_id ([int]) = ids for `get_draws` call
        gbd_id_type ([str]) = types associated with each id.
        measure_id (int) = measure of the indicator.
    '''
    nf_df = fetch_outputs_draws(draw_parameters, uhc_version_dir, uhc_id,
                                **kwargs)
    kwargs['measure_id'] = 1
    death_df = fetch_outputs_draws(draw_parameters, uhc_version_dir, uhc_id,
                                   **kwargs)

    # get ratio
    df = misc.draw_math([death_df, nf_df], specs.ID_COLS, specs.DRAW_COLS, '/')

    return df[specs.ID_COLS + specs.DRAW_COLS]
Example #6
def add_global_risk(df, gpaf_df, index_cols, draw_cols):
    '''
    Add effect of global risk exposure.
    '''
    # get global unattributable fraction
    assert index_cols == specs.ID_COLS, 'Assumes location and year as index'

    # expand by location-year, average over year, then add year col back on
    gpaf_df = gpaf_df.drop('location_id', axis=1)
    gpaf_df = gpaf_df.merge(df[index_cols])
    gpaf_df = gpaf_df.groupby('location_id',
                              as_index=False)[specs.DRAW_COLS].mean()
    gpaf_df = gpaf_df.merge(df[index_cols])
    gpaf_df[draw_cols] = 1 - gpaf_df[draw_cols]
    df = misc.draw_math([df, gpaf_df], index_cols, draw_cols, '/')

    return df
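
The drop/merge/groupby/merge sequence expands the global PAF to every location-year, averages it over years within each location, and then reattaches that location-level mean to every year. A toy demonstration with made-up values (one draw column stands in for specs.DRAW_COLS):

import pandas as pd

ids = pd.DataFrame({'location_id': [1, 1, 2, 2],
                    'year_id': [2000, 2001, 2000, 2001]})
gpaf = pd.DataFrame({'year_id': [2000, 2001], 'draw_0': [0.2, 0.4]})

expanded = gpaf.merge(ids)                                         # one row per location-year
by_location = expanded.groupby('location_id',
                               as_index=False)['draw_0'].mean()    # mean over years
reattached = by_location.merge(ids)                                # mean PAF back on every year
print(reattached)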
Example #7
def fetch_mmr_draws(draw_parameters, uhc_version_dir, uhc_id, **kwargs):
    '''
    required kwargs:
        gbd_id ([int]) = ids for `get_draws` call
        gbd_id_type ([str]) = types associated with each id.
        measure_id (int) = measure of the indicator.
    '''
    # NOTE: need to override the age/sex params, which are set up for all ages
    #   (because of met need)
    draw_parameters['age_group_id'] = range(7, 16)
    draw_parameters['sex_id'] = [2]

    # load age-standardized draws for maternal deaths and births
    death_df = fetch_outputs_draws(draw_parameters, uhc_version_dir, uhc_id,
                                   **kwargs)
    births_df = fetch_asfr_draws(draw_parameters, uhc_version_dir)

    # calc age-standardized MMR
    df = misc.draw_math([death_df, births_df], specs.ID_COLS, specs.DRAW_COLS,
                        '/')

    return df[specs.ID_COLS + specs.DRAW_COLS]
Example #8
def produce_uhc():
    parser = argparse.ArgumentParser()
    parser.add_argument('--uhc_id',
                        help='0 for UHC service coverage aggregate.',
                        type=int)
    parser.add_argument('--uhc_version',
                        help='Version number for run.',
                        type=int)
    parser.add_argument('--value_type', help='What are we storing.', type=str)
    args = parser.parse_args()

    # get service_proxys and service pop
    uhc_version_dir = FILEPATH

    # retrieve the IDs we need
    uhc_ids = get_uhc_ids()

    # calculate health gain weight fraction (health gain / sum of health gains)
    count_dfs = uhc_io.compile_dfs('counterfactual_burden', uhc_ids,
                                   uhc_version_dir)
    total_df = pd.concat(count_dfs)
    total_df = total_df.groupby(specs.ID_COLS,
                                as_index=False)[specs.DRAW_COLS].sum()
    weight_dfs = [draw_divide(count_df, total_df) for count_df in count_dfs]
    weight_df = pd.concat(weight_dfs)
    weight_df['mean_weight'] = weight_df[specs.DRAW_COLS].mean(axis=1)
    # output the unadjusted weights
    summary_unadjusted_weights = misc.summarize(weight_df.copy(),
                                                specs.DRAW_COLS)
    summary_unadjusted_weights.to_csv(FILEPATH)
    weight_df.to_csv(FILEPATH)

    # adjust the weights -- take mean weight of indicator in a specified number of bands within a country and year
    weight_df['mean_weight'] = weight_df['mean_weight'].replace(0, np.nan)
    num_bands = 3
    weight_df['weight_band'] = weight_df.groupby(
        ['location_id', 'year_id']).mean_weight.transform(
            lambda x: pd.qcut(x, num_bands, labels=range(1, num_bands + 1)))
    weight_df['weight_band'] = weight_df['weight_band'].replace(np.nan, 0)
    weight_df[specs.DRAW_COLS] = weight_df.groupby(
        ['location_id', 'year_id',
         'weight_band'])[specs.DRAW_COLS].transform('mean')
    weight_df = weight_df.drop(['weight_band', 'mean_weight'], axis=1)

    # apply weight
    cov_dfs = uhc_io.compile_dfs('coverage', uhc_ids, uhc_version_dir)
    cov_df = pd.concat(cov_dfs)
    uhcw_df = misc.draw_math([cov_df, weight_df], specs.ID_COLS + ['uhc_id'],
                             specs.DRAW_COLS, '*')
    uhcw_df = uhcw_df.groupby(specs.ID_COLS,
                              as_index=False)[specs.DRAW_COLS].sum()
    uhca_df = cov_df.groupby(specs.ID_COLS,
                             as_index=False)[specs.DRAW_COLS].mean()

    # summarize and store...
    ## WEIGHTS
    summaryw_df = misc.summarize(weight_df, specs.DRAW_COLS)
    weight_df.to_hdf(FILEPATH)
    summaryw_df.to_csv(FILEPATH)

    ## WEIGHTED VALUES
    summaryuhcw_df = misc.summarize(uhcw_df, specs.DRAW_COLS)
    uhcw_df.to_hdf(FILEPATH)
    summaryuhcw_df.to_csv(FILEPATH)

    ## AVERAGE VALUES
    summaryuhca_df = misc.summarize(uhca_df, specs.DRAW_COLS)
    uhca_df.to_hdf(FILEPATH)
    summaryuhca_df.to_csv(FILEPATH)
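
The weight-banding step in produce_uhc collapses noisy health gain weights by averaging them within quantile bands per location and year. A standalone sketch of that logic with invented weights (only mean_weight is banded and averaged here, whereas the real code averages the full draw columns within each band):

import pandas as pd

# Invented weights for a single location-year.
weights = pd.DataFrame({
    'location_id': [1] * 6,
    'year_id': [2019] * 6,
    'uhc_id': list(range(1, 7)),
    'mean_weight': [0.01, 0.02, 0.05, 0.10, 0.30, 0.52],
})
num_bands = 3
weights['weight_band'] = weights.groupby(['location_id', 'year_id']).mean_weight.transform(
    lambda x: pd.qcut(x, num_bands, labels=range(1, num_bands + 1)))
weights['banded_weight'] = weights.groupby(
    ['location_id', 'year_id', 'weight_band'],
    observed=True)['mean_weight'].transform('mean')
print(weights)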
Example #9
def fetch_art_draws(draw_parameters, uhc_version_dir, uhc_id, **kwargs):
    # load age- and sex-specific data
    _fetch_art_draws_loc = functools.partial(
        fetch_art_draws_loc, age_group_id=draw_parameters['age_group_id'])
    pool = Pool(30)
    dfs = pool.map(_fetch_art_draws_loc, draw_parameters['location_id'])
    pool.close()
    pool.join()
    df = pd.concat(dfs)

    # coverage is by sex, age, location, and year when we read it in. we need
    #   a few steps to remove that level of specificity: we want coverage by
    #   location and year, NOT by location, year, age, and sex.
    # load prevalence and aggregate
    print("""
        prev_df = get_draws(
        source='como', gbd_round_id=GBD_ROUND, version_id={id}, num_workers=30,
        metric_id=3,
        location_id=draw_parameters['location_id'],
        year_id=draw_parameters['year_id'],
        age_group_id=draw_parameters['age_group_id'],
        sex_id=[1, 2],
        decomp_step=DECOMP_STEP,
        **kwargs {kwargs}
    )
    	""".format(id=COMO_VERSION_ID, kwargs=kwargs))
    prev_df = get_draws(source='como',
                        gbd_round_id=GBD_ROUND,
                        version_id=COMO_VERSION_ID,
                        num_workers=30,
                        metric_id=3,
                        location_id=draw_parameters['location_id'],
                        year_id=draw_parameters['year_id'],
                        age_group_id=draw_parameters['age_group_id'],
                        sex_id=[1, 2],
                        decomp_step=DECOMP_STEP,
                        **kwargs)
    prev_df = prev_df[['location_id', 'year_id', 'age_group_id', 'sex_id'] +
                      specs.DRAW_COLS]

    pop_df = pd.read_hdf(FILEPATH)
    prev_df = prev_df.merge(pop_df[[
        'location_id', 'year_id', 'age_group_id', 'sex_id', 'population'
    ]])

    # multiply prevalence (proportion) by population to get number of people with
    #   HIV/AIDS
    prev_df[specs.DRAW_COLS] = (prev_df[specs.DRAW_COLS].values.transpose() *
                                prev_df['population'].values).transpose()

    # multiply number of people with HIV/AIDS by coverage to get number of people
    #   covered
    df = misc.draw_math([df, prev_df],
                        ['location_id', 'year_id', 'age_group_id', 'sex_id'],
                        specs.DRAW_COLS, '*')

    # get number of people covered and number of people with HIV/AIDS for each
    #   year and loc (sum up by sex and by age)
    df = df.groupby(specs.ID_COLS, as_index=False)[specs.DRAW_COLS].sum()
    prev_df = prev_df.groupby(specs.ID_COLS,
                              as_index=False)[specs.DRAW_COLS].sum()

    # now divide number of people covered by number of people with HIV/AIDS to
    #   get back into coverage space
    df = misc.draw_math([df, prev_df], specs.ID_COLS, specs.DRAW_COLS, '/')

    draw_parameters['to_check'] = ['location_id', 'year_id']
    param_check(df, draw_parameters)

    return df[specs.ID_COLS + specs.DRAW_COLS]
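
The prevalence-weighting in fetch_art_draws turns age/sex-specific coverage into counts of covered people, sums covered people and prevalent cases to the location-year level, and divides to return to coverage space. A toy illustration with invented numbers (a single 'coverage' column stands in for the draw columns):

import pandas as pd

detail = pd.DataFrame({
    'location_id': [1, 1],
    'year_id': [2019, 2019],
    'sex_id': [1, 2],
    'coverage': [0.5, 0.8],
    'prevalent_cases': [100.0, 300.0],
})
detail['covered'] = detail['coverage'] * detail['prevalent_cases']
aggregated = detail.groupby(['location_id', 'year_id'],
                            as_index=False)[['covered', 'prevalent_cases']].sum()
aggregated['coverage'] = aggregated['covered'] / aggregated['prevalent_cases']   # 0.725
print(aggregated)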