Exemplo n.º 1
0
def custom_age_weights(age_group_ids, gbdrid=4):
    """Return age weights for the given age groups, rescaled to sum to 1.

    Args:
        age_group_ids: collection of age_group_ids to keep (its repr is
            interpolated into a pandas query, so a list/tuple is expected).
        gbdrid: GBD round id passed through to qry.get_age_weights.

    Returns:
        DataFrame with 'age_group_id' and renormalized
        'age_group_weight_value' columns.
    """
    weights = qry.get_age_weights(gbdrid)

    # restrict to the requested age groups
    weights = weights.query('age_group_id in {}'.format(age_group_ids))

    # renormalize so the retained weights sum to 1
    total = weights['age_group_weight_value'].sum()
    weights['age_group_weight_value'] = (
        weights['age_group_weight_value'] / total)

    return weights[['age_group_id', 'age_group_weight_value']]
def get_custom_age_weights(age_group_years_start, age_group_years_end):
    """Return age weights for groups inside a year span, rescaled to sum to 1.

    Args:
        age_group_years_start: minimum age_group_years_start (inclusive).
        age_group_years_end: maximum age_group_years_end (inclusive).

    Returns:
        DataFrame with 'age_group_id' and renormalized
        'age_group_weight_value' columns.
    """
    weights = qry.get_age_weights()

    # keep only age groups fully contained in [start, end]
    span_filter = (
        'age_group_years_start >= {start} & age_group_years_end <= {end}'
        .format(start=age_group_years_start, end=age_group_years_end))
    weights = weights.query(span_filter)

    # renormalize so the retained weights sum to 1
    weights['age_group_weight_value'] /= (
        weights['age_group_weight_value'].sum())

    return weights[['age_group_id', 'age_group_weight_value']]
def custom_age_weights(age_group_id_start, age_group_id_end, gbd_round_id=3):
    """Get age weights for a contiguous age_group_id range, scaled to sum to 1.

    Args:
        age_group_id_start: lowest age_group_id to keep (inclusive).
        age_group_id_end: highest age_group_id to keep (inclusive).
        gbd_round_id: GBD round to pull age weights for. Defaults to 3 to
            preserve the previously hard-coded value (the old inline comment
            questioned that constant; it is now an overridable parameter).

    Returns:
        The age-weights DataFrame restricted to the requested range, with
        'age_group_weight_value' rescaled so the kept weights sum to 1.
    """
    t = qry.get_age_weights(gbd_round_id)

    # keep only the requested contiguous range of age groups
    t = t.query('age_group_id >= {start} & age_group_id <= {end}'.format(
        start=age_group_id_start, end=age_group_id_end))
    # scale weights to 1
    t['age_group_weight_value'] =  \
        t['age_group_weight_value'] / \
        t['age_group_weight_value'].sum()

    return t
Exemplo n.º 4
0
def age_standardize(df):
    """Age standardize draw columns, collapsing rows to age_group_id 27.

    Merges GBD-round-4 age weights onto *df*, multiplies every draw column
    by its row's weight, then sums the weighted draws within
    dw.EPI_GROUP_COLS. Raises AssertionError if any row fails to match an
    age weight.
    """
    # standard-population weights keyed by age_group_id
    weights = qry.get_age_weights(4)
    weights = weights[['age_group_id', 'age_group_weight_value']]

    df = df.merge(weights, on=['age_group_id'], how='left')
    assert df.age_group_weight_value.notnull().values.all(), 'age weights merg'

    # weight each draw by its age group's share of the standard population
    weighted = df[dw.DRAW_COLS].apply(
        lambda col: col * df['age_group_weight_value'])
    df = pd.concat([df[dw.EPI_GROUP_COLS], weighted], axis=1)

    # 27 = age-standardized; summing the weighted draws completes the
    # standardization
    df['age_group_id'] = 27
    df = df.groupby(dw.EPI_GROUP_COLS, as_index=False)[dw.DRAW_COLS].sum()
    return df
def process_location_risk_burden_draws(location_id, test=False):
    ''' Given a list of rei_ids, use gopher to get attributable burden draws
    and save to out directory.

    For each risk in dw.RISK_BURDEN_REI_IDS / dw.RISK_BURDEN_DALY_REI_IDS:
    pull all-cause (294) attributable burden from the burdenator, restrict
    to the wanted measure/years/ages, aggregate to both sexes, convert
    counts to rates, interpolate years, and age-standardize. The combined
    result is written via write_output and returned.

    Args:
        location_id: location to process.
        test: unused in this body; kept for interface compatibility.

    Returns:
        DataFrame of age-standardized risk-burden rates for the location.
    '''

    dfs = []
    for rei_id in dw.RISK_BURDEN_REI_IDS + dw.RISK_BURDEN_DALY_REI_IDS:
        print(rei_id)
        # measure 1 for the plain risk-burden list, measure 2 for the
        # DALY-based list
        if rei_id in dw.RISK_BURDEN_REI_IDS:
            measure_id = 1
        elif rei_id in dw.RISK_BURDEN_DALY_REI_IDS:
            measure_id = 2
        else:
            raise ValueError("no measure found")
        print('Getting draws')
        df = get_draws(gbd_id_field=['cause_id', 'rei_id'],
                       gbd_id=[294, rei_id],
                       source='burdenator',
                       version=dw.BURDENATOR_VERS,
                       location_ids=location_id,
                       year_ids=[],
                       age_group_ids=[],
                       sex_ids=[],
                       num_workers=3,
                       n_draws=1000,
                       resample=True)

        # keep years we want
        df = df.query('measure_id == {}'.format(measure_id))
        df = df.query('metric_id == 1')
        # BUG FIX: wrap range() in list() -- on Python 3 range objects
        # cannot be concatenated with `+`; output is identical on Python 2.
        df = df.query('age_group_id in {} and sex_id in [1, 2]'.format(
            list(range(2, 21)) + list(range(30, 33)) + [235]))
        df = df.query('year_id in {}'.format(
            list(range(1990, 2011, 5)) + [2016]))

        # aggregate to both sexes
        df['sex_id'] = 3
        df = df.groupby(dw.RISK_BURDEN_GROUP_COLS,
                        as_index=False)[dw.DRAW_COLS].sum()
        # convert counts (metric 1) to rates (metric 3) with both-sex pops
        pops = qry.get_pops(both_sexes=True)
        df = df.merge(pops,
                      how='left',
                      on=['location_id', 'age_group_id', 'sex_id', 'year_id'])
        df = pd.concat([
            df[dw.RISK_BURDEN_GROUP_COLS],
            df[dw.DRAW_COLS].apply(lambda x: x / df['population'])
        ],
                       axis=1)
        df['metric_id'] = 3

        # keep the right columns
        df = df[dw.RISK_BURDEN_GROUP_COLS + dw.DRAW_COLS]

        # interpolate years
        print('Interpolating')
        df = custom_interpolate(df)

        # age-standardize: weight draws by the round-4 age weights, then
        # sum into the age-standardized group (27)
        age_weights = qry.get_age_weights(4)
        df = df.merge(age_weights)
        df = pd.concat([
            df[dw.RISK_BURDEN_GROUP_COLS],
            df[dw.DRAW_COLS].apply(lambda x: x * df['age_group_weight_value'])
        ],
                       axis=1)
        df['age_group_id'] = 27
        df = df.groupby(dw.RISK_BURDEN_GROUP_COLS,
                        as_index=False)[dw.DRAW_COLS].sum()
        dfs.append(df)

    df = pd.concat(dfs)
    write_output(df, 'risk_burden', location_id)
    return df
Exemplo n.º 6
0
def load_location_file(iso):
    """Read one country's HIV file and pivot incidence draws to wide form.

    Args:
        iso: country code; '<iso>.csv' is read from dw.HIV_DIR.

    Returns:
        DataFrame indexed by location/year/age/sex columns with one column
        per draw ('draw_0' ... ) holding incidence values.
    """
    path = os.path.join(dw.HIV_DIR, iso + '.csv')
    raw = pd.read_csv(path)

    # keep incidence rows inside the modeled year window
    raw = raw.query(
        'year_id >= 1990 and year_id <= 2030 and variable == "Incidence"')

    # run_num is 1-based; build 0-based draw_* labels from it
    raw['run_num'] = 'draw_' + (raw['run_num'] - 1).astype(str)

    index_cols = ['location_id', 'year_id', 'age_group_id', 'sex_id']
    wide = pd.pivot_table(raw,
                          values='value',
                          index=index_cols,
                          columns='run_num')
    return wide.reset_index()

if __name__ == '__main__':
    # Supplementary datasets
    print('Collecting supplementary datasets')
    # Round-4 age weights; fold the 80+ detail groups (30, 31, 32, 235)
    # into the aggregate 80+ group (21) and re-sum their weights.
    age_weights = qry.get_age_weights(4)
    age_weights.loc[age_weights.age_group_id.isin([30, 31, 32, 235]), 'age_group_id'] = 21
    age_weights = age_weights.groupby(['age_group_id'], as_index=False)['age_group_weight_value'].sum()

    # GBD populations collapsed the same way so they align with the weights.
    gbd_popdf = qry.get_pops()
    gbd_popdf.loc[gbd_popdf.age_group_id.isin([30, 31, 32, 235]), 'age_group_id'] = 21
    gbd_popdf = gbd_popdf.groupby(['location_id', 'year_id', 'age_group_id', 'sex_id'], as_index=False)['population'].sum()

    # WPP projected populations for 2016 onward; rename 'pop' to match the
    # GBD 'population' column name.
    wpp_popdf = pd.read_csv('FILEPATH/wpp2015_to2063.csv')
    wpp_popdf = wpp_popdf.loc[wpp_popdf.year_id >= 2016]
    wpp_popdf = wpp_popdf.rename(index=str, columns={'pop':'population'})

    # Reporting locations; L3_loc is the fourth id (index 3) on the
    # comma-separated path_to_top_parent -- presumably the country-level
    # ancestor. TODO(review): confirm against the location hierarchy.
    locsdf = qry.get_sdg_reporting_locations()
    locsdf['L3_loc'] = [loc[3] for loc in locsdf.path_to_top_parent.str.split(',').tolist()]

    # Compile all countries
Exemplo n.º 7
0
import sdg_utils.draw_files as dw
import sdg_utils.queries as qry

# read asfr file
# NOTE(review): Python 2 print statements and pandas .ix below -- this
# script requires Python 2 and an older pandas release.
print 'reading input file...'
df = pd.read_csv("{d}/asfr_10_19.csv".format(d=dw.ASFR_DIR))

# DRAW NAME STANDARDS
# rename asfr_draw_X to draw_X like others
print 'cleaning...'
df = df.rename(columns=lambda x: x.replace('asfr_draw', 'draw'))
# shift from 1-1000 to 0-999
df = df.rename(columns={'draw_1000': 'draw_0'})

# AGE STANDARDIZE
# keep weights for age groups 7 and 8 (presumably covering the 10-19
# span of the input file -- confirm) and rescale them to sum to 1
weights = qry.get_age_weights(ref_pop=3)
weights = weights.ix[weights['age_group_id'].isin([7, 8])]
weights['age_group_weight_value'] = weights['age_group_weight_value'] / \
    weights.age_group_weight_value.sum()

df = df.merge(weights, how='left')
assert df.age_group_weight_value.notnull().values.all(), \
    'merge failed'
id_cols = [
    'location_id', 'year_id', 'sex_id', 'age_group_id', 'measure_id',
    'metric_id'
]
# just call this a continuous rate? idk
# (measure 18 / metric 3 tag the result as a rate; age_group_id 27 marks
# the rows as age-standardized)
df['measure_id'] = 18
df['metric_id'] = 3
df['age_group_id'] = 27