Example #1
def process_como_prev_draws(past_future, version=dw.COMO_VERS):

    if past_future == 'past':
        data_dir = dw.INPUT_DATA_DIR + 'como_prev' + '/' + str(
            version)  # just como_prev for now
    elif past_future == 'future':
        data_dir = dw.FORECAST_DATA_DIR + 'como_prev' + '/' + str(version)
    else:
        raise ValueError(
            'The past_future arg must be set to "past" or "future".')

    db_pops = qry.get_pops()

    # nonfatal and nema + fatal
    component_ids = [
        125, 128, 131, 1433, 149, 152, 140, 143, 146, 104, 107, 110, 113, 116,
        119, 122, 134, 137
    ]
    for component_id in component_ids:
        print("pulling " + str(component_id))
        df = pd.read_feather(data_dir + '/' + str(component_id) + '.feather')
        df['metric_id'] = 3
        df = df.merge(db_pops, how='left')

        # Keep sex-split
        df = df[df.sex_id != 3]  # temporary for goalkeepers diagnostics
        df_sex_split = df.copy(deep=True)

        # aggregate sexes
        df = agg.aggregate_sexes(df, dw.COMO_GROUP_COLS)

        # append sex-split data, then age standardize
        print('appending sex split data')
        df = pd.concat([df, df_sex_split], ignore_index=True)
        df = agg.age_standardize(df, dw.COMO_GROUP_COLS, 0, 125)

        df = df[dw.COMO_GROUP_COLS + dw.DRAW_COLS]

        print("outputting " + str(component_id))
        df.to_feather(data_dir + '/' + str(component_id) + '_as.feather')
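
The agg.age_standardize helper used above is not part of this listing. A minimal sketch of that step, assuming rate-space draws (metric_id == 3, as set above) and a standard-age-weight table with age_group_id and age_group_weight_value columns (both column names are assumptions, not taken from the project's agg module):

import pandas as pd

def age_standardize_sketch(df, group_cols, draw_cols, age_weights,
                           age_group_id=27):
    # hypothetical stand-in for agg.age_standardize; not the project's code
    df = df.merge(age_weights, on='age_group_id', how='inner')
    # weight each age-specific rate draw by the standard population weight
    for col in draw_cols:
        df[col] = df[col] * df['age_group_weight_value']
    # sum across ages within every other index column
    out_cols = [c for c in group_cols if c != 'age_group_id']
    df = df.groupby(out_cols, as_index=False)[draw_cols].sum()
    df['age_group_id'] = age_group_id  # 27 = age-standardized in GBD
    return df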
Example #2
def process_dismod_draws(past_future, version=dw.DISMOD_VERS):

    if past_future == 'past':
        index_cols = dw.DISMOD_GROUP_COLS
        data_dir = dw.INPUT_DATA_DIR + 'dismod' + '/' + str(version)
        db_pops = qry.get_pops()
    elif past_future == 'future':
        index_cols = ['indicator_component_id'] + INDEX_COLS_FUTURE
        data_dir = dw.FORECAST_DATA_DIR + 'dismod' + '/' + str(version)
        db_pops = load_population()
    else:
        raise ValueError(
            'The past_future arg must be set to "past" or "future".')

    component_ids = [14, 17, 242, 245]  # no child sex abuse (pulled later)
    dfs = []

    for component_id in component_ids:
        print("pulling {c}".format(c=component_id))
        df = pd.read_feather(data_dir + '/' + str(component_id) + '.feather')
        df.loc[:, 'indicator_component_id'] = component_id
        dfs.append(df)

    print("concatenating")
    df = pd.concat(dfs, ignore_index=True)

    df = df[index_cols + dw.DRAW_COLS]

    # COLLAPSE SEX/AGES
    df = df.merge(db_pops, how='left')
    df_sex_split = df[df.indicator_component_id.isin([14, 17])].copy()
    df.loc[df['indicator_component_id'].isin([14, 17]),
           'sex_id'] = 3  # physical and sexual violence sex aggregation
    df = agg.age_sex_aggregate(df, group_cols=index_cols)
    df = pd.concat([df, df_sex_split], ignore_index=True)

    # AGE STANDARDIZE
    df_age_stand_all_age = df.loc[df['indicator_component_id'].isin([14, 17])]
    df_age_stand_15_plus = df.loc[df['indicator_component_id'].isin(
        [242, 245])]  # intimate partner and non-intimate partner violence

    # global
    df_aa_global = agg.aggregate_locations_to_global(df_age_stand_all_age,
                                                     index_cols,
                                                     age_standardized=True,
                                                     age_group_years_start=0,
                                                     age_group_years_end=125,
                                                     age_group_id=27)

    df_15_plus_global = agg.aggregate_locations_to_global(
        df_age_stand_15_plus,
        index_cols,
        age_standardized=True,
        age_group_years_start=15,
        age_group_years_end=125,
        age_group_id=29)

    # national/subnational
    df_age_stand_all_age = agg.age_standardize(df_age_stand_all_age,
                                               index_cols, 0, 125, 27)
    df_age_stand_15_plus = agg.age_standardize(df_age_stand_15_plus,
                                               index_cols, 15, 125, 29)

    # concat
    print("concatenating")
    df = pd.concat([df_age_stand_all_age, df_age_stand_15_plus],
                   ignore_index=True)
    df_global = pd.concat([df_aa_global, df_15_plus_global], ignore_index=True)

    # output
    df = df[index_cols + dw.DRAW_COLS]

    file_dict = dict(zip(component_ids, ['1094', '1095', '1047', '1098']))

    for component_id in file_dict.keys():

        path = data_dir + '/' + file_dict[component_id] + '.feather'
        global_path = data_dir + '/' + file_dict[
            component_id] + '_global' + '.feather'

        # sdg
        print('outputting ' + file_dict[component_id])
        df_id = df[df.indicator_component_id == component_id]
        df_id.reset_index(drop=True, inplace=True)
        df_id.to_feather(path)

        # global
        print('outputting ' + file_dict[component_id] + ' global')
        df_id_global = df_global[df_global.indicator_component_id ==
                                 component_id]
        df_id_global.reset_index(drop=True, inplace=True)
        df_id_global.to_feather(global_path)

    #############################################
    # child sex abuse
    index_cols = [c for c in index_cols if c != 'indicator_component_id']

    df_csa = pd.read_feather(data_dir + '/' + '1064_age_disagg.feather')
    df_csa = df_csa.merge(db_pops, how='left')

    # aggregation and output
    df_csa.loc[:, 'age_group_id'] = 202
    df_csa = agg.age_sex_aggregate(df_csa, group_cols=index_cols)

    df_csa_global = df_csa.copy(deep=True)

    df_csa = df_csa[index_cols + dw.DRAW_COLS]
    df_csa.reset_index(drop=True, inplace=True)
    print('outputting 1064')
    df_csa.to_feather(data_dir + '/' + '1064.feather')

    df_csa_global = agg.aggregate_locations_to_global(df_csa_global,
                                                      index_cols)
    df_csa_global = df_csa_global[index_cols + dw.DRAW_COLS]
    df_csa_global.reset_index(drop=True, inplace=True)
    print('outputting 1064 global')
    df_csa_global.to_feather(data_dir + '/' + '1064_global.feather')

    return df
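
agg.age_sex_aggregate is likewise external to this listing. The usual pattern, given that the draws are merged with population first, is to convert rates to counts, sum within the (already re-coded) index columns, and divide back; a sketch under that assumption, with all names hypothetical:

import pandas as pd

def age_sex_aggregate_sketch(df, group_cols, draw_cols,
                             denominator='population'):
    # hypothetical stand-in for agg.age_sex_aggregate; not the project's code
    df = df.copy()
    for col in draw_cols:
        df[col] = df[col] * df[denominator]              # rate -> count
    out = df.groupby(group_cols, as_index=False)[draw_cols + [denominator]].sum()
    for col in draw_cols:
        out[col] = out[col] / out[denominator]           # count -> rate
    return out.drop(columns=denominator)

Because the callers above overwrite sex_id (and sometimes age_group_id) before calling it, grouping on the index columns is what actually collapses the sexes and ages.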
Example #3
def process_codcorrect_draws(version=dw.CC_VERS):

    index_cols = dw.CC_GROUP_COLS
    component_ids = dw.CC_ALL_AGE_COMPONENT_IDS + dw.CC_THIRTY_SEVENTY_COMPONENT_IDS + dw.CONF_DIS_COMPONENT_IDS
    data_dir = dw.INPUT_DATA_DIR + 'codcorrect' + '/' + str(version)

    dfs = []
    for component_id in component_ids:
        print("pulling {c}".format(c=component_id))
        df = pd.read_feather(data_dir + '/' + str(component_id) + '.feather')
        df.loc[:, 'indicator_component_id'] = component_id
        dfs.append(df)

    print("concatenating")
    df = pd.concat(dfs, ignore_index=True)

    # convert to numbers
    db_pops = qry.get_pops()
    df = df.merge(db_pops, how='left')

    # keep sex split for certain indicators (ncds, road mort, poisoning, homicide)
    df_keep_sex_split = df.loc[df['indicator_component_id'].isin(
        dw.CC_THIRTY_SEVENTY_COMPONENT_IDS + dw.CC_ALL_AGE_COMPONENT_IDS
    ), :]  # these age_groups get standardized later

    # collapse sex (and age for conflict and disaster mortality)
    df['sex_id'] = 3
    df.loc[df['indicator_component_id'].isin(dw.CONF_DIS_COMPONENT_IDS),
           'age_group_id'] = 22
    df = agg.age_sex_aggregate(df, group_cols=index_cols)

    # make sure it looks like we expect
    assert set(df.loc[df['cause_id'].isin(dw.CONF_DIS_CAUSES)].age_group_id) == set([22]), \
        'unexpected age group ids found'
    assert set(df.loc[~df['cause_id'].isin(dw.CONF_DIS_CAUSES)].age_group_id) == \
        set(list(range(2, 21)) + list(range(30, 33)) + [235]), \
        'unexpected age group ids found'
    assert set(df.sex_id) == set([3]), 'unexpected sex ids found'

    # concat sex-split data before age-standardizing
    df = pd.concat([df, df_keep_sex_split], axis=0)

    # prepare for age-standardization
    # all age-standardized except for conflict and disaster mort
    df_conf_dis = df.loc[df['indicator_component_id'].isin(
        dw.CONF_DIS_COMPONENT_IDS)]
    df_ncds = df.loc[df['indicator_component_id'].isin(
        dw.CC_THIRTY_SEVENTY_COMPONENT_IDS)]
    df_all_ages = df.loc[df['indicator_component_id'].isin(
        dw.CC_ALL_AGE_COMPONENT_IDS)]

    # global aggregation
    df_cd_global = agg.aggregate_locations_to_global(df_conf_dis, index_cols)

    df_ncds_global = agg.aggregate_locations_to_global(
        df_ncds,
        index_cols,
        age_standardized=True,
        age_group_years_start=30,
        age_group_years_end=70,
        age_group_id=214)

    df_aa_global = agg.aggregate_locations_to_global(df_all_ages,
                                                     index_cols,
                                                     age_standardized=True,
                                                     age_group_years_start=0,
                                                     age_group_years_end=125,
                                                     age_group_id=27)

    # age standardize
    df_ncds = agg.age_standardize(df_ncds, index_cols, 30, 70, 214)
    df_all_ages = agg.age_standardize(df_all_ages, index_cols, 0, 125, 27)

    # concat all
    print('concatenating')
    df = pd.concat([df_ncds, df_all_ages, df_conf_dis], axis=0)
    df_global = pd.concat([df_ncds_global, df_aa_global, df_cd_global], axis=0)

    # output
    df = df[index_cols + dw.DRAW_COLS]
    df_global = df_global[index_cols + dw.DRAW_COLS]

    file_dict = dw.CC_FILE_DICT
    for component_id in file_dict.keys():

        path = data_dir + '/' + file_dict[component_id] + '.feather'
        global_path = data_dir + '/' + file_dict[
            component_id] + '_global' + '.feather'

        # sdg
        print('outputting ' + file_dict[component_id])
        df_id = df[df.indicator_component_id == component_id]
        df_id.reset_index(drop=True, inplace=True)
        df_id.to_feather(path)

        # global
        print('outputting ' + file_dict[component_id] + ' global')
        df_id_global = df_global[df_global.indicator_component_id ==
                                 component_id]
        df_id_global.reset_index(drop=True, inplace=True)
        df_id_global.to_feather(global_path)
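
agg.aggregate_locations_to_global is also not shown here. A population-weighted sketch of the plain (non-age-standardized) case, assuming a location_id column and the GBD convention that location 1 is the global aggregate; all names are illustrative:

import pandas as pd

def aggregate_locations_to_global_sketch(df, group_cols, draw_cols,
                                         denominator='population'):
    # hypothetical stand-in for agg.aggregate_locations_to_global
    df = df.copy()
    for col in draw_cols:
        df[col] = df[col] * df[denominator]              # rate -> count
    keep = [c for c in group_cols if c != 'location_id']
    out = df.groupby(keep, as_index=False)[draw_cols + [denominator]].sum()
    for col in draw_cols:
        out[col] = out[col] / out[denominator]           # back to rates
    out['location_id'] = 1  # GBD global location
    return out[group_cols + draw_cols]

The age_standardized=True variants used above would additionally apply standard age weights after this location collapse.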
Example #4
def process_risk_exposure_draws(past_future, version=dw.RISK_EXPOSURE_VERS):

    if past_future == 'past':
        index_cols = dw.RISK_EXPOSURE_GROUP_COLS
        data_dir = dw.INPUT_DATA_DIR + 'risk_exposure' + '/' + str(version)
        db_pops = qry.get_pops()
    elif past_future == 'future':
        index_cols = ['indicator_component_id'] + INDEX_COLS_FUTURE
        data_dir = dw.FORECAST_DATA_DIR + 'risk_exposure' + '/' + str(version)
        db_pops = load_population()
    else:
        raise ValueError(
            'The past_future arg must be set to "past" or "future".')

    component_ids = dw.RISK_EXPOSURE_COMPONENT_IDS

    dfs = []
    for component_id in component_ids:
        print("pulling {c}".format(c=component_id))
        df = pd.read_feather(data_dir + '/' + str(component_id) + '.feather')
        df.loc[:, 'indicator_component_id'] = component_id
        dfs.append(df)

    print("concatenating")
    df = pd.concat(dfs, ignore_index=True)

    # collapse sex/ages
    df = df.merge(db_pops, how='left')

    # set age-groups for aggregation now so it doesn't have to be done twice (for sex_split)
    df.loc[df.indicator_component_id.isin([35, 41]),
           'age_group_id'] = 1  # Malnutrition
    df.loc[df.indicator_component_id == 44,
           'age_group_id'] = 5  # Child Overweight

    # keep these for later
    df_sex_split = df[~df.indicator_component_id.isin([5, 227])]
    df_smoking = df[df.indicator_component_id == 227].copy()
    df_smoking_sex_split = df_smoking.copy(deep=True)

    df = df[df.indicator_component_id != 227]  # remove smoking from main df

    # age/sex aggregate
    df['sex_id'] = 3  # changes everything but Mean PM2.5 which is already aggregated
    print("concatenating")  # concat sex-split data
    df = pd.concat([df, df_sex_split], ignore_index=True)
    df = agg.age_sex_aggregate(df,
                               group_cols=index_cols,
                               denominator='population')

    # sex aggregate smoking data before age-standardizing
    df_smoking['sex_id'] = 3
    df_smoking = agg.age_sex_aggregate(df_smoking,
                                       group_cols=index_cols,
                                       denominator='population')
    print("concatenating")
    df_smoking = pd.concat([df_smoking, df_smoking_sex_split], axis=0)

    # aggregate all but smoking to global
    df_global = agg.aggregate_locations_to_global(df, index_cols)

    # aggregate smoking to global and age-standardize global and non-global
    df_smoking_global = agg.aggregate_locations_to_global(
        df_smoking,
        index_cols,
        age_standardized=True,
        age_group_years_start=10,
        age_group_years_end=125,
        age_group_id=194)
    # df_smoking_global['units'] = 'sdg'
    df_smoking = agg.age_standardize(df_smoking, index_cols, 10, 125, 194)

    # concat smoking
    df = pd.concat([df, df_smoking], axis=0)
    df_global = pd.concat([df_global, df_smoking_global], axis=0)

    # output
    df = df[index_cols + dw.DRAW_COLS]
    df_global = df_global[index_cols + dw.DRAW_COLS]

    file_dict = dw.RE_FILE_DICT
    for component_id in file_dict.keys():

        path = data_dir + '/' + str(file_dict[component_id]) + '.feather'
        global_path = data_dir + '/' + str(
            file_dict[component_id]) + '_global' + '.feather'

        # sdg
        print('outputting ' + str(file_dict[component_id]))
        df_id = df[df.indicator_component_id == component_id]
        df_id.reset_index(drop=True, inplace=True)
        df_id.to_feather(path)

        # global
        print('outputting ' + str(file_dict[component_id]) + ' global')
        df_id_global = df_global[df_global.indicator_component_id ==
                                 component_id]
        df_id_global.reset_index(drop=True, inplace=True)
        df_id_global.to_feather(global_path)

    return df_global
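
Every subset written above is passed through reset_index(drop=True) first because DataFrame.to_feather refuses a non-default index, and a boolean-filtered frame keeps its original row labels. A small illustration of that pattern (the path is hypothetical):

import pandas as pd

df = pd.DataFrame({'indicator_component_id': [14, 17, 14],
                   'draw_0': [0.1, 0.2, 0.3]})

subset = df[df.indicator_component_id == 14]   # index is [0, 2], not 0..n-1
subset = subset.reset_index(drop=True)         # restore a default RangeIndex
subset.to_feather('/tmp/example_14.feather')   # hypothetical output path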
Example #5
def process_burdenator_draws(past_future, version=dw.BURDENATOR_VERS):
    if past_future == 'past':
        index_cols = dw.RISK_BURDEN_GROUP_COLS
        data_dir = dw.INPUT_DATA_DIR + 'risk_burden' + '/' + str(version)
        db_pops = qry.get_pops()
    elif past_future == 'future':
        index_cols = ['indicator_component_id'] + INDEX_COLS_FUTURE
        data_dir = dw.FORECAST_DATA_DIR + 'risk_burden' + '/' + str(version)
        db_pops = load_population()
    else:
        raise ValueError(
            'The past_future arg must be set to "past" or "future".')

    dfs = []

    component_ids = dw.RISK_BURDEN_COMPONENT_IDS + dw.RISK_BURDEN_DALY_COMPONENT_IDS
    for component_id in component_ids:
        print("pulling " + str(component_id))
        df = pd.read_feather(data_dir + '/' + str(component_id) + '.feather')
        df.loc[:, 'indicator_component_id'] = component_id
        dfs.append(df)

    df = pd.concat(dfs, ignore_index=True)

    # aggregate to both sexes but keep sex-split data as well
    df = df.merge(db_pops, how='left')
    df_sex_split = df.copy(deep=True)
    df['sex_id'] = 3
    df = agg.age_sex_aggregate(df, group_cols=index_cols)
    df = pd.concat([df, df_sex_split], axis=0, ignore_index=True)

    # global
    df_global = agg.aggregate_locations_to_global(df,
                                                  index_cols,
                                                  age_standardized=True,
                                                  age_group_years_start=0,
                                                  age_group_years_end=125,
                                                  age_group_id=27)

    # age-standardize
    df = agg.age_standardize(df, index_cols, 0, 125)

    # output
    df = df[index_cols + dw.DRAW_COLS]
    df_global = df_global[index_cols + dw.DRAW_COLS]

    file_dict = dw.RB_INPUT_FILE_DICT
    for component_id in file_dict.keys():

        path = data_dir + '/' + str(file_dict[component_id]) + '.feather'
        global_path = data_dir + '/' + str(
            file_dict[component_id]) + '_global' + '.feather'

        # sdg
        print('outputting ' + str(file_dict[component_id]))
        df_id = df[df.indicator_component_id == component_id]
        df_id.reset_index(drop=True, inplace=True)
        df_id.to_feather(path)

        # global
        print('outputting ' + str(file_dict[component_id]) + ' global')
        df_id_global = df_global[df_global.indicator_component_id ==
                                 component_id]
        df_id_global.reset_index(drop=True, inplace=True)
        df_id_global.to_feather(global_path)

    return df
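
A usage sketch of how these entry points might be driven together; the loop is illustrative and assumes the dw version constants are configured as in the signatures above:

# hypothetical driver; every function writes its own feather files
for past_future in ['past', 'future']:
    process_como_prev_draws(past_future, version=dw.COMO_VERS)
    process_dismod_draws(past_future, version=dw.DISMOD_VERS)
    process_risk_exposure_draws(past_future, version=dw.RISK_EXPOSURE_VERS)
    process_burdenator_draws(past_future, version=dw.BURDENATOR_VERS)

# the CoDCorrect processing has no past/future switch in this listing
process_codcorrect_draws(version=dw.CC_VERS)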