Exemplo n.º 1
0
def get_live_births_summaries(location_ids, year_ids):
    """Return live births by sex with each sex's share of the total.

    Pulls the live-births-by-sex covariate for the requested locations and
    years, sums ``mean_value`` over all returned sexes within each
    location-year, and adds ``birth_prop`` = ``mean_value`` divided by that
    location-year total. The sex metadata from ``get_ids('sex')`` is merged
    on at the end.

    :param location_ids: location id(s) passed to the covariate query
    :param year_ids: year id(s) passed to the covariate query
    :return: dataframe with location_id, year_id, sex_id, mean_value,
        mean_value_both, birth_prop and the columns of the sex metadata
    """
    demographic_keys = ['location_id', 'year_id']

    # best model_version_id at time of upload - 24083
    live_births_covariate_id = 1106  # live births by sex covariate
    estimates = get_covariate_estimates(
        covariate_id=live_births_covariate_id,
        gbd_round_id=5,
        location_id=location_ids,
        year_id=year_ids,
        sex_id=[3, 2, 1])
    births = estimates[demographic_keys + ['sex_id', 'mean_value']]

    # The covariate currently returns most-detailed sex only, but the
    # both-sexes total is needed as the denominator, so build it by summing
    # the rows that came back.
    # NOTE(review): if the covariate ever returns sex_id 3 rows as well, this
    # sum would count them on top of the sex-specific rows -- confirm.
    totals = births.copy()
    totals.loc[:, 'sex_id'] = 3
    totals = totals.groupby(demographic_keys + ['sex_id']).sum().reset_index()

    births = births.merge(totals[demographic_keys + ['mean_value']],
                          how='left',
                          on=demographic_keys,
                          suffixes=['', '_both'],
                          indicator=True)
    # Every row must have found its location-year total.
    assert (births['_merge'] == 'both').all()
    births = births.drop('_merge', axis=1)
    births['birth_prop'] = births['mean_value'] / births['mean_value_both']

    # merge in 'sex' column because data in eurocat spreadsheet does not have
    # sex_ids
    return births.merge(get_ids('sex'), on='sex_id')
Exemplo n.º 2
0
def get_sy_population(grp_pop, sex_id, pop_dir):
    """ Get single-year population (i.e. single-year ages) data for all years.

    The single-year dataset is read from a netCDF file, restricted to
    ``sex_id``, and labelled with numeric ages. The terminal (age == 95) rows
    and an aggregated under-1 row (age set to 0) taken from ``grp_pop`` are
    then appended.

    :param grp_pop: group-population df because we need 95+ (and under-1)
    :param sex_id (int): sex to select from the single-year dataset
    :param pop_dir (str): directory where population flat-files are saved.
        NOTE(review): not used by the body as written -- the dataset path
        below is a fixed placeholder; confirm the intended location.
    :return: single-year population data-frame for all COD years
    """
    # Local import: DataFrame.append was removed in pandas 2.0, so the rows
    # are stacked with pd.concat instead.
    import pandas as pd

    terminal_age = grp_pop.loc[grp_pop["age"] == 95]

    # Collapse every under-1 age group into one row per location/sex/year.
    under1 = grp_pop.loc[grp_pop.age < 1]
    under1 = under1.groupby(['location_id', 'sex_id',
                             'year_id']).sum().reset_index()
    under1["age"] = 0

    master = xr.open_dataset(os.path.join('FILEPATH.nc'))
    master = master.loc[{'sex_id': [sex_id]}]
    master = master.to_dataframe().reset_index()

    # merge on single-year age names
    age_ids = db.get_ids(table="age_group")
    master = master.merge(age_ids, on=['age_group_id'])
    master.rename(columns={'age_group_name': 'age'}, inplace=True)
    master.drop('age_group_id', inplace=True, axis=1)
    # "95 plus" is the only label rewritten before the float cast below.
    master.loc[master.age == "95 plus", 'age'] = 95
    master["age"] = master["age"].astype(float)

    return pd.concat([master, terminal_age, under1])
Exemplo n.º 3
0
def get_measures(ecode, me_id, year_id, sex_id, version):
    """Fetch incidence, remission and excess-mortality measures for a model.

    For e-codes listed in ``inj_info.IM_RATIO_ECODES`` and years before
    ``help.LAST_YEAR`` the measures are interpolated over a year window and
    mortality is saved for each year of that window; otherwise the measures
    come straight from ``get_measures_get_draws``.

    :param ecode: injury e-code being processed
    :param me_id: modelable entity id passed to the measure getters
    :param year_id (int): first year of the window / year to pull
    :param sex_id: sex id passed through to the getters
    :param version: version passed to ``save_mortality``
    :return: whatever the selected measure getter returns
    """
    measure_table = db.get_ids(table='measure')

    def _measure_id(measure_name):
        # First measure_id whose measure_name matches exactly.
        matches = measure_table["measure_name"] == measure_name
        return measure_table.loc[matches, 'measure_id'].iloc[0]

    inc_id = _measure_id("Incidence")
    rms_id = _measure_id("Remission")
    emr_id = _measure_id("Excess mortality rate")

    demographics = db.get_demographics(gbd_team="epi",
                                       gbd_round_id=help.GBD_ROUND)
    location_ids = demographics["location_id"]
    age_group_ids = demographics["age_group_id"]

    interpolate = (ecode in inj_info.IM_RATIO_ECODES
                   and year_id < help.LAST_YEAR)
    if not interpolate:
        return get_measures_get_draws(me_id, location_ids, year_id, sex_id,
                                      age_group_ids, inc_id, rms_id, emr_id)

    # Before 2010 the window spans five years and mortality stops one year
    # short of its end; from 2010 on both run through help.LAST_YEAR.
    if year_id < 2010:
        year_end = year_id + 5
        mort_year_end = year_end - 1
    else:
        year_end = help.LAST_YEAR
        mort_year_end = year_end

    measure_dict = get_measures_interpolate(me_id, location_ids, sex_id,
                                            age_group_ids, inc_id, rms_id,
                                            emr_id, year_id, year_end)

    for mort_year in range(year_id, mort_year_end + 1):
        save_mortality(ecode, mort_year, sex_id, location_ids, age_group_ids,
                       version)

    return measure_dict
Exemplo n.º 4
0
def summarize_loc(source,
                  drawdir,
                  outdir,
                  location_id,
                  year_id,
                  rei_ids,
                  change_intervals=None,
                  gbd_round_id=5):
    '''Summarize every rei for a single location and write the results.

    Sets ``Globals.aw`` (age weights) and ``Globals.pop`` (population read
    from ``population_*.csv`` files in ``drawdir``), runs ``summ_loc`` for
    each rei in a pool of 10 workers, and writes:

    * ``single_year_{location_id}.csv`` in ``outdir``
    * ``multi_year_{location_id}.csv`` in ``outdir`` (only when there are
      multi-year rows), with rows containing NaN or +/-inf removed

    Both files are chmod-ed to 0o775.

    Args:
        source: passed through to ``summ_loc``
        drawdir (str): directory holding the ``population_*.csv`` files
        outdir (str): directory the summary csvs are written to
        location_id: location being summarized; also used in the file names
        year_id: passed through to ``summ_loc``
        rei_ids: iterable of rei ids, one ``summ_loc`` task per rei
        change_intervals: passed through to ``summ_loc``
        gbd_round_id: GBD round used for the age weights

    Returns:
        None. Results are written to disk.
    '''
    # Set global age weights
    gbd_round_map = get_ids('gbd_round')
    # Not used below; retained because .item() raises unless exactly one row
    # matches gbd_round_id, which rejects an unknown round id up front.
    gbd_round = gbd_round_map.loc[gbd_round_map.gbd_round_id ==
                                  gbd_round_id].gbd_round.item()
    Globals.aw = get_age_weights(gbd_round_id=int(gbd_round_id))

    # Set global population
    popfiles = glob(os.path.join(drawdir, 'population_*.csv'))
    pops = pd.concat([pd.read_csv(popfile) for popfile in popfiles])
    pops = pops.drop_duplicates(
        subset=['location_id', 'age_group_id', 'year_id', 'sex_id'])
    Globals.pop = pops.rename(columns={'population': 'pop_scaled'})

    # NOTE(review): the pool is created after the Globals are populated,
    # presumably so the worker processes see them -- keep this order.
    pool = Pool(10)
    try:
        results = pool.map(summ_loc, [((source, location_id, rei, year_id,
                                        change_intervals, gbd_round_id), {})
                                      for rei in rei_ids])
    finally:
        # Release the workers even when a task raised.
        pool.close()
        pool.join()

    # Keep only tuple results (anything else a worker returned is dropped),
    # then transpose into (single-year frames, multi-year frames).
    results = [res for res in results if isinstance(res, tuple)]
    results = list(zip(*results))

    single_year = pd.concat([res for res in results[0] if res is not None])
    single_year = single_year[[
        'rei_id', 'location_id', 'year_id', 'age_group_id', 'sex_id',
        'measure_id', 'metric_id', 'val', 'lower', 'upper'
    ]]
    single_file = os.path.join(outdir,
                               'single_year_{}.csv'.format(location_id))
    single_year.to_csv(single_file, index=False)
    os.chmod(single_file, 0o775)

    multi_year = pd.concat(results[1])
    if len(multi_year) > 0:
        multi_year = multi_year[[
            'rei_id', 'location_id', 'year_start_id', 'year_end_id',
            'age_group_id', 'sex_id', 'measure_id', 'metric_id', 'val',
            'lower', 'upper'
        ]]
        # replace() returns a new frame; its result must be kept, otherwise
        # the infinities survive and dropna() never sees them as missing.
        multi_year = multi_year.replace([np.inf, -np.inf], np.nan).dropna()
        multi_file = os.path.join(outdir,
                                  'multi_year_{}.csv'.format(location_id))
        multi_year.to_csv(multi_file, index=False)
        os.chmod(multi_file, 0o775)
Exemplo n.º 5
0
def get_sy_population(grp_pop, sex_id, pop_dir):
    """Get single-year-age population data for one sex.

    Reads ``sypops.nc`` from ``pop_dir``, restricts it to ``sex_id``, labels
    rows with numeric ages, and appends the terminal (age == 95) rows and an
    aggregated under-1 row (age set to 0) taken from ``grp_pop``.

    :param grp_pop: group-population df supplying the age 95 and under-1 rows
    :param sex_id (int): sex to select from the single-year dataset
    :param pop_dir (str): directory containing ``sypops.nc``
    :return: single-year population data-frame
    """
    # Local import: DataFrame.append was removed in pandas 2.0, so the rows
    # are stacked with pd.concat instead.
    import pandas as pd

    terminal_age = grp_pop.loc[grp_pop["age"] == 95]

    # Collapse every under-1 age group into one row per location/sex/year.
    under1 = grp_pop.loc[grp_pop.age < 1]
    under1 = under1.groupby(['location_id', 'sex_id',
                             'year_id']).sum().reset_index()
    under1["age"] = 0

    master = xr.open_dataset(os.path.join(pop_dir, 'sypops.nc'))
    master = master.loc[{'sex_id': [sex_id]}]
    master = master.to_dataframe().reset_index()

    # Swap age_group_id for the age group name, used as the age label.
    age_ids = db.get_ids(table="age_group")
    master = master.merge(age_ids, on=['age_group_id'])
    master.rename(columns={'age_group_name': 'age'}, inplace=True)
    master.drop('age_group_id', inplace=True, axis=1)
    # "95 plus" is the only label rewritten before the float cast below.
    master.loc[master.age == "95 plus", 'age'] = 95
    master["age"] = master["age"].astype(float)

    return pd.concat([master, terminal_age, under1])
Exemplo n.º 6
0
def setup_for_shiny(df, out_path):
    """
    Description:
        Prepares the final result of the '00_prep_hf_mktscan_parallel.py' for
        a diagnostic visualization and writes it to
        "{out_path}hf_inputs.csv".

    Args:
        df (object): pandas dataframe of input data. The body reads the
            columns hf_target_prop, std_err_adj, location_id, sex_id,
            cause_id and age_group_id.
        out_path (str): prefix for the output file. It is concatenated
            directly with 'hf_inputs.csv', so a directory must end with a
            path separator.

    Returns:
        None. The prepared data is written to csv.
    """
    # columns used in the final dataset.
    final_cols = [
        'hf_target_prop', 'std_err_adj', 'location_id', 'location_ascii_name',
        'sex_id', 'cause_id', 'age_group_id', 'age_group_name', 'cause_name'
    ]

    locations = get_location_metadata(location_set_id=35)[
        ['location_id', 'location_ascii_name']]
    ages = get_ids('age_group')
    causes = get_ids('cause')

    # Exclude composite etiologies for input diagnostics
    df = df.query('cause_id not in (520, 385, 499)')

    # location metadata
    df = df.merge(locations, on='location_id', how='inner')

    # add column with age group names
    df = df.merge(ages, on='age_group_id', how='inner')

    # To make the age progression linear and consecutive recode some of the
    # age_groups.
    df['age_group_id'] = df['age_group_id'].replace(to_replace=28, value=4)
    df.sort_values(by='age_group_id', axis=0, ascending=True, inplace=True)

    # add column with cause names
    df = df.merge(causes, on='cause_id', how='inner')

    # Drop unnecessary columns and give the value columns readable names.
    # The standard-error rename previously targeted 'hf_target_prop' a second
    # time, so it never took effect; it now targets 'std_err_adj'.
    df = df[final_cols].rename(columns={'hf_target_prop': 'proportion',
                                        'std_err_adj': 'standard_error'})

    # write the diagnostic input data to csv
    df.to_csv("{}hf_inputs.csv".format(out_path),
              index=False,
              encoding='utf-8')
Exemplo n.º 7
0
def get_modelable_entity_name(bundle_id):
    """Build the "Birth prevalence of ..." label for a bundle.

    Looks up the full-model modelable entity mapped to ``bundle_id`` in
    ``map_df``, fetches its name from the modelable-entity table, and returns
    the lower-cased name prefixed with "Birth prevalence of ". Each lookup
    uses ``.item()``, so it must match exactly one row.
    """
    is_bundle = map_df.fullmod_bundle == bundle_id
    me_id = map_df.loc[is_bundle, 'fullmod_ME'].item()

    me_table = get_ids('modelable_entity')
    is_me = me_table.modelable_entity_id == me_id
    raw_name = me_table.loc[is_me, 'modelable_entity_name'].item()

    return "Birth prevalence of {}".format(raw_name.lower())
Exemplo n.º 8
0
    # Save the location hierarchy next to the code.
    loc_meta = get_location_metadata(location_set_id=35, gbd_round_id=5)
    loc_meta.to_csv(os.path.join(code_dir, 'location_metadata.csv'),
                    index=False, encoding='utf8')

    # Norway plus every location whose parent is Norway.
    norway_id = 90
    norway_subs = loc_meta.loc[loc_meta.parent_id == norway_id,
                               'location_id'].tolist() + [norway_id]

    # National population; location_id is dropped so the merge below can
    # attach the national value to every subnational row.
    country_pop = get_population(location_id=norway_id, year_id='all',
                                 sex_id='all', age_group_id=164,
                                 gbd_round_id=5, status='best')
    country_pop.drop('location_id', axis=1, inplace=True)
    subs_pop = get_population(location_id=norway_subs, year_id='all',
                              sex_id='all', age_group_id=164,
                              gbd_round_id=5, status='best')

    # Join on every shared column except the location and the value itself.
    merge_keys = [c for c in subs_pop.columns
                  if c not in ['location_id', 'population']]
    population = subs_pop.merge(country_pop,
                                on=merge_keys,
                                suffixes=('_subs', '_national'))

    # Share of the national population held by each location. Parentheses
    # carry the expression across the line break; without them the
    # statement does not parse.
    population.loc[:, 'pop_weight'] = (population['population_subs'] /
                                       population['population_national'])

    sex_meta = get_ids('sex')
    population = pd.merge(population, sex_meta, on='sex_id')
    population.loc[:, 'age_start'] = 0
    population.rename(columns={'year_id': 'year_start'}, inplace=True)
    population.to_csv(os.path.join(code_dir, 'norway_population.csv'),
                      index=False, encoding='utf8')


# Pair each cause with its full-model bundle, row by row from map_df.
cause_bundle_pairs = list(zip(map_df.cause, map_df.fullmod_bundle))
# Number of bundle entries in map_df (one per row; duplicates are counted).
bundle_num = len(map_df.fullmod_bundle.tolist())

# NOTE(review): have_paths is set outside this excerpt; 0 presumably means
# the request-id paths still have to be generated -- confirm.
if have_paths==0:
    job_string = ''
    for cause, bundle in cause_bundle_pairs:
        # Cast to int before formatting the job name (presumably so a
        # float-typed bundle id does not show a decimal part).
        bundle = int(bundle)
        job_name = "get_reqids_{b}_{c}".format(b=bundle, c=cause)
Exemplo n.º 9
0
	# Make the output directories for diagnostics and prevalence draws
	FILE_PATHS = [out_path + 'diagnostics/',
	              out_path + 'prevalence/']

	for file_path in FILE_PATHS:
		if not os.path.exists(file_path):
			os.makedirs(file_path)

	# Make correction factors
	mktscan_draws = get_correction_factors(info_path,
	                                       in_path,
	                                       out_path,
	                                       draws_path)

	# Get age group and cause IDs and names:
	AGE_NAMES = get_ids('age_group')[['age_group_id', 'age_group_name']]
	CAUSE_NAMES = get_ids('cause')[['cause_id', 'cause_name']]

	# get the locations
	locations_df = get_location_metadata(location_set_id=9)

	# Location IDs used by Epi.
	# NOTE(review): the 'location_ids' key depends on the get_demographics
	# version in use -- confirm it matches the installed shared functions.
	DEMOGRAPHICS = list(get_demographics(gbd_team='epi')['location_ids'])

	# Keep admin0 locations (location_type_id == 2) only, or additionally
	# every Epi location when admin0_only is not "YES".
	name_cols = ['location_id', 'location_name']
	if admin0_only == "YES":
		# The query string has no placeholder, so no .format() call is needed.
		LOCATION_NAMES = locations_df.query('location_type_id == 2')[name_cols]
	else:
		location_filter = 'location_id in {} or location_type_id == 2'.format(DEMOGRAPHICS)
		LOCATION_NAMES = locations_df.query(location_filter)[name_cols]
	
	# location IDs
Exemplo n.º 10
0
import gbd.constants as gbd
from gbd.decomp_step import decomp_step_id_from_decomp_step
from gbd.estimation_years import gbd_round_from_gbd_round_id
from test_support.profile_support import profile

from split_models.exceptions import IllegalSplitCoDArgument
from split_models.job_classes import SplitCoDSwarm
from split_models.validate import (validate_decomp_step_input, validate_ids)

# Python 3 has no separate ``long`` type; alias it to ``int`` so the name
# resolves on either major version.
if sys.version_info[0] >= 3:
    long = int

# Cause set ids for reporting and for computation, respectively.
REPORTING_CAUSE_SET_ID = 3
COMPUTATION_CAUSE_SET_ID = 2

# Distinct ids known to get_ids, looked up once when the module is imported.
VALID_CAUSE_IDS = get_ids(table='cause')['cause_id'].unique()
VALID_MEIDS = get_ids(
    table='modelable_entity')['modelable_entity_id'].unique()


@profile
def _launch_cod_splits(source_cause_id, target_cause_ids, target_meids,
                       prop_meas_id, gbd_round_id, decomp_step, output_dir,
                       project):
    """
    Split the given source_cause_id given target_meid proportions, saved
    to the target_cause_ids in output_dir.

    Arguments:
        source_cause_id (int): cause_id for the draws to be split
        target_cause_ids (intlist): list of cause ids that you want the new
            outputted subcauses to be identified by
Exemplo n.º 11
0
 def new_cause_list(self):
     """Reload ``self.cause_list`` with the cause_id/acause columns of the cause table."""
     wanted_columns = ["cause_id", "acause"]
     self.cause_list = get_ids(table="cause")[wanted_columns]