def main(version):
    # pull the demographics for COD because we want every year
    dems = db.get_demographics(gbd_team="cod", gbd_round_id=help.GBD_ROUND)
    rate_years = db.get_demographics(gbd_team='epi', gbd_round_id=help.GBD_ROUND)['year_id']
    indexcols = ['location_id', 'sex_id', 'year_id', 'age_group_id']

    # make directory for these files for a given version
    outdir = os.path.join("FILEPATH")
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    raw_rate_in = rate_in_baseline()

    # (1) Mortality
    mort = get_mortality(dems, shock=0)
    write_results(df=mort, filepath=os.path.join(outdir, "mortality.nc"),
                  indexcols=indexcols)
    rate_in_omega(mort, raw_rate_in, rate_years, outdir)

    # (2) Populations
    pops = get_populations(dems)
    write_results(df=pops, filepath=os.path.join(outdir, "FILEPATH.nc"),
                  indexcols=indexcols)

    # (3) Single-Year Populations
    sy_pops = get_sy_populations(dems)
    write_results(df=sy_pops, filepath=os.path.join(outdir, "FILEPATH.nc"),
                  indexcols=indexcols)
def get_gbd_estimation_years(gbd_round_id: int) -> List[int]:
    """Gets the estimation years for a particular GBD round."""
    from db_queries import get_demographics
    warnings.filterwarnings("default", module="db_queries")
    return get_demographics(gbd_constants.CONN_DEFS.EPI,
                            gbd_round_id=gbd_round_id)['year_id']
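# Illustrative sketch, not part of the original codebase: get_demographics
# returns a dict keyed by demographic dimension, and the snippets in this file
# index into these four keys. The ID values here are placeholders, not real
# GBD IDs.
example_demo = {
    'location_id': [6, 102],        # most-detailed locations
    'year_id': [1990, 1995, 2000],  # estimation years
    'age_group_id': [2, 3, 4],      # most-detailed age groups
    'sex_id': [1, 2],               # male, female
}
assert sorted(example_demo) == ['age_group_id', 'location_id', 'sex_id', 'year_id']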
def main(input_dir, year_id, sex_id, location_id, dm_out_dir, location):
    # grab mortality csv from parent and subset
    dems = db.get_demographics(gbd_team="epi")

    # loop over outcomes to create a separate rate_in file for each;
    # epilepsy also has a rho (remission)
    for outcome in ["long_modsev", "epilepsy"]:
        mortality = pd.read_csv(
            os.path.join(dm_out_dir, "02_temp/03_data/all_cause_mortality.csv"))
        mortality = mortality.loc[
            (mortality.age_group_id.isin(dems['age_group_id'])) &
            (mortality.location_id == location_id) &
            (mortality.year_id == year_id) &
            (mortality.sex_id == sex_id)]

        # format mortality to append
        mortality = format_mortality_for_rate_in(mortality, input_dir)

        # run make_rate_in to generate loc/year/sex-specific files
        make_rate_in(mortality, location_id, sex_id, year_id, dm_out_dir,
                     input_dir, outcome)

    # create a finished.txt for the check system
    finished = []
    checks = os.path.join(dm_out_dir, "04_ODE/rate_in/checks")
    if not os.path.exists(checks):
        os.makedirs(checks)
    np.savetxt(
        os.path.join(
            checks,
            "finished_{}_{}_{}.txt".format(location_id, sex_id, year_id)),
        finished)
def main(version):
    start = help.start_timer()

    # pull identifiers
    ratio_ecodes = inj_info.IM_RATIO_ECODES
    dems = db.get_demographics(gbd_team='cod', gbd_round_id=help.GBD_ROUND)
    sexes = dems['sex_id']
    mortyears = [x for x in dems['year_id'] if x >= 1990]
    regmap = get_region_map(dems['location_id'])

    # iterate over ecode, year, and sex to get the ratios all in one data frame
    final_list = []
    for ecode in ratio_ecodes:
        year_arr_list = []
        for year in mortyears:
            sex_arr_list = []
            for sex in sexes:
                print('{}, {}, sex {}'.format(ecode, year, sex))
                sys.stdout.flush()  # write to log file
                ratios = compute_ratio(ecode,
                                       str(versions.get_best_version(ecode)),
                                       year, sex)
                sex_arr_list.append(ratios)
            combined_sexes = xr.concat(sex_arr_list, 'sex_id')
            year_arr_list.append(combined_sexes)
        combined_years = xr.concat(year_arr_list, 'year_id')
        print('Summarize {}'.format(ecode))
        summarized = summarize(combined_years, regmap)
        final_list.append(summarized)

    final_ratios = xr.concat(final_list, pd.Index(ratio_ecodes, name='ecode'))
    print('Write results')
    write_results(final_ratios, version)
    help.end_timer(start)
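# Minimal sketch (toy data, not GBD output) of the xr.concat pattern used
# above: per-sex arrays are stacked along 'sex_id', and the final results are
# stacked along a brand-new 'ecode' dimension supplied as a pandas Index.
# 'ecode_a'/'ecode_b' are placeholder names.
import numpy as np
import pandas as pd
import xarray as xr

per_sex = [
    xr.DataArray(np.random.rand(3), dims='age_group_id', coords={'sex_id': s})
    for s in (1, 2)
]
by_sex = xr.concat(per_sex, 'sex_id')  # promotes the scalar sex_id coord to a dimension
by_ecode = xr.concat([by_sex, by_sex],
                     pd.Index(['ecode_a', 'ecode_b'], name='ecode'))
print(by_ecode.dims)  # ('ecode', 'sex_id', 'age_group_id')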
class Ages:
    MOST_DETAILED_GROUP_IDS: List[int] = get_demographics(
        gbd_round_id=gbd.GBD_ROUND_ID, gbd_team='cod'
    )[Columns.AGE_GROUP_ID]
    ALL_AGE_GROUPS: List[int] = MOST_DETAILED_GROUP_IDS + gbd.GBD_COMPARE_AGES
    END_OF_ROUND_AGE_GROUPS: List[int] = [
        37, 39, 155, 160, 197, 228, 230, 232, 243, 284, 285, 286, 287, 288,
        289, 420, 430]
def get_measures(ecode, me_id, year_id, sex_id, version):
    # look up the measure_ids for incidence, remission, and EMR
    ids = db.get_ids(table='measure')
    inc_id = ids.loc[ids["measure_name"] == "Incidence", 'measure_id'].iloc[0]
    rms_id = ids.loc[ids["measure_name"] == "Remission", 'measure_id'].iloc[0]
    emr_id = ids.loc[ids["measure_name"] == "Excess mortality rate",
                     'measure_id'].iloc[0]

    dems = db.get_demographics(gbd_team="epi", gbd_round_id=help.GBD_ROUND)
    location_ids = dems["location_id"]
    age_group_ids = dems["age_group_id"]

    if ecode in inj_info.IM_RATIO_ECODES and year_id < help.LAST_YEAR:
        if year_id < 2010:
            year_end = year_id + 5
            mort_year_end = year_end - 1
        else:
            year_end = help.LAST_YEAR
            mort_year_end = year_end
        measure_dict = get_measures_interpolate(me_id, location_ids, sex_id,
                                                age_group_ids, inc_id, rms_id,
                                                emr_id, year_id, year_end)
        for year in range(year_id, mort_year_end + 1):
            save_mortality(ecode, year, sex_id, location_ids, age_group_ids,
                           version)
    else:
        measure_dict = get_measures_get_draws(me_id, location_ids, year_id,
                                              sex_id, age_group_ids, inc_id,
                                              rms_id, emr_id)
    return measure_dict
def transform_to_rate_space(summary_df):
    '''
    Transform the summary data into rate space to allow for easier validation
    '''
    print("**TRANSFORMING TO RATE SPACE**")
    demos = get_demographics(gbd_round_id=5, gbd_team='cod')
    year_ids = list(range(1950, 2018))
    full_pops = get_population(gbd_round_id=5,
                               age_group_id=demos['age_group_id'],
                               sex_id=demos['sex_id'],
                               year_id=year_ids,
                               run_id=104,
                               location_id=demos['location_id'])
    full_pops = full_pops.loc[:, ['age_group_id', 'sex_id', 'year_id',
                                  'location_id', 'population']]
    summary_df = pd.merge(left=summary_df, right=full_pops,
                          on=['age_group_id', 'sex_id', 'year_id',
                              'location_id'],
                          how='inner')
    # divide counts by population and clamp the resulting rates to [0, 1]
    for col in ['val', 'lower', 'upper']:
        summary_df[col] = summary_df[col] / summary_df['population']
        summary_df.loc[summary_df[col] > 1, col] = 1
        summary_df.loc[summary_df[col] < 0, col] = 0
    summary_df = summary_df.drop(labels=['population'], axis=1)
    print("  ...Successfully transformed summary values into rate space.\n")
    return summary_df
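# Toy check (made-up numbers) of the count-to-rate step above; Series.clip is
# a vectorized equivalent of the two .loc clamps.
import pandas as pd

toy = pd.DataFrame({'val': [50.0, -1.0], 'population': [1000.0, 1000.0]})
toy['val'] = (toy['val'] / toy['population']).clip(lower=0, upper=1)
print(toy['val'].tolist())  # [0.05, 0.0]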
def main(decomp, version):
    start = help.start_timer()
    ratio_ecodes = inj_info.IM_RATIO_ECODES
    dems = db.get_demographics(gbd_team='cod', gbd_round_id=help.GBD_ROUND)
    sexes = dems['sex_id']
    mortyears = [x for x in dems['year_id'] if x >= 1990]
    regmap = get_region_map(dems['location_id'])

    final_list = []
    for ecode in ratio_ecodes:
        year_arr_list = []
        for year in mortyears:
            sex_arr_list = []
            for sex in sexes:
                ratios = compute_ratio(ecode, decomp,
                                       str(versions.get_best_version(ecode)),
                                       year, sex)
                sex_arr_list.append(ratios)
            combined_sexes = xr.concat(sex_arr_list, 'sex_id')
            year_arr_list.append(combined_sexes)
        combined_years = xr.concat(year_arr_list, 'year_id')
        print('Summarize {}'.format(ecode))
        summarized = summarize(combined_years, regmap)
        final_list.append(summarized)

    final_ratios = xr.concat(final_list, pd.Index(ratio_ecodes, name='ecode'))
    write_results(final_ratios, decomp, version)
    help.end_timer(start)
def copy_draws(draws_dir, meid):
    # find which locations already have draw files on disk
    locs = []
    for f in glob.glob(f"{draws_dir}/{meid}/*.csv"):
        locs.append(int(f.rsplit("/")[-1][:-4]))

    study_locs = db.get_demographics("epi")["location_id"]
    loc_h = db.get_location_metadata(35)
    missing = [l for l in study_locs if l not in locs]

    # use an existing location's file as a template for all-zero draws
    zero_draws = pd.read_csv(f'{draws_dir}/{meid}/101.csv')
    draw_cols = zero_draws.columns[zero_draws.columns.str.contains("draw")]
    zero_draws[draw_cols] = zero_draws[draw_cols] * 0.0

    print(len(missing))
    for place in missing:
        if loc_h.loc[loc_h.location_id == place, "level"].values[0] == 3:
            # countries with no draws get zeros
            zero_draws['location_id'] = place
            zero_draws.to_csv(f'{draws_dir}/{meid}/{place}.csv')
        elif loc_h.loc[loc_h.location_id == place, "level"].values[0] == 4:
            # subnationals inherit their parent's draws
            parent = loc_h.loc[loc_h.location_id == place,
                               "parent_id"].values[0]
            draws = pd.read_csv(f'{draws_dir}/{meid}/{parent}.csv')
            draws['location_id'] = place
            draws.to_csv(f'{draws_dir}/{meid}/{place}.csv')
            print(place)
    return None
def copy_and_backfill(self):
    prof_id_cret_old = self.me_map["cretinism"]["srcs"]
    old = self.me_dict[prof_id_cret_old].reset_index()

    # Handle year differences between GBD 2016 and GBD 2017
    old.loc[old.year_id == 2016, 'year_id'] = 2017

    # Handle Saudi Arabia
    loc_meta = get_location_metadata(location_set_id=35, gbd_round_id=4)
    saudia_id = 152
    saudia_sub_nats = loc_meta.loc[loc_meta.parent_id == saudia_id,
                                   'location_id'].tolist()
    saudi_arabia = old.loc[old.location_id.isin(saudia_sub_nats), :].copy()
    saudi_arabia.loc[:, 'location_id'] = saudia_id
    saudi_arabia = saudi_arabia.drop_duplicates(keep='first')
    old = pd.concat([old, saudi_arabia], axis=0)

    # Handle other location differences between GBD 2016 and GBD 2017
    data_cols = self.draw_cols
    data_dct = {'data_cols': data_cols}
    index_cols = list(set(old.columns) - set(data_cols))
    index_cols.remove('location_id')
    demo = get_demographics(gbd_team='epi', gbd_round_id=5)
    index_dct = {
        tuple(index_cols): list(set(tuple(x) for x in old[index_cols].values)),
        'location_id': demo['location_id']
    }
    gbdizer = gbdize.GBDizeDataFrame(
        dimensionality.DataFrameDimensions(index_dct, data_dct))
    new = gbdizer.fill_location_from_nearest_parent(old, location_set_id=35,
                                                    gbd_round_id=5)
    prof_id_cret_new = self.me_map["cretinism"]["trgs"]
    self.me_dict[prof_id_cret_new] = new
def grab_prevalence_draws(me_id, year_id, gbd_round_id,
                          decomp_step) -> pd.DataFrame:
    # grabs prevalence draws for the given year, me_id, and locations
    demo = get_demographics("epi", gbd_round_id=gbd_round_id)
    print(demo['age_group_id'], me_id)
    df = get_draws('modelable_entity_id', me_id,
                   source='epi',
                   measure_id=5,
                   location_id=demo['location_id'],
                   year_id=year_id,
                   age_group_id=demo['age_group_id'],
                   sex_id=demo['sex_id'],
                   gbd_round_id=gbd_round_id,
                   decomp_step=decomp_step)
    return df
def get_all_cause_mortality():
    dems = db.get_demographics(gbd_team="epi", gbd_round_id=6)
    mortality = db.get_envelope(age_group_id=dems["age_group_id"],
                                location_id=dems["location_id"],
                                year_id=dems["year_id"],
                                sex_id=dems["sex_id"],
                                with_hiv=1,
                                rates=1,
                                decomp_step=ds,
                                gbd_round_id=gbd_round_id)
    # approximate the standard error from the 95% UI: (upper - lower) / (2 * 1.96)
    mortality["std"] = (mortality["upper"] - mortality["lower"]) / 3.92
    mortality.drop(['run_id'], axis=1, inplace=True)
    filename = "all_cause_mortality.csv"
    mortality.to_csv(os.path.join(out_dir_rate, "02_temp/03_data", filename),
                     index=False)
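# Worked check of the standard-error approximation above: for a normal
# distribution, the 95% interval spans 2 * 1.96 = 3.92 standard deviations,
# so std is roughly (upper - lower) / 3.92. The numbers below are made up.
upper, lower = 0.012, 0.008
std = (upper - lower) / 3.92
print(round(std, 6))  # 0.00102, i.e. 0.004 / 3.92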
def main() -> None:
    args = parse_args()
    user = getpass.getuser()
    today_string = datetime.date.today().strftime('%m%d%y')
    workflow = Workflow(
        workflow_args=f'anemia_malaria_{args.decomp_step}_{today_string}',
        name=f'anemia_malaria_{args.decomp_step}_{today_string}',
        description=f'Anemia: Malaria pre-processing for decomp {args.decomp_step}',
        project="proj_anemia",
        stderr="FILEPATH",
        stdout="FILEPATH",
        working_dir=path_to_directory,
        resume=True)

    # first submit the subtract clinical jobs
    subtract_tasks = []
    demo = get_demographics("epi", gbd_round_id=args.gbd_round_id)
    for loc in demo['location_id']:
        task = PythonTask(
            script="FILEPATH",
            args=["--location_id", loc,
                  "--gbd_round_id", args.gbd_round_id,
                  "--decomp_step", args.decomp_step,
                  "--out_dir", args.out_dir],
            name=f"malaria_subtract_{loc}",
            tag="malaria_subtract",
            num_cores=2,
            m_mem_free="8G",
            max_attempts=3,
            max_runtime_seconds=60 * 60 * 3,
            queue='all.q')
        subtract_tasks.append(task)
    workflow.add_tasks(subtract_tasks)

    # once the new draws exist, save results
    for modelable_entity_id in [19390, 19394]:
        task = PythonTask(
            script="FILEPATH",
            args=["--modelable_entity_id", modelable_entity_id,
                  "--gbd_round_id", args.gbd_round_id,
                  "--decomp_step", args.decomp_step,
                  "--out_dir", args.out_dir],
            name=f"malaria_save_{modelable_entity_id}",
            tag="malaria_save",
            upstream_tasks=subtract_tasks,
            num_cores=8,
            m_mem_free="100G",
            max_attempts=3,
            max_runtime_seconds=60 * 60 * 24,
            queue='all.q')
        workflow.add_task(task)

    status = workflow.run()
    print(f'Workflow finished with status {status}')
def upload():
    folder = os.path.join('FILEPATH')
    dems = db.get_demographics(gbd_team='epi', gbd_round_id=help.GBD_ROUND)
    save_results_epi(input_dir=folder,
                     input_file_pattern='FILEPATH.h5',
                     modelable_entity_id=3136,
                     description='other msk adjusted for injuries fractures and dislocations',
                     year_id=dems['year_id'],
                     measure_id=5,
                     mark_best=True)
    print('Successfully uploaded Other MSK')
def get_locations():
    '''
    Description: get the list of locations to iterate through for every part
        of the maternal custom process, down to one level of subnationals

    Args: None

    Output: (list) location_ids
    '''
    logger.info("Getting locations")
    location_ids = get_demographics(gbd_team="cod")["location_id"]
    return location_ids
def __init__(self, cluster_dir, year_id, input_me, output_me):
    '''This class incorporates all the functions that the specific causes
    share, but each cause uses them in a different sequence.'''
    self.cluster_dir = cluster_dir
    self.year_id = year_id
    self.input_me = input_me
    self.output_me = output_me
    self.conn_def = "cod"
    self.gbd_round = 5
    epi_demographics = get_demographics("epi", gbd_round_id=5)
    self.most_detailed_ages = epi_demographics['age_group_id']
    self.most_detailed_locs = epi_demographics['location_id']
def fill_square(df, col, gbd_round_id):
    '''Make data square across a column for a set of index columns.'''
    demo = get_demographics(gbd_team='epi', gbd_round_id=gbd_round_id)

    draw_cols = list(df.filter(like='draw_').columns)
    index_cols = list(set(df.columns) - set(draw_cols))
    index_cols.remove(col)

    index_dct = {
        tuple(index_cols): list(set(tuple(x) for x in df[index_cols].values)),
        col: demo[col]
    }
    data_dct = {'draw_cols': draw_cols}
    gbdizer = gbdize.GBDizeDataFrame(
        dimensionality.DataFrameDimensions(index_dct, data_dct))
    return gbdizer.fill_empty_indices(df, 0)
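# Minimal pandas-only sketch of the "make square" idea above (no gbdize, toy
# data): reindex to the full cross product of index values and fill the holes
# with 0.
import pandas as pd

df = pd.DataFrame({'location_id': [6, 6], 'sex_id': [1, 2],
                   'draw_0': [0.1, 0.2]})
full = pd.MultiIndex.from_product([[6, 102], [1, 2]],
                                  names=['location_id', 'sex_id'])
square = (df.set_index(['location_id', 'sex_id'])
            .reindex(full, fill_value=0)
            .reset_index())
print(len(square))  # 4 rows: every location/sex combination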
def new_simulation_index(self, year_id):
    lt = dbtrees.loctree(
        location_set_version_id=self.location_set_version_id)
    location_id = [loc.id for loc in lt.leaves()]
    demo = get_demographics(gbd_team="epi", gbd_round_id=self.gbd_round_id)
    if not year_id:
        year_id = demo['year_id']
    simulation_index = {
        "year_id": year_id,
        "location_id": location_id,
        "sex_id": demo['sex_id'],
        "age_group_id": demo['age_group_id']
    }
    self.simulation_index = simulation_index
def __init__(self, cluster_dir, year_id, input_me, output_me, decomp_step):
    '''This class incorporates all the functions that the specific causes
    share, but each cause uses them in a different sequence.'''
    self.cluster_dir = cluster_dir
    self.year_id = year_id
    self.input_me = input_me
    self.output_me = output_me
    self.conn_def = "cod"
    self.gbd_round = gbd.GBD_ROUND_ID
    self.decomp_step = decomp_step
    # get_demographics should return the most detailed demographics for the
    # current GBD round
    epi_demographics = get_demographics("epi", gbd_round_id=self.gbd_round)
    self.most_detailed_ages = epi_demographics['age_group_id']
    self.most_detailed_locs = epi_demographics['location_id']
def get_summary_data(infile, encoding, cause):
    print("**GETTING SUMMARY DATA** {}".format(get_time_now()))
    full_data = pd.read_csv(infile, encoding=encoding)
    summary_df = full_data.loc[full_data['cause_id'] == cause, :]

    # Data assertions
    required_cols = ['year_id', 'location_id', 'sex_id', 'age_group_id',
                     'val', 'lower', 'upper']
    for col in required_cols:
        assert col in summary_df.columns, \
            "Required column '{}' not in data".format(col)

    demos = get_demographics(gbd_team='cod', gbd_round_id=5)
    years = list(range(1950, 2018))
    summary_df = summary_df.loc[
        summary_df['location_id'].isin(demos['location_id']), :]
    summary_df = summary_df.loc[summary_df['year_id'].isin(years), :]
    print("  ...Summary data pulled. {}\n".format(get_time_now()))
    return summary_df
def adj_data_template(df):
    print("Adjusting years")
    # Create the year_id column and set it to the closest year that contains
    # GBD results
    df = templating.df_mean(df, "year_id", ["year_start", "year_end"])
    gbd_years = get_demographics('epi')['year_id']
    df['year_id'] = df['year_id'].apply(
        lambda x: min(gbd_years, key=lambda y: abs(y - x)))

    print("Adjusting sexes")
    # fill in whichever of sex/sex_id is missing
    if 'sex_id' in df.columns:
        sex_dict = {1: 'Male', 2: 'Female', 3: 'Both'}
        df['sex'] = df.apply(lambda x: sex_dict[x['sex_id']], axis=1)
    else:
        sex_dict = {'Male': 1, 'male': 1,
                    'Female': 2, 'female': 2,
                    'Both': 3, 'both': 3}
        df['sex_id'] = df.apply(lambda x: sex_dict[x['sex']], axis=1)

    print("Adjusting ages")
    # keep rows whose age range spans fewer than 40 years, or terminal ages
    df = df.loc[((df["age_end"] - df["age_start"]) < 40) |
                (df["age_start"] >= 80)]

    # get age mapping
    age_map = get_age_metadata(age_group_set_id=12)[[
        'age_group_id', 'age_group_years_start', 'age_group_years_end']]

    # find the intersection between the input dataframe and the age group df
    df = adjust_span(df, ('age_start', 'age_end'), age_map,
                     ('age_group_years_start', 'age_group_years_end'))
    if df.empty:
        raise NoNonZeroValues
    return df
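# Toy check of the nearest-estimation-year mapping used above; the year list
# mirrors the GBD estimation years named elsewhere in this file.
gbd_years = [1990, 1995, 2000, 2005, 2010, 2015, 2017]
for raw_year in (1992, 2013, 2016):
    print(raw_year, min(gbd_years, key=lambda y: abs(y - raw_year)))
# 1992 -> 1990, 2013 -> 2015, 2016 -> 2015 (ties resolve to the year that
# appears first in the list)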
def main(in_dir, out_dir):
    # grab the demographics to loop over when reading in all of the files
    dems = db.get_demographics(gbd_team="epi")

    # read in all of the files
    data = []
    for loc in dems['location_id']:
        print("Reading {}".format(loc))
        for year in dems['year_id']:
            for sex in dems['sex_id']:
                df = read_data(functional, outcome, loc, year, sex)
                data.append(df)

    # put all of the little dfs into one big df, then collapse because we
    # don't care about loc/year/sex
    raw = pd.concat(data)
    collapsed = collapse(raw)

    # format the value file and then append it to the collapsed values
    value = get_values()
    result = pd.concat([collapsed, value])

    # output the results to the value_in directory
    folder = os.path.join(out_dir, "value_in")
    if not os.path.exists(folder):
        os.makedirs(folder)
    filepath = os.path.join(
        folder,
        "value_in_{functional}_{outcome}.csv".format(functional=functional,
                                                     outcome=outcome))
    result.to_csv(filepath, index=False)

    # create a finished.txt for the check system
    finished = []
    checks = os.path.join(out_dir, "value_in/checks")
    if not os.path.exists(checks):
        os.makedirs(checks)
    np.savetxt(
        os.path.join(checks,
                     "finished_{}_{}.txt".format(functional, outcome)),
        finished)
def get_data(self, model_version_id):
    demo_query = """
        SELECT
            t3mvd.model_version_dismod_id as {data_key},
            t3mvd.location_id,
            t3mvd.sex_id,
            t3mvd.year_start,
            t3mvd.year_end,
            t3mvd.age_start,
            t3mvd.age_end,
            t3mvd.measure_id,
            t3mvd.nid,
            t3mvd.underlying_nid,
            t3mvd.outlier_type_id
        FROM
            epi.t3_model_version_dismod t3mvd
        WHERE
            t3mvd.model_version_id = {model_version_id}
    """.format(data_key=self._data_key, model_version_id=model_version_id)
    df = ezfuncs.query(demo_query, conn_def=envs.Environment.get_odbc_key())

    # subset out demographics
    df = df.loc[df["sex_id"] != 3]  # get rid of both-sex rows
    # drop age ranges spanning 20+ years unless they are terminal
    df = df.loc[((df["age_end"] - df["age_start"]) < 20) |
                (df["age_start"] >= 80)]
    if df.empty:
        raise NoNonZeroValues

    # Add a year_id column set to the closest GBD estimation year
    df['year_mid'] = (df['year_start'] + df['year_end']) / 2
    gbd_years = get_demographics('epi')['year_id']
    df['year_id'] = df['year_mid'].apply(
        lambda x: min(gbd_years, key=lambda y: abs(y - x)))
    df = df.drop(labels=['year_mid'], axis=1)

    # set index
    df[self._data_key] = df[self._data_key].astype(int)
    df = df.set_index(self._data_key)
    return df
def main(ecode, year_id, sex_id, decomp, version):
    tic = time.time()
    version = version.rstrip()
    dems = db.get_demographics(gbd_team="epi", gbd_round_id=help.GBD_ROUND)
    env_version = versions.get_env(ecode, version)

    pop_path = paths.DATA_DIR / f"flats/{env_version}/pops.nc"
    pops = xr.open_dataset(pop_path)
    pops = pops.loc[{'year_id': [year_id], 'sex_id': [sex_id]}]

    ratio_file = xr.open_dataarray(
        os.path.join(paths.DATA_DIR, decomp, 'inc_mortality_ratios',
                     str(versions.get_crv(ecode, version)), 'ratios.nc'))
    regmap = get_region_map(dems['location_id'])
    incidence = get_shock_inc(ecode, pops, dems, year_id, sex_id, decomp,
                              ratio_file, regmap)
    write_results(incidence, ecode, decomp, version, year_id, sex_id)
def _add_super_squeeze_task(self, node):
    logging.info(f"Adding {node} task")

    # make output directories
    self._create_output_directories(self.pgraph.nodes[node]["outs"])

    # get dependency_list before parallelizing since the dependencies are
    # the same for each parallelized demographic
    dep_list = get_dependencies(node, self.pgraph, self._task_registry)

    epi_demo = get_demographics("epi", gbd_round_id=self.gbd_round_id)
    for location_id in epi_demo[Params.LOCATION_ID]:
        for year_id in self.YEAR_IDS:
            for sex_id in epi_demo[Params.SEX_ID]:
                ss_task = self._super_squeeze_fac.get_task(
                    node=node,
                    output_dir=self.DATA_DIR,
                    location_id=location_id,
                    year_id=year_id,
                    sex_id=sex_id,
                    decomp_step=self.decomp_step,
                    n_draws=self.N_DRAWS,
                    dependency_list=dep_list)
                self.workflow.add_task(ss_task)
                self._task_registry[SuperSqueezeFactory.get_task_name(
                    node, location_id, year_id, sex_id)] = ss_task

    ss_upstream = [
        self._task_registry[t] for t in list(self._task_registry.keys())
        if DAG.Tasks.SUPER_SQUEEZE in t
    ]
    description = (
        f"Super_Squeeze_auto_mark_{Params.DESCRIPTION_MAP[self.N_DRAWS]}")
    measure_id = [gbd.measures.PREVALENCE]
    for meid in self.pgraph.nodes[node]["outs"]:
        self._add_save_task(
            meid, "{location_id}/{measure_id}_{year_id}_{sex_id}.h5",
            description, measure_id, self.YEAR_IDS, self.N_DRAWS,
            ss_upstream)
def get_hale_ages(gbd_round_id: int) -> Tuple[List[int], List[int]]:
    """
    Pull age group IDs used in HALE outputs and age group IDs used to
    aggregate under-one ages.
    """
    # Get age group IDs used in this GBD round and sort chronologically.
    demo = db_queries.get_demographics('epi', gbd_round_id)
    age_group_ids = demo[columns.AGE_GROUP_ID]
    age_spans = (
        get_age_spans()
        .query(f'{columns.AGE_GROUP_ID} in @age_group_ids')
        .sort_values(columns.AGE_GROUP_YEARS_START)
    )

    # Get under-one age groups (including birth) and the full list of HALE
    # age groups.
    under_one_ages = [age_groups.BIRTH] + age_spans.loc[
        age_spans[columns.AGE_GROUP_YEARS_START] < 1, columns.AGE_GROUP_ID
    ].tolist()
    hale_ages = [age_groups.UNDER_ONE] + age_spans.loc[
        ~age_spans[columns.AGE_GROUP_ID].isin(under_one_ages),
        columns.AGE_GROUP_ID
    ].tolist()
    return hale_ages, under_one_ages
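# Toy walk-through of the under-one split above, with made-up spans and
# hypothetical sentinel IDs standing in for age_groups.BIRTH and
# age_groups.UNDER_ONE.
import pandas as pd

age_spans = pd.DataFrame({'age_group_id': [2, 3, 5],
                          'age_group_years_start': [0.0, 0.5, 1.0]})
BIRTH, UNDER_ONE = 164, 28  # placeholder sentinel IDs
under_one = [BIRTH] + age_spans.loc[age_spans.age_group_years_start < 1,
                                    'age_group_id'].tolist()
hale = [UNDER_ONE] + age_spans.loc[~age_spans.age_group_id.isin(under_one),
                                   'age_group_id'].tolist()
print(under_one, hale)  # [164, 2, 3] [28, 5]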
######################################################
# import modules
import db_queries as db
import pandas as pd
import numpy as np
import os
import copy
import sys
import time
import shutil

# import demographics, set directories, set variables
dems = db.get_demographics(gbd_team="epi")
location_ids = dems['location_id']

code_dir = "FILEPATH"
shell = "FILEPATH"
in_dir = "FILEPATH"
in_dir_data = "FILEPATH" + date + "FILEPATH"
in_dir_value = "FILEPATH" + date + "FILEPATH"
in_dir_rate = "FILEPATH" + date + "FILEPATH"
out_dir_ODE = "FILEPATH" + date + "FILEPATH"

outcomes = ["epilepsy", "long_modsev"]
functional = "encephalitis"

# delete / recreate checks folder, log start time
######################################################
# import modules
import db_queries as db
import pandas as pd
import numpy as np
import os
import copy
import sys
import time
import shutil

# import demographics, set directories, set variables
dems = db.get_demographics(gbd_team="epi", gbd_round_id=gbd_round)
location_ids = dems['location_id']
# test dems
# location_ids = [44651]

# create test variables !! Do not forget to open the parallel file and select
# one year and sex if trying to replicate one file
code_dir = "FILEPATH"
shell = code_dir + "python_shell.sh"
in_dir = "FILEPATH"
in_dir_data = "FILEPATH"
in_dir_value = "FILEPATH"
in_dir_rate = "FILEPATH"
out_dir_ODE = "FILEPATH"
def get_age_groups():
    # note: get_demographics keys are singular ('age_group_id'), matching the
    # other calls in this codebase
    return get_demographics('cod')['age_group_id']
class SquareImport(object):

    _epi_demographics = get_demographics("epi", gbd_round_id=5)
    _idx_dmnsns = {
        "year_id": _epi_demographics['year_id'],
        "age_group_id": _epi_demographics['age_group_id'],
        "sex_id": [1, 2],
        "location_id": _epi_demographics['location_id'],
        "measure_id": [5, 6]
    }
    _draw_cols = ["draw_{i}".format(i=i) for i in range(0, 1000)]

    def __init__(self, idx_dmnsns=None, draw_cols=None):
        if idx_dmnsns is None:
            self.idx_dmnsns = collections.OrderedDict(
                sorted(self.default_idx_dmnsns.items()))
        else:
            self.idx_dmnsns = collections.OrderedDict(
                sorted(idx_dmnsns.items()))

        if draw_cols is None:
            self.draw_cols = self.default_draw_cols
        else:
            self.draw_cols = draw_cols

        # expected index
        self.index_df = self.get_index_df()

    @ClassProperty
    @classmethod
    def default_idx_dmnsns(cls):
        return cls._idx_dmnsns.copy()

    @ClassProperty
    @classmethod
    def default_draw_cols(cls):
        return cls._draw_cols[:]

    def get_index_df(self):
        """create template index for square dataset"""
        idx = pd.MultiIndex.from_product(list(self.idx_dmnsns.values()),
                                         names=list(self.idx_dmnsns.keys()))
        return pd.DataFrame(index=idx)

    def import_square(self, gopher_what, source, filler=None, **kwargs):
        """get draws for the specified modelable entity by dimensions"""
        if not kwargs:
            kwargs = self.idx_dmnsns.copy()
        if filler is None:
            filler = 0

        df = get_draws(gbd_id_type=list(gopher_what.keys()),
                       gbd_id=list(gopher_what.values()),
                       source=source,
                       measure_id=kwargs['measure_id'],
                       location_id=kwargs['location_id'],
                       year_id=kwargs['year_id'],
                       age_group_id=kwargs['age_group_id'],
                       sex_id=kwargs['sex_id'],
                       gbd_round_id=5)

        for c in self.idx_dmnsns.keys():
            df[c] = pd.to_numeric(df[c])
        df = df.set_index(list(self.idx_dmnsns.keys()))
        df = df[self.draw_cols]

        # align to the full square index; missing demographic combinations
        # get the filler value
        df = pd.concat([self.index_df, df], axis=1)
        df.fillna(value=filler, inplace=True)
        return df
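# Hedged usage sketch, not from the original file: pull draws for a single
# modelable entity against the default square index. The ME ID 1234 is a
# placeholder, not a real modelable entity.
sq = SquareImport()
square_draws = sq.import_square({'modelable_entity_id': 1234}, source='epi')
# square_draws now has one row per year/age/sex/location/measure combination,
# with any combinations missing from the draws filled by 0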