def main(version):
    """Compute incidence-to-mortality ratios for every ratio e-code.

    Iterates over e-code, year (1990+) and sex, computes ratio draws,
    collapses them across sexes and years, summarizes by region, and writes
    one combined array for this ``version``.
    """
    start = help.start_timer()

    # pull identifiers that drive the loops below
    ratio_ecodes = inj_info.IM_RATIO_ECODES
    dems = db.get_demographics(gbd_team='cod', gbd_round_id=help.GBD_ROUND)
    sexes = dems['sex_id']
    # ratios are only computed from 1990 onwards
    mortyears = [y for y in dems['year_id'] if y >= 1990]
    regmap = get_region_map(dems['location_id'])

    # iterate over year, sex and e-code to get the ratios all in one array
    final_list = []
    for ecode in ratio_ecodes:
        year_arr_list = []
        for year in mortyears:
            sex_arr_list = []
            for sex in sexes:
                print('{}, {}, sex {}'.format(ecode, year, sex))
                sys.stdout.flush()  # write to log file
                ratios = compute_ratio(
                    ecode, str(versions.get_best_version(ecode)), year, sex)
                sex_arr_list.append(ratios)
            combined_sexes = xr.concat(sex_arr_list, 'sex_id')
            year_arr_list.append(combined_sexes)
        combined_years = xr.concat(year_arr_list, 'year_id')
        print('Summarize {}'.format(ecode))
        summarized = summarize(combined_years, regmap)
        final_list.append(summarized)

    final_ratios = xr.concat(final_list, pd.Index(ratio_ecodes, name='ecode'))
    print('Write results')
    write_results(final_ratios, version)
    help.end_timer(start)
def main(decomp, version):
    """Compute incidence-to-mortality ratios for every ratio e-code (decomp-aware).

    Same flow as the non-decomp variant: loop e-code -> year (1990+) -> sex,
    concatenate draws, summarize by region, and write one combined result.
    """
    start = help.start_timer()

    ratio_ecodes = inj_info.IM_RATIO_ECODES
    dems = db.get_demographics(gbd_team='cod', gbd_round_id=help.GBD_ROUND)
    sexes = dems['sex_id']
    # ratios are only computed from 1990 onwards
    # (dropped the redundant list() around the comprehension)
    mortyears = [x for x in dems['year_id'] if x >= 1990]
    regmap = get_region_map(dems['location_id'])

    final_list = []
    for ecode in ratio_ecodes:
        year_arr_list = []
        for year in mortyears:
            sex_arr_list = []
            for sex in sexes:
                ratios = compute_ratio(
                    ecode, decomp, str(versions.get_best_version(ecode)),
                    year, sex)
                sex_arr_list.append(ratios)
            combined_sexes = xr.concat(sex_arr_list, 'sex_id')
            year_arr_list.append(combined_sexes)
        combined_years = xr.concat(year_arr_list, 'year_id')
        # dropped the 2to3 double parentheses around the print argument
        print('Summarize {}'.format(ecode))
        summarized = summarize(combined_years, regmap)
        final_list.append(summarized)

    final_ratios = xr.concat(final_list, pd.Index(ratio_ecodes, name='ecode'))
    write_results(final_ratios, decomp, version)
    help.end_timer(start)
def main(ecode, ncode, version):
    """Compute short-term results and true long-term prevalence for one e-code/n-code.

    Per year: writes split short-term incidence (measure 6); for short-term
    n-codes also short-term prevalence (35) and YLDs (3); for long-term
    n-codes subtracts the "fake" long-term component from the ODE output,
    applies expert adjustments, and writes long-term prevalence (36).
    """
    start = help.start_timer()

    if ncode in inj_info.ST_NCODES:
        print("Getting durations, percent treated, and disability weights...")
        # shock e-codes are modelled on the full year set
        if ecode in inj_info.SHOCK_ECODES:
            year_set = 'full'
        else:
            year_set = 'all'
        # defaults to 10% min treated, 75 haqi cap
        pct_treated = calculate_measures.pct_treated(year_id=year_set)
        durations = calculate_measures.get_durations(pct_treated, ncode=ncode)
        dws = load_measures.disability_weights_st().loc[{'ncode': ncode}]

    if ecode in inj_info.SHOCK_ECODES:
        dems = db.get_demographics(gbd_team='cod', gbd_round_id=help.GBD_ROUND)
        years = [y for y in dems['year_id'] if y >= 1990]
    else:
        dems = db.get_demographics(gbd_team='epi', gbd_round_id=help.GBD_ROUND)
        years = dems['year_id']

    for year in years:
        print("----------------------------------")
        print("Working on {}".format(year))
        sys.stdout.flush()  # write to log file

        print(
            "(1): Get short-term split incidence & write collapsed incidence results."
        )
        incidence = load_measures.short_term_incidence_split(
            ecode, version, ncode, year)
        write_results(incidence, version, ecode, ncode, year, measure_id=6)

        if ncode in inj_info.ST_NCODES:
            # only run st prev and ylds on short term ncodes
            # prevalence - we will also use this for fake long term if it's a
            # long term ncode
            print("(2): Get short-term prevalence & write results.")
            st_prevalence = calculate_measures.compute_prevalence(
                incidence, durations)
            write_results(st_prevalence, version, ecode, ncode, year,
                          measure_id=35)

            # ylds
            print("(3): Get YLDs & write results.")
            ylds = calculate_measures.short_term_ylds(st_prevalence, dws)
            write_results(ylds, version, ecode, ncode, year, measure_id=3)

        if ncode in inj_info.LT_NCODES:
            raw_lt = read_ode(ecode, ncode, year, version)

            if ncode in inj_info.ST_NCODES:
                # get the fake long-term draw implied by short-term
                # prevalence and subtract it, clipping at zero
                fake_lt = get_fake_long_term(ncode, year, st_prevalence)
                fake_lt = fake_lt.loc[{'ncode': ncode}].drop('ncode')
                if 'ecode' in fake_lt.coords:
                    fake_lt = fake_lt.drop('ecode')
                real_lt = raw_lt - fake_lt
                real_lt.values[real_lt.values < 0] = 0
            else:
                real_lt = raw_lt.copy()

            # Expert adjustments
            # 1. Do not allow certain outpatient long-term
            # 2. Delete under 1 lt prevalence of shocks
            # 3. Subtract weird long-term animal contact
            if ncode in ["N48", "N26", "N11", "N19", "N43", "N25", "N23"]:
                real_lt.loc[{'platform': ['outpatient']}] = 0
            if ecode in inj_info.SHOCK_ECODES:
                real_lt.loc[{'age_group_id': [2, 3, 4]}] = 0
            # membership test instead of the chained == or == comparison
            if ecode in ("inj_animal_nonven", "inj_animal_venom"):
                real_lt = animal_adjustment(real_lt, ecode, ncode)

            write_results(real_lt, version, ecode, ncode, year, measure_id=36)
        sys.stdout.flush()  # write to log file

    print('All done!')
    help.end_timer(start)
def main(ecode, ncode, platform, year, decomp, version, flat_version):
    """Run the DisMod ODE for every location/sex and save combined prevalence.

    For each location and sex: if ``eta_incidence`` is 0 all incidence draws
    must be 0, so an all-zero frame is produced directly; otherwise the
    injuries ODE model is run. Results are concatenated and written once.
    """
    # local import: the accidental os.errno alias was removed in modern Python 3
    import errno

    dems = db.get_demographics(gbd_team="epi", gbd_round_id=help.GBD_ROUND)
    dm_settings = os.path.join(paths.SHARE_DIR, 'dismod_settings')
    version = version.rstrip()
    dm_dir = os.path.join(paths.DATA_DIR, decomp,
                          inj_info.ECODE_PARENT[ecode], str(version),
                          "dismod_ode", ecode)
    metaloc = db.get_location_metadata(location_set_id=35,
                                       gbd_round_id=help.GBD_ROUND)
    # NOTE(review): return value unused; call kept in case write_path has
    # side effects (e.g. creating directories) -- confirm and drop if not.
    filepath = write_path(ecode, ncode, platform, year, decomp, version)
    locations = help.ihme_loc_id_dict(metaloc, dems['location_id'])

    value_in = os.path.join(dm_dir, "value_in",
                            "value_in_{}_{}.csv".format(ncode, platform))
    draw_in = os.path.join(dm_settings, "draw_in.csv")
    plain_in = os.path.join(dm_settings, "plain_in.csv")
    effect_in = os.path.join(dm_settings, "effect_in.csv")
    v_in = pd.read_csv(value_in)

    # BUGFIX: positional .iloc[0] instead of label-based [0], which raises
    # KeyError whenever the first matching row's index label is not 0.
    # Hoisted out of the loop since it is loop-invariant.
    eta_is_zero = float(
        v_in.loc[v_in['name'] == 'eta_incidence', 'value'].iloc[0]) == 0

    alldata = []
    for locn in locations:
        for sex in [1, 2]:
            start = help.start_timer()
            if eta_is_zero:
                # eta incidence is 0 -> incidence is all zeros; skip the ODE
                result = pd.DataFrame({'age_group_id': dems['age_group_id']})
                result = result.assign(**{d: 0 for d in help.drawcols()})
                result = help.convert_from_age_group_id(result)
            else:
                data_in = os.path.join(
                    dm_dir, "data_in", locations[locn], str(year), str(sex),
                    ecode, "data_in_{}_{}.csv".format(ncode, platform))
                # EMR n-codes use the rate-in file that includes excess mortality
                if ncode in inj_info.EMR_NCODES:
                    rate_in_name = "rate_in_emr.csv"
                else:
                    rate_in_name = "rate_in_no_emr.csv"
                rate_in = os.path.join(paths.DATA_DIR, 'flats',
                                       str(flat_version), 'rate_in',
                                       str(year), str(sex), locations[locn],
                                       rate_in_name)
                draw_out_dir = os.path.join(dm_dir, "prev_results",
                                            locations[locn], str(year),
                                            str(sex))
                draw_out = os.path.join(
                    draw_out_dir,
                    "prevalence_{}_{}.csv".format(ncode, platform))
                if not os.path.exists(draw_out_dir):
                    try:
                        os.makedirs(draw_out_dir)
                    except OSError as e:
                        # tolerate a concurrent job creating the directory
                        if e.errno != errno.EEXIST:
                            raise
                result = run_model_injuries(draw_in, data_in, value_in,
                                            plain_in, rate_in, effect_in,
                                            draw_out, 1000)

            # attach identifying columns
            result['location_id'] = locn
            result['platform'] = platform
            result['year_id'] = year
            result['sex_id'] = sex
            alldata.append(result)
            help.end_timer(start)
            sys.stdout.flush()  # write to log file

    final = pd.concat(alldata)
    write_results(final, ecode, ncode, platform, year, decomp, version)
def main(ecode, ncode, platform, version):
    """Build DisMod ODE data-in and value-in inputs for one e-code/n-code/platform.

    Summarizes long-term incidence (and excess mortality for EMR n-codes)
    into mean/stdev measures per year and sex, writes the per-demographic
    data-in files, then builds the value-in file from the collected data.
    """
    # local import: the accidental os.errno alias was removed in modern Python 3
    import errno

    start = help.start_timer()
    parent = inj_info.ECODE_PARENT[ecode]
    flat_version = versions.get_env(parent, version)

    # get demographics
    print("1. Getting demographic, location, and long-term probabilities...")
    dems = db.get_demographics(gbd_team="epi", gbd_round_id=help.GBD_ROUND)
    metaloc = db.get_location_metadata(location_set_id=35,
                                      gbd_round_id=help.GBD_ROUND)
    locations = help.ihme_loc_id_dict(metaloc, dems['location_id'])

    # get long-term probabilities that will be used and long-term
    # standardized-mortality ratios
    lt_probs = calculate_measures.long_term_probs_combined(ncode=ncode)
    smr = load_measures.smr(ncode)

    # define DisMod ODE input directory (path redacted upstream)
    dm_out_dir = os.path.join("FILEPATH")

    # make the sub-directory for data in files
    folder = os.path.join("FILEPATH")
    if not os.path.exists(folder):
        try:
            os.makedirs(folder)
        except OSError as e:
            # tolerate a concurrent job creating the directory
            if e.errno != errno.EEXIST:
                raise

    print("2. Looping through years and sexes to make rate-in and data-in files.")
    value_data = []
    for year in dems["year_id"]:
        for sex in dems["sex_id"]:
            measures = {}
            print('Working on year {} sex {}'.format(year, sex))
            incidence = calculate_measures.long_term_incidence(
                ecode, version, ncode, platform, year, sex, lt_probs)
            inc_mean = incidence.mean(dim='draw')
            # If the value is less than one in a trillion, set it to 0.
            # Otherwise, DisMod can have an overflow issue where it sets
            # prevalence to 100%. (1e-12 == .000000000001 exactly.)
            inc_summary = xr.merge([
                inc_mean.where(inc_mean > 1e-12, 0).rename('meas_value'),
                incidence.std(dim='draw').rename('meas_stdev')])
            measures['incidence'] = inc_summary

            if ncode in inj_info.EMR_NCODES:
                emr = calculate_measures.emr(smr, year, sex, flat_version)
                emr_summary = xr.merge([
                    emr.mean(dim='draw').rename('meas_value'),
                    emr.std(dim='draw').rename('meas_stdev')])
                measures['mtexcess'] = emr_summary

            print('Making data in')
            data = make_data_in(measures, ecode, version, ncode, platform,
                                locations, year, sex)
            value_data.append(data)
            sys.stdout.flush()  # write to log file

    print("Finished making data in files.")
    print("4. Now making the value-in file with the saved data from data in process...")
    make_value_in(value_data, ecode, ncode, platform, dm_out_dir)
    help.end_timer(start)
def main(ecode, year_id, sex_id, platform, version):
    """Split unsplit short-term incidence into n-codes.

    For a parent e-code, both the parent and its children are split and the
    children are rescaled so they sum to the parent; only the scaled children
    are saved. For a single e-code the incidence is split and saved as-is.
    """
    start = help.start_timer()
    dems = db.get_demographics(gbd_team='epi', gbd_round_id=help.GBD_ROUND)
    income = get_income_map(dems['location_id'])

    if ecode in inj_info.PARENT_ECODES:
        # Parent e-code: use parent incidence to scale child incidence; the
        # parent's own split results are never saved.
        print(
            "This is a parent e-code, so now we are going to split and scale the children."
        )
        # parent info
        parent_inc = load_measures.short_term_incidence_unsplit(
            ecode, version, year_id, sex_id, platform)
        parent_mat = load_measures.en_matrix(ecode, sex_id, platform)

        # child info, concatenated along a new 'ecode' dimension
        children = inj_info.ECODE_CHILD[ecode]
        child_index = pd.Index(children, name='ecode')
        child_inc = xr.concat(
            [load_measures.short_term_incidence_unsplit(
                c, version, year_id, sex_id, platform) for c in children],
            child_index)
        child_mat = xr.concat(
            [load_measures.en_matrix(c, sex_id, platform) for c in children],
            child_index)

        # split into n-codes, scale the children, and save
        prep_file(ecode, year_id, sex_id, platform, version)
        write_mode = 'w'
        for n in inj_info.get_ncodes(platform):
            parent_split = split_ncodes(
                parent_inc, parent_mat.loc[{'ncode': [n]}], income)
            child_split = split_ncodes(
                child_inc, child_mat.loc[{'ncode': [n]}], income)
            scaled = child_split * (parent_split /
                                    child_split.sum(dim='ecode'))
            scaled = scaled.fillna(0)
            write_results(scaled, ecode, year_id, sex_id, platform, version,
                          mode=write_mode, group=n)
            # first n-code writes a fresh file; the rest append to it
            write_mode = 'a'
    else:
        # single e-code: just split, no scaling
        print(
            "This is a single e-code so we are just splitting it, no scaling.")
        inc = load_measures.short_term_incidence_unsplit(
            ecode, version, year_id, sex_id, platform)
        matx = load_measures.en_matrix(ecode, sex_id, platform)
        prep_file(ecode, year_id, sex_id, platform, version)
        write_mode = 'w'
        for n in inj_info.get_ncodes(platform):
            print(n)
            split_inc = split_ncodes(inc, matx.loc[{'ncode': [n]}], income)
            write_results(split_inc, ecode, year_id, sex_id, platform,
                          version, mode=write_mode, group=n)
            # first n-code writes a fresh file; the rest append to it
            write_mode = 'a'

    help.end_timer(start)
def main(ecode, ncode, platform, version):
    """Progress long-term prevalence through time for one e-code/n-code/platform.

    Builds long-term incidence (and excess mortality for EMR n-codes) by sex,
    interpolates to single ages, and increments prevalence one year at a
    time, writing results from 1990 onwards.
    """
    start = help.start_timer()
    parent = inj_info.ECODE_PARENT[ecode]
    flat_version = versions.get_env(parent, version)

    # need the cod demographics (original comment truncated -- presumably
    # because cod years extend further back than epi; confirm upstream)
    dems = db.get_demographics(gbd_team="cod", gbd_round_id=help.GBD_ROUND)

    # get dfs used for long-term incidence and EMR
    lt_probs = calculate_measures.long_term_probs_combined(ncode,
                                                           year_id='full')
    # BUGFIX: was a Python 2 print statement -- a SyntaxError under Python 3
    print("Working on {}".format(ncode))
    if ncode in inj_info.EMR_NCODES:
        smr = load_measures.smr(ncode)
    sy_pop = load_measures.population(flat_version, single_year=True)
    grp_pop = load_measures.population(flat_version)

    # prevalence starts at zero and is carried forward between years
    prev = xr.DataArray([0], dims='ncode', coords=[[ncode]])
    for year in dems['year_id']:
        print(year)
        inc_list = []
        emr_list = []
        print('Getting incidence and emr if applicable')
        sys.stdout.flush()  # write to log
        for sex in dems['sex_id']:
            sex_inc = calculate_measures.long_term_incidence(
                ecode, version, ncode, platform, year, sex, lt_probs)
            inc_list.append(sex_inc)
            if ncode in inj_info.EMR_NCODES:
                sex_emr = calculate_measures.emr(smr, year, sex, flat_version)
                emr_list.append(sex_emr)
        incidence = xr.concat(inc_list, dim='sex_id')

        print('Interpolating')
        sys.stdout.flush()  # write to log
        inc_interp = interpolate_ages(incidence,
                                      sy_pop.loc[{'year_id': [year]}],
                                      grp_pop.loc[{'year_id': [year]}])
        if ncode in inj_info.EMR_NCODES:
            emr = xr.concat(emr_list, dim='sex_id')
            emr_interp = interpolate_ages(emr,
                                          sy_pop.loc[{'year_id': [year]}],
                                          grp_pop.loc[{'year_id': [year]}])
        else:
            # zero placeholder so downstream code has a year-indexed array
            emr_interp = xr.DataArray([0], dims='year_id', coords=[[year]])

        print('Running ODE/incrementing process')
        sys.stdout.flush()  # write to log
        # progress half year and save, for 1990 and on
        if year >= 1990:
            year_result = progress_half_year(prev, inc_interp, emr_interp,
                                             sy_pop.loc[{'year_id': [year]}],
                                             grp_pop.loc[{'year_id': [year]}])
            write_results(year_result, ecode, ncode, platform, year, version)
        # then progress full year and increment, for all years but the last
        if year != help.LAST_YEAR:
            prev = progress_one_year(prev, inc_interp, emr_interp)

    help.end_timer(start)
def main(ecode, ncode, platform, year, version, flat_version):
    """Run the DisMod ODE for every location/sex for one year and save results.

    If ``eta_incidence`` is 0, incidence must be all zeros, so an all-zero
    frame is produced directly instead of running the ODE. Progress is
    printed per location with a simple remaining-time estimate.
    """
    # local import: the accidental os.errno alias was removed in modern Python 3
    import errno

    dems = db.get_demographics(gbd_team="epi", gbd_round_id=help.GBD_ROUND)
    dm_settings = os.path.join("FILEPATH")  # paths redacted upstream
    dm_dir = os.path.join("FILEPATH")
    metaloc = db.get_location_metadata(location_set_id=35,
                                       gbd_round_id=help.GBD_ROUND)
    locations = help.ihme_loc_id_dict(metaloc, dems['location_id'])

    value_in = os.path.join("FILEPATH.csv".format(ncode, platform))
    draw_in = os.path.join(dm_settings, "FILEPATH.csv")
    plain_in = os.path.join(dm_settings, "FILEPATH.csv")
    effect_in = os.path.join(dm_settings, "FILEPATH.csv")
    v_in = pd.read_csv(value_in)

    # BUGFIX: positional .iloc[0] instead of label-based [0], which raises
    # KeyError whenever the first matching row's index label is not 0.
    # Hoisted out of the loop since it is loop-invariant.
    eta_is_zero = float(
        v_in.loc[v_in['name'] == 'eta_incidence', 'value'].iloc[0]) == 0

    alldata = []
    num_locs = len(locations)
    loc_pos = 0
    initime = help.start_timer()
    for locn in locations:
        loc_pos = loc_pos + 1
        for sex in [1, 2]:
            print("Running DisMod ODE for location {} year {} sex {}".format(
                locations[locn], year, sex))
            start = help.start_timer()
            if eta_is_zero:
                print('eta incidence is 0, so all incidence should be 0 and we\'ll just make an all 0 df instead of '
                      'running the ODE')
                result = pd.DataFrame({'age_group_id': dems['age_group_id']})
                result = result.assign(**{d: 0 for d in help.drawcols()})
                result = help.convert_from_age_group_id(result)
            else:
                data_in = os.path.join("FILEPATH.csv".format(ncode, platform))
                # create the rate in filepath based on whether it has excess
                # mortality or not
                if ncode in inj_info.EMR_NCODES:
                    rate_in_name = "FILEPATH.csv"
                else:
                    rate_in_name = "FILEPATH.csv"
                rate_in = os.path.join("FILEPATH")
                draw_out_dir = os.path.join("FILEPATH")
                draw_out = os.path.join(
                    "FILEPATH.csv".format(ncode, platform))
                if not os.path.exists(draw_out_dir):
                    try:
                        os.makedirs(draw_out_dir)
                    except OSError as e:
                        # tolerate a concurrent job creating the directory
                        if e.errno != errno.EEXIST:
                            raise
                result = run_model_injuries(draw_in, data_in, value_in,
                                            plain_in, rate_in, effect_in,
                                            draw_out, 1000)

            # format the results so that we have the identifying columns
            result['location_id'] = locn
            result['platform'] = platform
            result['year_id'] = year
            result['sex_id'] = sex
            alldata.append(result)
            help.end_timer(start)
            sys.stdout.flush()  # write to log file

        # per-location progress with a naive linear time estimate
        total_time = (time.time() - initime) / 60.
        print('Completed {} of {} locations in {} minutes. Will take {} more minutes at this rate'.format(
            loc_pos, num_locs, total_time,
            (total_time / loc_pos) * (num_locs - loc_pos)))
        sys.stdout.flush()  # write to log file

    # concatenate all of the data together
    final = pd.concat(alldata)
    write_results(final, ecode, ncode, platform, year, version)
    print('Finished!')
def main(ecode, ncode, platform, decomp, version):
    """Increment long-term prevalence year over year (decomp-aware variant).

    Builds long-term incidence (plus excess mortality for EMR n-codes) by
    sex, interpolates to single ages, and carries prevalence forward one
    year at a time, writing results from 1990 onwards.
    """
    start = help.start_timer()
    parent = inj_info.ECODE_PARENT[ecode]
    flat_version = versions.get_env(parent, version)

    dems = db.get_demographics(gbd_team="cod", gbd_round_id=help.GBD_ROUND)
    lt_probs = calculate_measures.long_term_probs_combined(
        ncode=ncode, decomp=decomp, year_id='full')

    has_emr = ncode in inj_info.EMR_NCODES
    if has_emr:
        smr = load_measures.smr(ncode)
    sy_pop = load_measures.population(flat_version, single_year=True)
    grp_pop = load_measures.population(flat_version)

    # prevalence starts at zero and is carried forward between years
    prev = xr.DataArray([0], dims='ncode', coords=[[ncode]])
    for year in dems['year_id']:
        year_sy = sy_pop.loc[{'year_id': [year]}]
        year_grp = grp_pop.loc[{'year_id': [year]}]

        sex_incidence = []
        sex_emrs = []
        for sex in dems['sex_id']:
            sex_incidence.append(calculate_measures.long_term_incidence(
                ecode, decomp, version, ncode, platform, year, sex, lt_probs))
            if has_emr:
                sex_emrs.append(
                    calculate_measures.emr(smr, year, sex, flat_version))

        inc_interp = interpolate_ages(
            xr.concat(sex_incidence, dim='sex_id'), year_sy, year_grp)
        if has_emr:
            emr_interp = interpolate_ages(
                xr.concat(sex_emrs, dim='sex_id'), year_sy, year_grp)
        else:
            # zero placeholder so downstream code has a year-indexed array
            emr_interp = xr.DataArray([0], dims='year_id', coords=[[year]])

        # results are only reported from 1990 on: progress a half year, save
        if year >= 1990:
            year_result = progress_half_year(prev, inc_interp, emr_interp,
                                             year_sy, year_grp)
            write_results(year_result, ecode, ncode, platform, year, decomp,
                          version)
        # progress a full year into the next iteration (skip after last year)
        if year != help.LAST_YEAR:
            prev = progress_one_year(prev, inc_interp, emr_interp)

    help.end_timer(start)