Exemplo n.º 1
0
def main(version):
    """Write the mortality, population and single-year population flat files.

    Demographics are requested for the "cod" team (per the original author's
    note, because every year is wanted); only the year list handed to
    ``rate_in_omega`` comes from the "epi" team.  ``version`` is not
    referenced in the visible body (the output path is redacted).
    """
    id_columns = ['location_id', 'sex_id', 'year_id', 'age_group_id']

    dems = db.get_demographics(gbd_team="cod", gbd_round_id=help.GBD_ROUND)
    epi_dems = db.get_demographics(gbd_team='epi',
                                   gbd_round_id=help.GBD_ROUND)
    rate_years = epi_dems['year_id']

    # Output directory for this version's files; created on first use.
    outdir = os.path.join("FILEPATH")
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    raw_rate_in = rate_in_baseline()

    # (1) Mortality: written to disk, then fed into the rate_in step.
    mort = get_mortality(dems, shock=0)
    mortality_path = os.path.join(outdir, "mortality.nc")
    write_results(df=mort, filepath=mortality_path, indexcols=id_columns)
    rate_in_omega(mort, raw_rate_in, rate_years, outdir)

    # (2) Populations
    pops = get_populations(dems)
    pops_path = os.path.join(outdir, "FILEPATH.nc")
    write_results(df=pops, filepath=pops_path, indexcols=id_columns)

    # (3) Single-year populations
    sy_pops = get_sy_populations(dems)
    sy_pops_path = os.path.join(outdir, "FILEPATH.nc")
    write_results(df=sy_pops, filepath=sy_pops_path, indexcols=id_columns)
Exemplo n.º 2
0
def get_gbd_estimation_years(gbd_round_id: int) -> List[int]:
    """Return the ``year_id`` entry of the EPI demographics for a GBD round."""
    # Imported inside the function, as in the original; the warnings filter
    # for the db_queries module is installed right after the import.
    from db_queries import get_demographics
    warnings.filterwarnings("default", module="db_queries")

    demographics = get_demographics(
        gbd_constants.CONN_DEFS.EPI, gbd_round_id=gbd_round_id)
    return demographics['year_id']
def main(input_dir, year_id, sex_id, location_id, dm_out_dir, location):
    """Build the rate_in files for one location/year/sex and drop a check file.

    For each outcome ("long_modsev", "epilepsy") the all-cause mortality rows
    matching ``location_id``/``year_id``/``sex_id`` (and the epi age groups)
    are formatted and handed to ``make_rate_in``.  Afterwards an empty
    ``finished_<location>_<sex>_<year>.txt`` file is written under
    ``<dm_out_dir>/04_ODE/rate_in/checks``.

    ``location`` is accepted for interface compatibility but is not used in
    this function.
    """
    dems = db.get_demographics(gbd_team="epi")

    # The mortality file does not depend on the outcome, so read it once
    # instead of once per loop iteration.
    all_mortality = pd.read_csv(
        os.path.join(dm_out_dir,
                     "02_temp/03_data/all_cause_mortality.csv"))
    wanted_rows = (
        all_mortality.age_group_id.isin(dems['age_group_id'])
        & (all_mortality.location_id == location_id)
        & (all_mortality.year_id == year_id)
        & (all_mortality.sex_id == sex_id))

    # One rate_in file per outcome; the original author's note says epilepsy
    # needs its own file because it has a rho (remission).
    for outcome in ["long_modsev", "epilepsy"]:
        # Boolean indexing yields a fresh frame each time, so whatever the
        # formatting step does to its input cannot leak into the next outcome.
        mortality = all_mortality.loc[wanted_rows]
        mortality = format_mortality_for_rate_in(mortality, input_dir)
        make_rate_in(mortality, location_id, sex_id, year_id, dm_out_dir,
                     input_dir, outcome)

    # Create the (empty) finished file used by the check system.
    finished = []
    checks = os.path.join(dm_out_dir, "04_ODE/rate_in/checks")
    # Several location/sex/year jobs may reach this point at the same time;
    # tolerate another job having created the folder between check and create.
    try:
        os.makedirs(checks)
    except OSError:
        if not os.path.isdir(checks):
            raise
    check_name = "finished_{}_{}_{}.txt".format(location_id, sex_id, year_id)
    np.savetxt(os.path.join(checks, check_name), finished)
Exemplo n.º 4
0
def main(version):
    """Compute, summarize and write ratios for every ratio e-code.

    Ratios are computed per e-code, year (1990 onwards, from the "cod"
    demographics) and sex, concatenated along ``sex_id`` then ``year_id``,
    summarized with the region map, and finally stacked along a new
    ``ecode`` dimension before being written for ``version``.
    """
    start = help.start_timer()

    # Identifiers that drive the three nested loops.
    ecodes = inj_info.IM_RATIO_ECODES
    dems = db.get_demographics(gbd_team='cod', gbd_round_id=help.GBD_ROUND)
    sex_ids = dems['sex_id']
    years = [yr for yr in dems['year_id'] if yr >= 1990]
    region_map = get_region_map(dems['location_id'])

    per_ecode = []
    for ecode in ecodes:
        per_year = []
        for year in years:
            per_sex = []
            for sex in sex_ids:
                print('{}, {}, sex {}'.format(ecode, year, sex))
                sys.stdout.flush()  # push progress to the log file
                best_version = str(versions.get_best_version(ecode))
                per_sex.append(
                    compute_ratio(ecode, best_version, year, sex))
            per_year.append(xr.concat(per_sex, 'sex_id'))
        all_years = xr.concat(per_year, 'year_id')
        print('Summarize {}'.format(ecode))
        per_ecode.append(summarize(all_years, region_map))

    ecode_index = pd.Index(ecodes, name='ecode')
    final_ratios = xr.concat(per_ecode, ecode_index)

    print('Write results')
    write_results(final_ratios, version)

    help.end_timer(start)
Exemplo n.º 5
0
class Ages:
    # Age group constants.  NOTE: the get_demographics call below runs when
    # the class body is executed, i.e. when the defining module is imported.

    # age_group_id entry of the "cod" team demographics for gbd.GBD_ROUND_ID.
    MOST_DETAILED_GROUP_IDS: List[int] = get_demographics(
        gbd_round_id=gbd.GBD_ROUND_ID, gbd_team='cod'
    )[Columns.AGE_GROUP_ID]
    # The ids above followed by gbd.GBD_COMPARE_AGES.
    ALL_AGE_GROUPS: List[int] = MOST_DETAILED_GROUP_IDS + gbd.GBD_COMPARE_AGES
    # Hard-coded list of age group ids; the name suggests these are only
    # needed at the end of a round — TODO confirm with the owner.
    END_OF_ROUND_AGE_GROUPS: List[int] = [37, 39, 155, 160, 197, 228, 230, 232,
        243, 284, 285, 286, 287, 288, 289, 420, 430]
Exemplo n.º 6
0
def get_measures(ecode, me_id, year_id, sex_id, version):
    """Return the measure dictionary for one e-code / ME / year / sex.

    For e-codes in ``inj_info.IM_RATIO_ECODES`` and years before
    ``help.LAST_YEAR`` the measures are interpolated from ``year_id`` to the
    end of the window, and mortality is saved for every year in that window.
    Otherwise the measures come straight from ``get_measures_get_draws``.
    """
    measure_table = db.get_ids(table='measure')

    def _measure_id(measure_name):
        # First measure_id whose measure_name matches exactly.
        matches = measure_table.loc[
            measure_table["measure_name"] == measure_name, 'measure_id']
        return matches.iloc[0]

    inc_id = _measure_id("Incidence")
    rms_id = _measure_id("Remission")
    emr_id = _measure_id("Excess mortality rate")

    dems = db.get_demographics(gbd_team="epi", gbd_round_id=help.GBD_ROUND)
    location_ids = dems["location_id"]
    age_group_ids = dems["age_group_id"]

    interpolate = (ecode in inj_info.IM_RATIO_ECODES
                   and year_id < help.LAST_YEAR)
    if not interpolate:
        return get_measures_get_draws(me_id, location_ids, year_id, sex_id,
                                      age_group_ids, inc_id, rms_id, emr_id)

    # Before 2010 the window is five years and mortality stops one year
    # short of its end; from 2010 on both run through help.LAST_YEAR.
    if year_id < 2010:
        year_end = year_id + 5
        mort_year_end = year_end - 1
    else:
        year_end = help.LAST_YEAR
        mort_year_end = year_end

    measure_dict = get_measures_interpolate(
        me_id, location_ids, sex_id, age_group_ids, inc_id, rms_id, emr_id,
        year_id, year_end)

    for mort_year in range(year_id, mort_year_end + 1):
        save_mortality(ecode, mort_year, sex_id, location_ids, age_group_ids,
                       version)

    return measure_dict
Exemplo n.º 7
0
def transform_to_rate_space(summary_df):
    '''
    Divide val/lower/upper by population and bound the results to [0, 1].

    Populations are pulled for GBD round 5, run 104, years 1950-2017 and the
    "cod" demographics, then inner-joined on age group, sex, year and
    location, so rows without a matching population are dropped.  The
    population column is removed again before returning.
    '''
    print("**TRANSFORMING TO RATE SPACE**")
    merge_keys = ['age_group_id', 'sex_id', 'year_id', 'location_id']
    value_cols = ['val', 'lower', 'upper']

    demos = get_demographics(gbd_round_id=5, gbd_team='cod')
    year_ids = list(range(1950, 2018))
    full_pops = get_population(gbd_round_id=5,
                               age_group_id=demos['age_group_id'],
                               sex_id=demos['sex_id'],
                               year_id=year_ids,
                               run_id=104,
                               location_id=demos['location_id'])
    full_pops = full_pops[merge_keys + ['population']]

    summary_df = summary_df.merge(full_pops, on=merge_keys, how='inner')
    for col in value_cols:
        rate = summary_df[col] / summary_df['population']
        # Rates above 1 become 1 and rates below 0 become 0.
        summary_df[col] = rate.clip(lower=0, upper=1)

    summary_df = summary_df.drop(columns=['population'])
    print("  ...Successfully transformed summary values into rate space.\n")
    return summary_df
Exemplo n.º 8
0
def main(decomp, version):
    """Compute, summarize and write ratios for every ratio e-code.

    Ratios are computed per e-code, year (1990 onwards, from the "cod"
    demographics) and sex for the given ``decomp``, concatenated along
    ``sex_id`` then ``year_id``, summarized with the region map, and stacked
    along a new ``ecode`` dimension before being written.
    """
    start = help.start_timer()

    ecodes = inj_info.IM_RATIO_ECODES
    dems = db.get_demographics(gbd_team='cod', gbd_round_id=help.GBD_ROUND)
    sex_ids = dems['sex_id']
    years = [yr for yr in dems['year_id'] if yr >= 1990]

    region_map = get_region_map(dems['location_id'])
    per_ecode = []
    for ecode in ecodes:
        per_year = []
        for year in years:
            per_sex = []
            for sex in sex_ids:
                best_version = str(versions.get_best_version(ecode))
                per_sex.append(
                    compute_ratio(ecode, decomp, best_version, year, sex))
            per_year.append(xr.concat(per_sex, 'sex_id'))
        all_years = xr.concat(per_year, 'year_id')
        print('Summarize {}'.format(ecode))
        per_ecode.append(summarize(all_years, region_map))

    ecode_index = pd.Index(ecodes, name='ecode')
    final_ratios = xr.concat(per_ecode, ecode_index)

    write_results(final_ratios, decomp, version)

    help.end_timer(start)
Exemplo n.º 9
0
def copy_draws(draws_dir, meid):
    """Create draw files for study locations that have no CSV yet.

    Existing locations are read off the ``<draws_dir>/<meid>/*.csv`` file
    names.  For each missing "epi" location: a level-3 location gets the
    101.csv draws multiplied by zero; a level-4 location gets a copy of its
    parent's file.  Locations at any other level are only printed.
    """
    # File names are expected to be "<location_id>.csv".
    existing = {
        int(path.rsplit("/")[-1][:-4])
        for path in glob.glob(f"{draws_dir}/{meid}/*.csv")
    }
    study_locs = db.get_demographics("epi")["location_id"]
    loc_h = db.get_location_metadata(35)
    missing = [loc for loc in study_locs if loc not in existing]

    # Template frame: location 101's file with every draw column zeroed.
    zero_draws = pd.read_csv(f'{draws_dir}/{meid}/101.csv')
    draw_cols = zero_draws.columns[zero_draws.columns.str.contains("draw")]
    zero_draws[draw_cols] = zero_draws[draw_cols] * 0.0

    print(len(missing))
    for place in missing:
        this_loc = loc_h.location_id == place
        level = loc_h.loc[this_loc, "level"].values[0]
        if level == 3:
            zero_draws['location_id'] = place
            zero_draws.to_csv(f'{draws_dir}/{meid}/{place}.csv')
        elif level == 4:
            parent = loc_h.loc[this_loc, "parent_id"].values[0]
            draws = pd.read_csv(f'{draws_dir}/{meid}/{parent}.csv')
            draws['location_id'] = place
            draws.to_csv(f'{draws_dir}/{meid}/{place}.csv')
        print(place)
    return None
Exemplo n.º 10
0
    def copy_and_backfill(self):
        """Copy the cretinism source draws to the target ME, backfilling
        locations.

        Steps, all on a reset-index copy of the source frame:
        1. relabel year 2016 as 2017;
        2. add national Saudi Arabia (location 152) rows built from its
           subnationals in the round-4 hierarchy (duplicates dropped);
        3. fill the round-5 "epi" locations from the nearest parent.
        The result is stored in ``self.me_dict`` under the target id.
        """
        prof_id_cret_old = self.me_map["cretinism"]["srcs"]
        old = self.me_dict[prof_id_cret_old].reset_index()

        # Handle year differences between gbd2016 and gbd2017
        old.loc[old.year_id == 2016, 'year_id'] = 2017

        # Handle Saudi Arabia
        loc_meta = get_location_metadata(location_set_id=35, gbd_round_id=4)
        saudia_id = 152
        saudia_sub_nats = loc_meta.loc[loc_meta.parent_id == saudia_id,
                                       'location_id'].tolist()
        # Take an explicit copy before overwriting location_id: assigning
        # into a filtered slice is ambiguous (SettingWithCopyWarning).
        saudi_arabia = old.loc[old.location_id.isin(saudia_sub_nats), :].copy()
        saudi_arabia.loc[:, 'location_id'] = saudia_id
        saudi_arabia = saudi_arabia.drop_duplicates(keep='first')
        old = pd.concat([old, saudi_arabia], axis=0)

        # Handle other location differences between gbd2016 and gbd2017
        data_cols = self.draw_cols
        data_dct = {'data_cols': data_cols}
        # Every non-draw column except location_id identifies a row.
        index_cols = list(set(old.columns) - set(data_cols))
        index_cols.remove('location_id')
        demo = get_demographics(gbd_team='epi', gbd_round_id=5)
        index_dct = {
            tuple(index_cols):
            list({tuple(row) for row in old[index_cols].values}),
            'location_id': demo['location_id']
        }
        gbdizer = gbdize.GBDizeDataFrame(
            dimensionality.DataFrameDimensions(index_dct, data_dct))
        new = gbdizer.fill_location_from_nearest_parent(old,
                                                        location_set_id=35,
                                                        gbd_round_id=5)
        prof_id_cret_new = self.me_map["cretinism"]["trgs"]
        self.me_dict[prof_id_cret_new] = new
Exemplo n.º 11
0
def grab_prevalence_draws(me_id, year_id, gbd_round_id, decomp_step) -> pd.DataFrame:
    """Pull measure 5 epi draws for one ME and year across the epi demographics.

    Locations, age groups and sexes come from the "epi" demographics of
    ``gbd_round_id``.  The age groups and ``me_id`` are printed before the
    draws are requested.
    """
    demo = get_demographics("epi", gbd_round_id=gbd_round_id)
    age_group_ids = demo['age_group_id']
    print(age_group_ids, me_id)
    return get_draws(
        'modelable_entity_id', me_id,
        source='epi',
        measure_id=5,
        location_id=demo['location_id'],
        year_id=year_id,
        age_group_id=age_group_ids,
        sex_id=demo['sex_id'],
        gbd_round_id=gbd_round_id,
        decomp_step=decomp_step)
Exemplo n.º 12
0
def get_all_cause_mortality():
    """Pull the all-cause mortality envelope and save it as a CSV.

    Rates with HIV are requested for the epi demographics; a ``std`` column
    is derived as (upper - lower) / 3.92 and ``run_id`` is dropped before
    writing ``all_cause_mortality.csv`` under ``out_dir_rate``.  Relies on
    the module-level names ``ds``, ``gbd_round_id`` and ``out_dir_rate``.
    """
    # NOTE(review): demographics are pulled for a hard-coded round 6 while
    # the envelope uses the module-level gbd_round_id — confirm they agree.
    dems = db.get_demographics(gbd_team="epi", gbd_round_id=6)
    mortality = db.get_envelope(
        age_group_id=dems["age_group_id"],
        location_id=dems["location_id"],
        year_id=dems["year_id"],
        sex_id=dems["sex_id"],
        with_hiv=1,
        rates=1,
        decomp_step=ds,
        gbd_round_id=gbd_round_id)

    # Standard error from the width of the interval.
    interval_width = mortality["upper"] - mortality["lower"]
    mortality["std"] = interval_width / 3.92
    mortality = mortality.drop(columns=['run_id'])

    out_path = os.path.join(out_dir_rate, "02_temp/03_data",
                            "all_cause_mortality.csv")
    mortality.to_csv(out_path, index=False)
Exemplo n.º 13
0
def main() -> None:
    """Build and run the anemia/malaria pre-processing workflow.

    One "subtract" task is added per epi location; one "save" task per
    modelable entity (19390, 19394) is added downstream of all subtract
    tasks.  The final workflow status is printed.
    """
    args = parse_args()
    user = getpass.getuser()  # not used further in this function
    today_string = datetime.date.today().strftime('%m%d%y')
    workflow_label = f'anemia_malaria_{args.decomp_step}_{today_string}'
    workflow = Workflow(
        workflow_args=workflow_label,
        name=workflow_label,
        description=(
            f'Anemia: Malaria pre-processing for decomp {args.decomp_step}'),
        project="proj_anemia",
        stderr="FILEPATH",
        stdout="FILEPATH",
        working_dir=path_to_directory,
        resume=True)

    # Arguments every task receives after its own leading id argument.
    shared_args = [
        "--gbd_round_id", args.gbd_round_id,
        "--decomp_step", args.decomp_step,
        "--out_dir", args.out_dir,
    ]

    # First submit the subtract clinical jobs, one per location.
    demo = get_demographics("epi", gbd_round_id=args.gbd_round_id)
    subtract_tasks = [
        PythonTask(script="FILEPATH",
                   args=["--location_id", loc] + shared_args,
                   name=f"malaria_subtract_{loc}",
                   tag="malaria_subtract",
                   num_cores=2,
                   m_mem_free="8G",
                   max_attempts=3,
                   max_runtime_seconds=60 * 60 * 3,
                   queue='all.q')
        for loc in demo['location_id']
    ]
    workflow.add_tasks(subtract_tasks)

    # Once the new draws exist, save results.
    for modelable_entity_id in [19390, 19394]:
        save_task = PythonTask(
            script="FILEPATH",
            args=["--modelable_entity_id", modelable_entity_id] + shared_args,
            name=f"malaria_save_{modelable_entity_id}",
            tag="malaria_save",
            upstream_tasks=subtract_tasks,
            num_cores=8,
            m_mem_free="100G",
            max_attempts=3,
            max_runtime_seconds=60 * 60 * 24,
            queue='all.q')
        workflow.add_task(save_task)

    status = workflow.run()
    print(f'Workflow finished with status {status}')
Exemplo n.º 14
0
def upload():
    """Save the adjusted Other MSK results (ME 3136, measure 5) and mark best.

    The years uploaded are the ``year_id`` entry of the "epi" demographics
    for ``help.GBD_ROUND``.
    """
    dems = db.get_demographics(gbd_team='epi', gbd_round_id=help.GBD_ROUND)
    upload_years = dems['year_id']
    folder = os.path.join('FILEPATH')
    save_results_epi(
        input_dir=folder,
        input_file_pattern='FILEPATH.h5',
        modelable_entity_id=3136,
        description='other msk adjusted for injuries fractures and dislocations',
        year_id=upload_years,
        measure_id=5,
        mark_best=True)
    print('Successfully uploaded Other MSK')
Exemplo n.º 15
0
def get_locations():
    '''
    Description: return the locations to iterate through for every part of
    the maternal custom process (per the original author's note, down to one
    level of subnationals).

    Args: None

    Output: the "location_id" entry of the "cod" team demographics
    '''
    logger.info("Getting locations")
    cod_demographics = get_demographics(gbd_team="cod")
    return cod_demographics["location_id"]
Exemplo n.º 16
0
    def __init__(self, cluster_dir, year_id, input_me, output_me):
        '''Store the run settings and the most detailed epi demographics.

        Per the original author's note, this class bundles the functions
        shared by the cause-specific classes, which call them in different
        sequences.'''
        self.cluster_dir, self.year_id = cluster_dir, year_id
        self.input_me, self.output_me = input_me, output_me
        self.conn_def = "cod"
        self.gbd_round = 5

        # Age groups and locations of the "epi" demographics for that round.
        demographics = get_demographics("epi", gbd_round_id=self.gbd_round)
        self.most_detailed_ages = demographics['age_group_id']
        self.most_detailed_locs = demographics['location_id']
Exemplo n.º 17
0
def fill_square(df, col, gbd_round_id):
    '''Make ``df`` square across ``col``, filling new rows with 0.

    Every non-draw column other than ``col`` is treated as part of a
    combined index; ``col`` is expanded to the values the "epi" demographics
    list for it in ``gbd_round_id``.'''
    demo = get_demographics(gbd_team='epi', gbd_round_id=gbd_round_id)

    draw_cols = list(df.filter(like='draw_').columns)
    other_cols = list(set(df.columns) - set(draw_cols))
    other_cols.remove(col)

    # Unique combinations of the remaining index columns seen in the data.
    observed = list({tuple(row) for row in df[other_cols].values})
    dimensions = dimensionality.DataFrameDimensions(
        {tuple(other_cols): observed, col: demo[col]},
        {'draw_cols': draw_cols})
    return gbdize.GBDizeDataFrame(dimensions).fill_empty_indices(df, 0)
Exemplo n.º 18
0
 def new_simulation_index(self, year_id):
     """Build and store ``self.simulation_index``.

     Locations are the leaves of the location tree for
     ``self.location_set_version_id``; sexes and age groups come from the
     "epi" demographics.  A falsy ``year_id`` is replaced by the
     demographics' years.
     """
     tree = dbtrees.loctree(
         location_set_version_id=self.location_set_version_id)
     leaf_ids = [leaf.id for leaf in tree.leaves()]
     demo = get_demographics(gbd_team="epi", gbd_round_id=self.gbd_round_id)
     years = year_id if year_id else demo['year_id']
     self.simulation_index = {
         "year_id": years,
         "location_id": leaf_ids,
         "sex_id": demo['sex_id'],
         "age_group_id": demo['age_group_id']
     }
Exemplo n.º 19
0
    def __init__(self, cluster_dir, year_id, input_me, output_me, decomp_step):
        '''This class incorporates all the functions that all the specific
        causes use, but all in different sequence'''
        # Keep the constructor arguments as given.
        self.cluster_dir = cluster_dir
        self.year_id = year_id
        self.input_me = input_me
        self.output_me = output_me
        self.conn_def = "cod"
        # GBD round comes from the shared gbd constants.
        self.gbd_round = gbd.GBD_ROUND_ID
        self.decomp_step = decomp_step

        # Age groups and locations of the "epi" demographics for that round.
        epi_demographics = get_demographics("epi", gbd_round_id=self.gbd_round)
        self.most_detailed_ages = epi_demographics['age_group_id']
        self.most_detailed_locs = epi_demographics['location_id']
        '''get_demographics should return the most detailed demographics for 
Exemplo n.º 20
0
def get_summary_data(infile,encoding,cause):
    """Read the summary file and keep one cause's rows for known locations.

    Rows are limited to ``cause``, to the round-5 "cod" locations and to
    years 1950-2017.  An AssertionError is raised when a required column is
    absent.
    """
    print("**GETTING SUMMARY DATA*** {}".format(get_time_now()))
    full_data = pd.read_csv(infile, encoding=encoding)
    summary_df = full_data.loc[full_data['cause_id'] == cause, :]

    # Data assertions
    required_cols = ['year_id', 'location_id', 'sex_id', 'age_group_id',
                     'val', 'lower', 'upper']
    for col in required_cols:
        assert col in summary_df.columns, "Required column '{}' not in data".format(col)

    demos = get_demographics(gbd_team='cod', gbd_round_id=5)
    years = list(range(1950, 2018))
    known_location = summary_df['location_id'].isin(demos['location_id'])
    wanted_year = summary_df['year_id'].isin(years)
    summary_df = summary_df.loc[known_location & wanted_year, :]

    print("  ...Summary data pulled. {}\n".format(get_time_now()))
    return summary_df
Exemplo n.º 21
0
def adj_data_template(df):
    """Align years, sexes and ages of a data template with GBD demographics.

    * ``year_id`` becomes the "epi" demographics year closest to the mean of
      ``year_start``/``year_end``.
    * ``sex`` is derived from ``sex_id`` when that column exists, otherwise
      ``sex_id`` is derived from ``sex``.
    * Rows are kept when the age span is under 40 years or the start age is
      at least 80, then matched to age groups via ``adjust_span``.

    Raises NoNonZeroValues when nothing is left.
    """
    print("adjusting years")
    df = templating.df_mean(df, "year_id", ["year_start", "year_end"])
    gbd_years = get_demographics('epi')['year_id']

    def _nearest_gbd_year(year):
        # The year with GBD results closest to ``year``.
        return min(gbd_years, key=lambda gbd_year: abs(gbd_year - year))

    df['year_id'] = df['year_id'].apply(_nearest_gbd_year)

    print("Adjusting sexes")
    if 'sex_id' in df.columns:
        id_to_name = {1: 'Male', 2: 'Female', 3: 'Both'}
        df['sex'] = df.apply(lambda row: id_to_name[row['sex_id']], axis=1)
    else:
        name_to_id = {
            'Male': 1,
            'male': 1,
            'Female': 2,
            'female': 2,
            'Both': 3,
            'both': 3
        }
        df['sex_id'] = df.apply(lambda row: name_to_id[row['sex']], axis=1)

    print("Adjusting ages")
    age_span = df["age_end"] - df["age_start"]
    narrow_span = age_span < 40
    terminal_start = df["age_start"] >= 80
    df = df.loc[narrow_span | terminal_start]

    # Age group bounds used to map the data's age range onto age_group_id.
    age_columns = ['age_group_id', 'age_group_years_start',
                   'age_group_years_end']
    age_map = get_age_metadata(age_group_set_id=12)[age_columns]

    # Intersect the data's age spans with the age group spans.
    df = adjust_span(df, ('age_start', 'age_end'), age_map,
                     ('age_group_years_start', 'age_group_years_end'))

    if df.empty:
        raise NoNonZeroValues

    return df
Exemplo n.º 22
0
def main(in_dir, out_dir):
    """Collapse all per-location/year/sex inputs into one value_in file.

    Every location, year and sex of the "epi" demographics is read with
    ``read_data``, the pieces are concatenated and collapsed, the output of
    ``get_values`` is appended, and the result is written to
    ``<out_dir>/value_in/value_in_<functional>_<outcome>.csv``.  An empty
    ``finished_<functional>_<outcome>.txt`` is then written under
    ``<out_dir>/value_in/checks``.

    Relies on the module-level names ``functional`` and ``outcome``.
    ``in_dir`` is accepted but not used in this function.
    """

    def _ensure_dir(path):
        # Create ``path`` if needed; tolerate another job creating it between
        # the check and the call (works on both Python 2 and 3).
        try:
            os.makedirs(path)
        except OSError:
            if not os.path.isdir(path):
                raise

    # grab the demographics to loop over when reading in all of the files
    dems = db.get_demographics(gbd_team="epi")

    # read in all of the files
    data = []
    for loc in dems['location_id']:
        # print() with a single argument behaves the same on Python 2 and 3;
        # the bare print statement is a SyntaxError on Python 3.
        print("Reading {}".format(loc))
        for year in dems['year_id']:
            for sex in dems['sex_id']:
                data.append(read_data(functional, outcome, loc, year, sex))

    # collapse the raw files so that it is just one value; location, year
    # and sex are not kept (per the original author's note)
    raw = pd.concat(data)
    collapsed = collapse(raw)

    # format the value file and then append to the collapsed values
    # NOTE(review): DataFrame.append no longer exists in pandas >= 2.0;
    # switch to pd.concat once the type returned by get_values is confirmed.
    value = get_values()
    result = collapsed.append(value)

    # output the results to value_in directory
    folder = os.path.join(out_dir, "value_in")
    _ensure_dir(folder)

    filepath = os.path.join(
        folder,
        "value_in_{functional}_{outcome}.csv".format(functional=functional,
                                                     outcome=outcome))
    result.to_csv(filepath, index=False)

    # create an (empty) finished.txt for the check system
    finished = []
    checks = os.path.join(out_dir, "value_in/checks")
    _ensure_dir(checks)
    np.savetxt(
        os.path.join(checks, "finished_{}_{}.txt".format(functional, outcome)),
        finished)
Exemplo n.º 23
0
    def get_data(self, model_version_id):
        """Load the dismod input rows of a model version, indexed by data key.

        Both-sex rows (sex_id 3) are dropped; rows are kept when the age
        span is under 20 years or the start age is at least 80.  A
        ``year_id`` column holds the "epi" demographics year closest to the
        midpoint of ``year_start``/``year_end``.

        Raises NoNonZeroValues when no rows survive the filters.
        """
        demo_query = """
        SELECT
            t3mvd.model_version_dismod_id as {data_key},
            t3mvd.location_id,
            t3mvd.sex_id,
            t3mvd.year_start,
            t3mvd.year_end,
            t3mvd.age_start,
            t3mvd.age_end,
            t3mvd.measure_id,
            t3mvd.nid,
            t3mvd.underlying_nid,
            t3mvd.outlier_type_id
        FROM
            epi.t3_model_version_dismod t3mvd
        WHERE
            t3mvd.model_version_id = {model_version_id}
        """.format(data_key=self._data_key, model_version_id=model_version_id)
        raw = ezfuncs.query(demo_query,
                            conn_def=envs.Environment.get_odbc_key())

        # Subset out demographics.
        single_sex = raw["sex_id"] != 3
        narrow_span = (raw["age_end"] - raw["age_start"]) < 20
        terminal_start = raw["age_start"] >= 80
        df = raw.loc[single_sex & (narrow_span | terminal_start)]

        if df.empty:
            raise NoNonZeroValues

        # Snap the midpoint of each row's year range to the nearest GBD year.
        year_mid = (df['year_start'] + df['year_end']) / 2
        gbd_years = get_demographics('epi')['year_id']
        df['year_id'] = year_mid.apply(
            lambda mid: min(gbd_years, key=lambda gbd_year: abs(gbd_year - mid)))

        # Index by the integer data key.
        df[self._data_key] = df[self._data_key].astype(int)
        return df.set_index(self._data_key)
Exemplo n.º 24
0
def main(ecode, year_id, sex_id, decomp, version):
    """Compute and write shock incidence for one e-code, year and sex.

    Populations come from the ``pops.nc`` flat file of the e-code's
    envelope version, restricted to the requested year and sex; the ratios
    come from the ``ratios.nc`` file of the e-code's ratio version.
    """
    tic = time.time()  # not used further in this function
    version = version.rstrip()  # trailing whitespace removed once, up front

    dems = db.get_demographics(gbd_team="epi", gbd_round_id=help.GBD_ROUND)

    env_version = versions.get_env(ecode, version)
    pop_path = paths.DATA_DIR / f"flats/{env_version}/pops.nc"
    pops = xr.open_dataset(pop_path)
    pops = pops.loc[{'year_id': [year_id], 'sex_id': [sex_id]}]

    ratio_version = str(versions.get_crv(ecode, version))
    ratio_path = os.path.join(paths.DATA_DIR, decomp, 'inc_mortality_ratios',
                              ratio_version, 'ratios.nc')
    ratio_file = xr.open_dataarray(ratio_path)

    regmap = get_region_map(dems['location_id'])
    incidence = get_shock_inc(ecode, pops, dems, year_id, sex_id, decomp,
                              ratio_file, regmap)

    write_results(incidence, ecode, decomp, version, year_id, sex_id)
Exemplo n.º 25
0
    def _add_super_squeeze_task(self, node):
        """Add the super-squeeze tasks for ``node`` plus their save tasks.

        One task is created and registered per epi location, year in
        ``self.YEAR_IDS`` and epi sex.  Afterwards a save task is added for
        every output ME of the node, upstream of every registered task whose
        name contains ``DAG.Tasks.SUPER_SQUEEZE``.
        """
        logging.info(f"Adding {node} task")

        node_outputs = self.pgraph.nodes[node]["outs"]
        self._create_output_directories(node_outputs)

        # The dependencies are identical for every parallelized demographic,
        # so they are resolved once, before the loops.
        dep_list = get_dependencies(node, self.pgraph, self._task_registry)

        epi_demo = get_demographics("epi", gbd_round_id=self.gbd_round_id)
        location_ids = epi_demo[Params.LOCATION_ID]
        sex_ids = epi_demo[Params.SEX_ID]
        for location_id in location_ids:
            for year_id in self.YEAR_IDS:
                for sex_id in sex_ids:
                    ss_task = self._super_squeeze_fac.get_task(
                        node=node,
                        output_dir=self.DATA_DIR,
                        location_id=location_id,
                        year_id=year_id,
                        sex_id=sex_id,
                        decomp_step=self.decomp_step,
                        n_draws=self.N_DRAWS,
                        dependency_list=dep_list)
                    self.workflow.add_task(ss_task)
                    task_name = SuperSqueezeFactory.get_task_name(
                        node, location_id, year_id, sex_id)
                    self._task_registry[task_name] = ss_task

        ss_upstream = [
            task for name, task in self._task_registry.items()
            if DAG.Tasks.SUPER_SQUEEZE in name
        ]
        description = (
            f"Super_Squeeze_auto_mark_{Params.DESCRIPTION_MAP[self.N_DRAWS]}")
        measure_id = [gbd.measures.PREVALENCE]
        file_pattern = "{location_id}/{measure_id}_{year_id}_{sex_id}.h5"
        for meid in self.pgraph.nodes[node]["outs"]:
            self._add_save_task(meid, file_pattern, description, measure_id,
                                self.YEAR_IDS, self.N_DRAWS, ss_upstream)
Exemplo n.º 26
0
def get_hale_ages(gbd_round_id: int) -> Tuple[List[int], List[int]]:
    """
    Return ``(hale_ages, under_one_ages)`` for a GBD round.

    ``under_one_ages`` is the birth age group followed by the round's epi
    age groups that start before age one; ``hale_ages`` is the under-one
    aggregate followed by every remaining epi age group, ordered by start
    age.
    """
    # The local name age_group_ids is referenced from the query string below.
    age_group_ids = db_queries.get_demographics(
        'epi', gbd_round_id)[columns.AGE_GROUP_ID]
    age_spans = (
        get_age_spans()
        .query(f'{columns.AGE_GROUP_ID} in @age_group_ids')
        .sort_values(columns.AGE_GROUP_YEARS_START)
    )

    starts_before_one = age_spans[columns.AGE_GROUP_YEARS_START] < 1
    infant_ids = age_spans.loc[
        starts_before_one, columns.AGE_GROUP_ID].tolist()
    under_one_ages = [age_groups.BIRTH] + infant_ids

    is_under_one = age_spans[columns.AGE_GROUP_ID].isin(under_one_ages)
    older_ids = age_spans.loc[~is_under_one, columns.AGE_GROUP_ID].tolist()
    hale_ages = [age_groups.UNDER_ONE] + older_ids

    return hale_ages, under_one_ages
Exemplo n.º 27
0
######################################################


#import modules

import db_queries as db
import pandas as pd
import numpy as np
import os
import copy
import sys
import time
import shutil 

#import demographics, set directories, set variables 
# Demographics for the "epi" team; only the location list is pulled out here.
dems = db.get_demographics(gbd_team = "epi")
location_ids = dems['location_id']

# Code directory and shell wrapper (paths are redacted in this copy).
code_dir = "FILEPATH"
shell =  "FILEPATH"

# Input and output directories.
# NOTE(review): `date` is not defined in the visible lines — presumably set
# elsewhere before this point; confirm.
in_dir = "FILEPATH"
in_dir_data = "FILEPATH" + date + "FILEPATH"
in_dir_value = "FILEPATH" + date + "FILEPATH"
in_dir_rate = "FILEPATH" + date + "FILEPATH"
out_dir_ODE = "FILEPATH" + date + "FILEPATH"

# Outcomes and the functional this script is configured for.
outcomes = ["epilepsy", "long_modsev"]
functional = "encephalitis"

##delete / recreate checks folder, log start time
Exemplo n.º 28
0
######################################################

# import modules

import db_queries as db
import pandas as pd
import numpy as np
import os
import copy
import sys
import time
import shutil

#import demographics, set directories, set variables 

# Demographics for the "epi" team; only the location list is pulled out here.
# NOTE(review): `gbd_round` is not defined in the visible lines — presumably
# set elsewhere before this point; confirm.
dems = db.get_demographics(gbd_team = "epi", gbd_round_id = gbd_round)
location_ids = dems['location_id']

# test dems
# location_ids = [44651]

#create test variables !! Do not forget to open parallel file and select one year and sex if trying to replicate one file

code_dir = # filepath
shell =  code_dir +"python_shell.sh"

in_dir = # filepath
in_dir_data = # filepath
in_dir_value = # filepath
in_dir_rate = # filepath
out_dir_ODE = # filepath
Exemplo n.º 29
0
def get_age_groups():
    """Return the 'age_group_ids' entry of the "cod" team demographics."""
    # NOTE(review): the key here is plural ('age_group_ids'); confirm it
    # matches the keys returned by the installed get_demographics version.
    cod_demographics = get_demographics('cod')
    return cod_demographics['age_group_ids']
Exemplo n.º 30
0
class SquareImport(object):
    """Import draws and reshape them onto a complete ("square") index.

    The default index dimensions are the round-5 "epi" years, age groups and
    locations, sexes 1 and 2, and measures 5 and 6; the default draw columns
    are ``draw_0`` .. ``draw_999``.  NOTE: the demographics are fetched when
    the class body is executed, i.e. when the defining module is imported.
    """

    _epi_demographics = get_demographics("epi", gbd_round_id=5)
    _idx_dmnsns = {
        "year_id": _epi_demographics['year_id'],
        "age_group_id": _epi_demographics['age_group_id'],
        "sex_id": [1, 2],
        "location_id": _epi_demographics['location_id'],
        "measure_id": [5, 6]
    }

    _draw_cols = ["draw_{i}".format(i=i) for i in range(0, 1000)]

    def __init__(self, idx_dmnsns=None, draw_cols=None):
        """Set up the index dimensions, draw columns and template index.

        Args:
            idx_dmnsns: mapping of index column name to its values; the
                class defaults are used when None.  Stored sorted by name.
            draw_cols: list of draw column names; the class default is used
                when None.
        """
        if idx_dmnsns is None:
            self.idx_dmnsns = collections.OrderedDict(
                sorted(self.default_idx_dmnsns.items()))
        else:
            self.idx_dmnsns = collections.OrderedDict(
                sorted(idx_dmnsns.items()))

        if draw_cols is None:
            self.draw_cols = self.default_draw_cols
        else:
            self.draw_cols = draw_cols

        # expected index
        self.index_df = self.get_index_df()

    @ClassProperty
    @classmethod
    def default_idx_dmnsns(cls):
        """Return a shallow copy of the class-level index dimensions."""
        return cls._idx_dmnsns.copy()

    @ClassProperty
    @classmethod
    def default_draw_cols(cls):
        """Return a copy of the class-level draw column names."""
        return cls._draw_cols[:]

    def get_index_df(self):
        """create template index for square dataset"""
        # Materialise the dict views as lists so pandas receives plain
        # sequences on Python 3 as well as Python 2.
        idx = pd.MultiIndex.from_product(list(self.idx_dmnsns.values()),
                                         names=list(self.idx_dmnsns.keys()))
        return pd.DataFrame(index=idx)

    def import_square(self, gopher_what, source, filler=None, **kwargs):
        """get draws for the specified modelable entity by dimensions

        Args:
            gopher_what: mapping of gbd id type to gbd id.
            source: draw source passed through to ``get_draws``.
            filler: value used for index rows without draws (0 when None).
            **kwargs: measure_id, location_id, year_id, age_group_id and
                sex_id to request; the instance's index dimensions are used
                when no keyword is given.

        Returns:
            DataFrame on the full template index holding the draw columns,
            with missing entries replaced by ``filler``.
        """
        if not kwargs:
            kwargs = self.idx_dmnsns.copy()

        if filler is None:
            filler = 0

        # On Python 3, .keys()/.values() are views rather than lists; pass
        # real lists to get_draws and to pandas (set_index rejects a keys
        # view as an unhashable label).
        index_names = list(self.idx_dmnsns.keys())

        df = get_draws(gbd_id_type=list(gopher_what.keys()),
                       gbd_id=list(gopher_what.values()),
                       source=source,
                       measure_id=kwargs['measure_id'],
                       location_id=kwargs['location_id'],
                       year_id=kwargs['year_id'],
                       age_group_id=kwargs['age_group_id'],
                       sex_id=kwargs['sex_id'],
                       gbd_round_id=5)

        for c in index_names:
            df[c] = pd.to_numeric(df[c])
        df = df.set_index(index_names)
        df = df[self.draw_cols]
        # Align onto the full template index, then fill the gaps.
        df = pd.concat([self.index_df, df], axis=1)
        df.fillna(value=filler, inplace=True)
        return df