Exemplo n.º 1
0
def save_mortality(ecode, year_id, sex_id, locs, ages, decomp, version):
    """Fetch CODEm draws for one e-code and save them as a netCDF file.

    Pulls the "best" CODEm draws for the cause mapped to ``ecode``, divides
    every draw column by the ``pop`` column, converts the frame to an xarray
    object indexed by location/year/sex/age group, and writes it to
    ``<DATA_DIR>/<decomp>/<parent e-code>/<version>/mortality_for_shocks/``
    as ``mort_<year_id>_<sex_id>.nc``.

    Args:
        ecode: E-code; mapped to a cause id via ``help.get_cause`` and to its
            parent folder via ``inj_info.ECODE_PARENT``.
        year_id: Year id passed to ``get_draws`` and used in the file name.
        sex_id: Sex id passed to ``get_draws`` and used in the file name.
        locs: Location id(s) passed to ``get_draws``.
        ages: Age group id(s) passed to ``get_draws``.
        decomp: Decomposition step; passed to ``get_draws`` and used as a
            path component.
        version: Version string; trailing whitespace is stripped before it is
            used as a path component.

    Raises:
        OSError: If the output folder cannot be created for any reason other
            than it already existing.
    """
    # Local import: the original relied on `os.errno`, which is not a public
    # attribute of `os` and no longer exists on Python 3.7+.
    import errno

    cause_id = help.get_cause(ecode)
    draws = gd.get_draws(
        gbd_id_type="cause_id",
        gbd_id=cause_id,
        location_id=locs,
        year_id=year_id,
        sex_id=sex_id,
        age_group_id=ages,
        status="best",
        source="codem",
        gbd_round_id=help.GBD_ROUND,
        decomp_step=decomp
    )

    # Divide every draw by population (presumably counts -> rates).
    draw_cols = help.drawcols()
    draws[draw_cols] = draws[draw_cols].divide(draws['pop'], axis=0)
    draws.drop(['pop', 'envelope', 'cause_id', 'sex_name', 'measure_id', 'metric_id'], axis=1, inplace=True)
    draws.set_index(['location_id', 'year_id', 'sex_id', 'age_group_id'], inplace=True)
    mort = etl.df_to_xr(draws, wide_dim_name='draw', fill_value=np.nan)

    filename = "mort_{}_{}.nc".format(str(year_id), str(sex_id))
    version = version.rstrip()
    folder = os.path.join(paths.DATA_DIR, decomp, inj_info.ECODE_PARENT[ecode], version, 'mortality_for_shocks')

    # Create the folder, tolerating a concurrent job having created it first.
    try:
        os.makedirs(folder)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise

    mort.to_netcdf(os.path.join(folder, filename))
Exemplo n.º 2
0
def save_mortality(ecode, year_id, sex_id, locs, ages, version):
    """Fetch CODEm draws for one e-code and save them as a netCDF file.

    Pulls the "best" CODEm draws for the cause mapped to ``ecode``, divides
    every draw column by the ``pop`` column, converts the frame to an xarray
    object indexed by location/year/sex/age group, and writes it with
    ``to_netcdf``.

    NOTE(review): the output folder and file name are redacted ("FILEPATH")
    in this copy, so ``version`` and the ``format`` arguments are not visibly
    used; the literals are kept exactly as found.

    Args:
        ecode: E-code; mapped to a cause id via ``help.get_cause``.
        year_id: Year id passed to ``get_draws``.
        sex_id: Sex id passed to ``get_draws``.
        locs: Location id(s) passed to ``get_draws``.
        ages: Age group id(s) passed to ``get_draws``.
        version: Version identifier (unused in the redacted paths).

    Raises:
        OSError: If the output folder cannot be created for any reason other
            than it already existing.
    """
    # Local import: the original relied on `os.errno`, which is not a public
    # attribute of `os` and no longer exists on Python 3.7+.
    import errno

    cause_id = help.get_cause(ecode)
    draws = gd.get_draws(
        gbd_id_type="cause_id",
        gbd_id=cause_id,
        location_id=locs,
        year_id=year_id,
        sex_id=sex_id,
        age_group_id=ages,
        status="best",
        source="codem",
        gbd_round_id=help.GBD_ROUND
    )

    # Divide every draw by population (presumably counts -> rates).
    draw_cols = help.drawcols()
    draws[draw_cols] = draws[draw_cols].divide(draws['pop'], axis=0)
    draws.drop(['pop', 'envelope', 'cause_id', 'sex_name', 'measure_id', 'metric_id'], axis=1, inplace=True)
    draws.set_index(['location_id', 'year_id', 'sex_id', 'age_group_id'], inplace=True)
    mort = etl.df_to_xr(draws, wide_dim_name='draw', fill_value=np.nan)

    filename = "FILEPATH.nc".format(str(year_id), str(sex_id))
    folder = os.path.join("FILEPATH")

    # Create the folder, tolerating a concurrent job having created it first.
    try:
        os.makedirs(folder)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise

    filepath = os.path.join(folder, filename)
    print("Writing mortality")
    mort.to_netcdf(filepath)
Exemplo n.º 3
0
def smr(ncode):
    """Build 1000 draws of the SMR for one N-code.

    Reads the SMR table from ``paths.INPUT_DIR``, keeps the rows for
    ``ncode`` (``"N48"`` is looked up under the ``"N9"`` rows and then
    relabelled as ``"N48"``), samples normal draws from the ``SMR`` column
    with a standard error of ``(UL - LL) / 3.92``, floors every value at 1,
    and returns the result via ``etl.df_to_xr`` indexed by ``ncode`` and
    ``age_group_id``.

    The global NumPy random seed is set to a fixed value, so the draws are
    reproducible.
    """
    csv_path = os.path.join(paths.INPUT_DIR, 'FILEPATH.csv')
    table = pd.read_csv(csv_path).drop('name', axis=1)

    # N48 borrows the N9 rows; the label is restored a few lines below.
    lookup_code = "N9" if ncode == "N48" else ncode
    table = table.loc[table["ncode"] == lookup_code]

    # Width of the LL-UL interval over 3.92 (presumably a 95% interval).
    table["se"] = (table["UL"] - table["LL"]) / 3.92
    table["ncode"] = ncode

    # A fresh 0..n-1 index is required so the sampled frame lines up row by
    # row when it is assigned below.
    table.reset_index(drop=True, inplace=True)

    np.random.seed(659177)
    samples = np.random.normal(table['SMR'], table['se'],
                               size=(1000, len(table)))
    table[help.drawcols()] = pd.DataFrame(samples.T)
    table.drop(['SMR', 'UL', 'LL', 'se'], inplace=True, axis=1)

    table = help.convert_to_age_group_id(table, collapsed_0=False)
    table.set_index(['ncode', 'age_group_id'], inplace=True)

    # Floor every draw at 1.
    table[table < 1] = 1
    return etl.df_to_xr(table, wide_dim_name='draw', fill_value=np.nan)
Exemplo n.º 4
0
def spinal_split(df, ecode, ncode, year_id, decomp, version):
    """Split draws for a spinal N-code into sub-codes a-d and save each one.

    For each suffix ``s`` in ``a``..``d`` the proportion draws in
    ``prop_<s>.csv`` are read, their ``prop_<draw>`` columns are renamed to
    the plain draw names, and ``df`` is multiplied by the first row of that
    file. The result is written as an HDF5 table to
    ``<DATA_DIR>/<decomp>/<parent>/<version>/upload/<ecode>/<ncode><s>/36_<year_id>.h5``.

    Args:
        df: DataFrame of draws to split; its index is reset into columns
            before writing.
        ecode: E-code; also used to look up the parent in
            ``inj_info.ECODE_PARENT``.
        ncode: Parent N-code; the suffix is appended to form the folder name.
        year_id: Year id used in the output file name.
        decomp: Decomposition step, used as a path component.
        version: Version string; trailing whitespace is stripped.
    """
    spinal_split_folder = ("FILEPATH")
    drawdict = {'prop_' + d: d for d in help.drawcols()}
    parent = inj_info.ECODE_PARENT[ecode]
    filename = f"36_{year_id}.h5"

    # Loop-invariant: strip once and build the shared part of the path once.
    version = version.rstrip()
    # os.path.join accepts both str and path-like parts, so this works
    # whether paths.DATA_DIR is a string or a pathlib.Path.
    upload_root = os.path.join(paths.DATA_DIR, decomp, parent, str(version),
                               'upload', ecode)

    for s in ['a', 'b', 'c', 'd']:
        # The original used `folder / name`, which fails when the folder is a
        # plain string as it is here.
        split_prop = pd.read_csv(
            os.path.join(spinal_split_folder, f"prop_{s}.csv"))
        split_prop.rename(columns=drawdict, inplace=True)
        split_prop.drop('acause', axis=1, inplace=True)

        result = df * split_prop.loc[0]
        result.reset_index(inplace=True)

        # The original `... / ecode / ncode + s` parsed as
        # `(... / ncode) + s`; the suffix belongs to the last path component.
        out_dir = os.path.join(upload_root, ncode + s)
        os.makedirs(out_dir, exist_ok=True)

        result.to_hdf(
            os.path.join(out_dir, filename),
            key='draws',
            mode='w',
            format='table',
            data_columns=['location_id', 'year_id', 'sex_id', 'age_group_id'])
Exemplo n.º 5
0
def spinal_split(df, ecode, ncode, year_id, version):
    """Split draws for a spinal N-code into sub-codes a-d and save each one.

    For each suffix in ``a``..``d`` the proportion draws are read, their
    ``prop_<draw>`` columns are renamed to the plain draw names, and ``df``
    is multiplied by the first row of that file. The result is written as an
    HDF5 table under the key ``draws``.

    NOTE(review): the input and output paths are redacted ("FILEPATH") in
    this copy, so they do not visibly depend on the suffix, ``ecode``,
    ``ncode`` or ``version``; the literals are kept exactly as found.

    Args:
        df: DataFrame of draws to split; its index is reset into columns
            before writing.
        ecode: E-code; used to look up the parent in
            ``inj_info.ECODE_PARENT``.
        ncode: Parent N-code.
        year_id: Year id passed to the file name ``format`` call.
        version: Version identifier.

    Raises:
        OSError: If the output folder cannot be created for any reason other
            than it already existing.
    """
    # Local import: the original relied on `os.errno`, which is not a public
    # attribute of `os` and no longer exists on Python 3.7+.
    import errno

    spinal_split_folder = 'FILEPATH'
    drawdict = {'prop_' + d: d for d in help.drawcols()}
    parent = inj_info.ECODE_PARENT[ecode]
    filename = 'FILEPATH.h5'.format(year_id)

    for s in ['a', 'b', 'c', 'd']:
        # load proportion draws
        split_prop = pd.read_csv(
            os.path.join(spinal_split_folder, 'FILEPATH.csv'))
        split_prop.rename(columns=drawdict, inplace=True)
        split_prop.drop('acause', axis=1, inplace=True)

        # split the ncode
        result = df * split_prop.loc[0]
        result.reset_index(inplace=True)

        # save; tolerate the folder having been created by a concurrent job
        out_dir = os.path.join("FILEPATH")
        try:
            os.makedirs(out_dir)
        except OSError as e:  # `except OSError, e` is a SyntaxError on Python 3
            if e.errno != errno.EEXIST:
                raise

        result.to_hdf(
            os.path.join(out_dir, filename),
            key='draws',
            mode='w',
            format='table',
            data_columns=['location_id', 'year_id', 'sex_id', 'age_group_id'])
Exemplo n.º 6
0
def create_lt_grid(platform, ages):
    """Return an N-code x age grid whose draw columns are all 1.

    The N-codes are those returned by ``inj_info.get_lt_ncodes(platform)``
    that are not listed in ``inj_info.ST_NCODES``. The frame has the columns
    ``ncode``, ``age_gr``, ``platform`` and one column per draw name.
    """
    excluded = inj_info.ST_NCODES
    ncodes = []
    for code in inj_info.get_lt_ncodes(platform):
        if code not in excluded:
            ncodes.append(code)

    grid = pd.DataFrame(help.expandgrid(ncodes, ages))
    grid.columns = ["ncode", "age_gr"]
    grid["platform"] = platform

    # Every draw column gets the constant 1.
    return grid.assign(**{draw: 1 for draw in help.drawcols()})
Exemplo n.º 7
0
def create_lt_grid(platform, ages):
    """Build the grid of N-codes that get a long-term probability of 1.

    Takes the long-term N-codes for ``platform`` minus those in
    ``inj_info.ST_NCODES``, crosses them with ``ages``, tags each row with
    ``platform`` and sets every draw column to 1.
    """
    lt_ncodes = inj_info.get_lt_ncodes(platform)
    ncodes = list(filter(lambda x: x not in inj_info.ST_NCODES, lt_ncodes))

    grid = pd.DataFrame(help.expandgrid(ncodes, ages))
    grid.columns = ["ncode", "age_gr"]
    grid["platform"] = platform

    # 100% long-term probability in every draw
    draw_names = help.drawcols()
    for name in draw_names:
        grid[name] = 1

    return grid
Exemplo n.º 8
0
def main(ecode, year_id, sex_id, version):
    """Compute unsplit short-term incidence for one e-code/year/sex and save it.

    Loads the incidence, remission and EMR draws via ``get_measures``,
    converts each to xarray, fetches the outpatient coverage for the
    modelable entity, passes all four to
    ``calculate_measures.short_term_incidence_unsplit`` and hands the result
    to ``write_results``. The elapsed wall-clock time is printed at the end.
    """
    started = time.time()

    me_id = help.get_me(ecode)
    m_draws = get_measures(ecode, me_id, year_id, sex_id, version)

    # Same conversion for each measure, in the original order.
    x_inc, x_rem, x_emr = (
        etl.df_to_xr(m_draws[measure], wide_dim_name='draw', fill_value=np.nan)
        for measure in ('incidence', 'remission', 'emr')
    )

    otp_cov = outpatient_cov(me_id, help.drawcols())
    adjusted_inc = calculate_measures.short_term_incidence_unsplit(
        x_inc, x_rem, x_emr, otp_cov)
    write_results(adjusted_inc, ecode, version, year_id, sex_id)

    elapsed = time.time() - started
    print("Total time was {} seconds".format(elapsed))
Exemplo n.º 9
0
def main(ecode, ncode, platform, year, decomp, version, flat_version):
    """Run ``run_model_injuries`` for every location and sex and save results.

    For each location from ``help.ihme_loc_id_dict`` and each sex id in
    ``[1, 2]``, either runs ``run_model_injuries`` on that location's input
    files or, when ``eta_incidence`` in the value_in file is 0, builds an
    all-zero frame instead. The per-location/sex frames are tagged with
    ``location_id``, ``platform``, ``year_id`` and ``sex_id``, concatenated
    and passed to ``write_results``.

    Args:
        ecode: E-code; selects the input/output sub-folders.
        ncode: N-code; used in input/output file names.
        platform: Platform label; used in file names and added as a column.
        year: Year id; used in paths and added as ``year_id``.
        decomp: Decomposition step, used as a path component.
        version: Version string; trailing whitespace is stripped.
        flat_version: Version of the ``flats`` folder holding rate_in files.

    Raises:
        OSError: If an output folder cannot be created for any reason other
            than it already existing.
    """
    # Local import: the original relied on `os.errno`, which is not a public
    # attribute of `os` and no longer exists on Python 3.7+.
    import errno

    dems = db.get_demographics(gbd_team="epi", gbd_round_id=help.GBD_ROUND)
    dm_settings = os.path.join(paths.SHARE_DIR, 'dismod_settings')
    version = version.rstrip()
    dm_dir = os.path.join(paths.DATA_DIR, decomp, inj_info.ECODE_PARENT[ecode],
                          str(version), "dismod_ode", ecode)
    metaloc = db.get_location_metadata(location_set_id=35,
                                       gbd_round_id=help.GBD_ROUND)

    # NOTE(review): the return value is not used here; the call is kept in
    # case write_path has side effects (e.g. creating the folder) - confirm.
    write_path(ecode, ncode, platform, year, decomp, version)
    locations = help.ihme_loc_id_dict(metaloc, dems['location_id'])

    alldata = []
    value_in = os.path.join(dm_dir, "value_in",
                            "value_in_{}_{}.csv".format(ncode, platform))
    draw_in = os.path.join(dm_settings, "draw_in.csv")
    plain_in = os.path.join(dm_settings, "plain_in.csv")
    effect_in = os.path.join(dm_settings, "effect_in.csv")

    v_in = pd.read_csv(value_in)

    # Loop-invariant, so evaluated once. `.iloc[0]` takes the first matching
    # row by position; the original `[0]` was a label lookup that raises
    # KeyError unless the eta_incidence row happens to have index label 0.
    eta_incidence = v_in.loc[v_in['name'] == 'eta_incidence', 'value']
    zero_incidence = float(eta_incidence.iloc[0]) == 0

    for locn in locations:
        for sex in [1, 2]:

            start = help.start_timer()

            if zero_incidence:
                # No model run needed: every draw is 0 for every age group.
                result = pd.DataFrame({'age_group_id': dems['age_group_id']})
                result = result.assign(**{d: 0 for d in help.drawcols()})
                result = help.convert_from_age_group_id(result)
            else:
                data_in = os.path.join(
                    dm_dir, "data_in", locations[locn], str(year), str(sex),
                    ecode, "data_in_{}_{}.csv".format(ncode, platform))

                if ncode in inj_info.EMR_NCODES:
                    rate_in_name = "rate_in_emr.csv"
                else:
                    rate_in_name = "rate_in_no_emr.csv"
                rate_in = os.path.join(paths.DATA_DIR, 'flats',
                                       str(flat_version), 'rate_in', str(year),
                                       str(sex), locations[locn], rate_in_name)

                draw_out_dir = os.path.join(dm_dir,
                                            "prev_results", locations[locn],
                                            str(year), str(sex))
                draw_out = os.path.join(
                    draw_out_dir,
                    "prevalence_{}_{}.csv".format(ncode, platform))

                # Tolerate a concurrent job having created the folder first.
                try:
                    os.makedirs(draw_out_dir)
                except OSError as e:
                    if e.errno != errno.EEXIST:
                        raise

                result = run_model_injuries(draw_in, data_in, value_in,
                                            plain_in, rate_in, effect_in,
                                            draw_out, 1000)

            result['location_id'] = locn
            result['platform'] = platform

            result['year_id'] = year
            result['sex_id'] = sex

            alldata.append(result)
            help.end_timer(start)
            sys.stdout.flush()  # write to log file

    final = pd.concat(alldata)

    write_results(final, ecode, ncode, platform, year, decomp, version)
Exemplo n.º 10
0
# Convert the untreated and treated disability-weight frames to xarray, with
# the draw columns collapsed into a 'draw' dimension.
u_dw = etl.df_to_xr(untreat_dw, wide_dim_name='draw', fill_value=np.nan)
t_dw = etl.df_to_xr(treated_dw, wide_dim_name='draw', fill_value=np.nan)

dems = db.get_demographics(gbd_team='epi', gbd_round_id=help.GBD_ROUND)

# Get the percent treated in each country-year, and multiply by dws to get total dw
p_t = calculate_measures.pct_treated()
# Weighted mix: treated weight for the treated share, untreated for the rest.
dw = t_dw * p_t + u_dw * (1 - p_t)

# Load in split proportions for spinal cord injuries and find weighted average disability weight among the 4 splits
# n_parent maps every index label of treated_dw to its first three characters.
n_parent = pd.Series(index=treated_dw.index,
                     data=[n[0:3] for n in treated_dw.index],
                     name='ncode_parent')
spinal_split_folder = 'FILEPATH'
# Maps each 'prop_<draw>' column name to the plain draw column name.
drawdict = {'prop_' + d: d for d in help.drawcols()}
split_props_list = []
for s in ['a', 'b', 'c', 'd']:
    # load proportion draws
    # NOTE(review): the path is redacted in this copy, so it does not visibly
    # depend on `s` or on spinal_split_folder.
    split_prop = pd.read_csv(os.path.join('FILEPATH.csv'))
    split_prop.rename(columns=drawdict, inplace=True)
    split_prop.drop('acause', axis=1, inplace=True)
    # Relabel row 0 as '<parent><suffix>' for each of the two parents, so the
    # same proportions are added once per parent.
    for n in ['N33', 'N34']:
        split_props_list.append(split_prop.rename({0: n + s}))

split_props = pd.concat(split_props_list)
split_props.index.rename('ncode', inplace=True)
# Every index label shorter than four characters gets the constant 1 in all
# draws ...
other_ncodes = pd.DataFrame(index=[n for n in treated_dw.index if len(n) < 4],
                            columns=help.drawcols(),
                            data=1)
# ... except the two parents handled by the split rows above, which get 0.
other_ncodes.loc[['N33', 'N34']] = 0
Exemplo n.º 11
0
def lognormal():
    """Sample short-term duration draws and write treated/untreated CSVs.

    Reads the 'short-term durations' sheet, takes columns 2-5 as the
    inpatient and 6-9 as the outpatient mean/se/ll/ul and columns 10-12 as
    the multiplier mean/ll/ul. Treated durations are divided by 365.25,
    sampled from a lognormal and capped at 1; multipliers are sampled from a
    normal and floored at 0. Untreated durations are treated x multiplier,
    also capped at 1. Both tables are written to CSV.

    The global NumPy random seed is fixed, so the draws are reproducible.
    """
    # load file
    filepath = 'FILEPATH.xlsx'
    raw = pd.read_excel(filepath,
                        sheet_name='short-term durations',
                        header=None,
                        skiprows=9,
                        index_col=0)

    # subset to the right data and name the columns
    stat_names = ['mean', 'se', 'll', 'ul']
    inpatient = raw[[2, 3, 4, 5]].rename(
        columns=dict(zip([2, 3, 4, 5], stat_names)))
    outpatient = raw[[6, 7, 8, 9]].rename(
        columns=dict(zip([6, 7, 8, 9], stat_names)))
    mults = raw.reset_index()[[0, 10, 11, 12]].rename(
        columns={0: 'ncode', 10: 'mean', 11: 'll', 12: 'ul'})

    treated = pd.concat([inpatient, outpatient],
                        keys=['inpatient', 'outpatient'],
                        names=['platform', 'ncode'])

    # create SE where it doesn't already exist
    interval_se = (treated['ul'] - treated['ll']) / 3.92
    treated['se'] = treated['se'].fillna(interval_se)
    mults['se'] = (mults['ul'] - mults['ll']) / 3.92

    # convert to years
    treated = treated / 365.25

    # lognormal parameters derived from the mean and standard error
    mean_sq = treated['mean']**2
    treated['mu'] = np.log(mean_sq / np.sqrt(treated['se']**2 + mean_sq))
    treated['sig'] = np.sqrt(np.log(1 + (treated['se'] / treated['mean'])**2))

    # make draws; the index is reset so rows line up with the sampled frame
    treated = treated.reset_index()
    np.random.seed(81112)
    treated_samples = np.random.lognormal(treated['mu'],
                                          treated['sig'],
                                          size=(1000, len(treated)))
    treated[help.drawcols()] = pd.DataFrame(treated_samples.T)
    mult_samples = np.random.normal(mults['mean'], mults['se'],
                                    size=(1000, len(mults)))
    mults[help.drawcols()] = pd.DataFrame(mult_samples.T)

    # format
    treated = treated.drop(['mean', 'se', 'll', 'ul', 'mu', 'sig'], axis=1)
    treated = treated.set_index(['ncode', 'platform'])
    mults = mults.drop(['mean', 'se', 'll', 'ul'], axis=1)
    mults = mults.set_index(['ncode'])

    # make untreated
    mults[mults < 0] = 0  # multipliers can't be negative
    treated[treated > 1] = 1  # short term, so no longer than one year
    untreated = treated * mults
    untreated[untreated > 1] = 1

    outdir = 'FILEPATH'
    treated_path = os.path.join(outdir, 'FILEPATH.csv')
    treated.sort_index().to_csv(treated_path)
    untreated_path = os.path.join(outdir, 'FILEPATH.csv')
    untreated.sort_index().to_csv(untreated_path)
Exemplo n.º 12
0
# Convert the untreated and treated disability-weight frames to xarray, with
# the draw columns collapsed into a 'draw' dimension.
u_dw = etl.df_to_xr(untreat_dw, wide_dim_name='draw', fill_value=np.nan)
t_dw = etl.df_to_xr(treated_dw, wide_dim_name='draw', fill_value=np.nan)


dems = db.get_demographics(gbd_team='epi', gbd_round_id=help.GBD_ROUND)


# Mix treated and untreated weights by the treated share for this step.
decomp = 'step1'
p_t = calculate_measures.pct_treated(decomp)
dw = t_dw * p_t + u_dw * (1 - p_t)


# n_parent maps every index label of treated_dw to its first three characters.
n_parent = pd.Series(index=treated_dw.index, data=[n[0:3] for n in treated_dw.index], name='ncode_parent')
spinal_split_folder = "FILEPATH"
# Maps each 'prop_<draw>' column name to the plain draw column name.
drawdict = {'prop_' + d: d for d in help.drawcols()}
split_props_list = []
for s in ['a', 'b', 'c', 'd']:
    split_prop = pd.read_csv(os.path.join(spinal_split_folder, 'prop_' + s + '.csv'))
    split_prop.rename(columns=drawdict, inplace=True)
    split_prop.drop('acause', axis=1, inplace=True)
    # Relabel row 0 as '<parent><suffix>' for each of the two parents, so the
    # same proportions are added once per parent.
    for n in ['N33', 'N34']:
        split_props_list.append(split_prop.rename({0: n+s}))

split_props = pd.concat(split_props_list)
split_props.index.rename('ncode', inplace=True)
# Every index label shorter than four characters gets weight 1 in all draws,
# except the two split parents, which get 0.
other_ncodes = pd.DataFrame(index=[n for n in treated_dw.index if len(n) < 4], columns=help.drawcols(), data=1)
other_ncodes.loc[['N33', 'N34']] = 0
other_ncodes.index.rename('ncode', inplace=True)
# DataFrame.append was removed in pandas 2.0; pd.concat stacks the same rows
# in the same order.
weight = etl.df_to_xr(pd.concat([split_props, other_ncodes]), wide_dim_name='draw', fill_value=np.nan)
Exemplo n.º 13
0
def main(ecode, ncode, platform, year, version, flat_version):
    """Run ``run_model_injuries`` for every location and sex and save results.

    For each location from ``help.ihme_loc_id_dict`` and each sex id in
    ``[1, 2]``, either runs ``run_model_injuries`` or, when ``eta_incidence``
    in the value_in file is 0, builds an all-zero frame instead. The
    per-location/sex frames are tagged with ``location_id``, ``platform``,
    ``year_id`` and ``sex_id``, concatenated and passed to ``write_results``.
    Progress and a time estimate are printed after every location.

    NOTE(review): most paths are redacted ("FILEPATH") in this copy, so
    several arguments and locals (e.g. ``flat_version``, ``dm_dir``,
    ``rate_in_name``) are not visibly used; the literals are kept as found.

    Raises:
        OSError: If an output folder cannot be created for any reason other
            than it already existing.
    """
    # Local import: the original relied on `os.errno`, which is not a public
    # attribute of `os` and no longer exists on Python 3.7+.
    import errno

    dems = db.get_demographics(gbd_team="epi", gbd_round_id=help.GBD_ROUND)
    dm_settings = os.path.join("FILEPATH")
    dm_dir = os.path.join("FILEPATH")
    metaloc = db.get_location_metadata(location_set_id=35, gbd_round_id=help.GBD_ROUND)

    locations = help.ihme_loc_id_dict(metaloc, dems['location_id'])

    alldata = []

    value_in = os.path.join("FILEPATH.csv".format(ncode, platform))
    draw_in = os.path.join(dm_settings, "FILEPATH.csv")
    plain_in = os.path.join(dm_settings, "FILEPATH.csv")
    effect_in = os.path.join(dm_settings, "FILEPATH.csv")
    v_in = pd.read_csv(value_in)

    # Loop-invariant, so evaluated once. `.iloc[0]` takes the first matching
    # row by position; the original `[0]` was a label lookup that raises
    # KeyError unless the eta_incidence row happens to have index label 0.
    eta_incidence = v_in.loc[v_in['name'] == 'eta_incidence', 'value']
    zero_incidence = float(eta_incidence.iloc[0]) == 0

    num_locs = len(locations)
    initime = help.start_timer()
    for loc_pos, locn in enumerate(locations, start=1):

        for sex in [1, 2]:

            print("Running DisMod ODE for location {} year {} sex {}".format(locations[locn], year, sex))

            start = help.start_timer()

            if zero_incidence:
                print('eta incidence is 0, so all incidence should be 0 and we\'ll just make an all 0 df instead of '
                      'running the ODE')
                result = pd.DataFrame({'age_group_id': dems['age_group_id']})
                result = result.assign(**{d: 0 for d in help.drawcols()})
                result = help.convert_from_age_group_id(result)
            else:
                data_in = os.path.join("FILEPATH.csv".format(ncode, platform))

                # create the rate in filepath based on whether it has excess mortality or not
                if ncode in inj_info.EMR_NCODES:
                    rate_in_name = "FILEPATH.csv"
                else:
                    rate_in_name = "FILEPATH.csv"
                rate_in = os.path.join("FILEPATH")

                draw_out_dir = os.path.join("FILEPATH")
                draw_out = os.path.join("FILEPATH.csv".format(ncode, platform))

                # Tolerate a concurrent job having created the folder first.
                try:
                    os.makedirs(draw_out_dir)
                except OSError as e:
                    if e.errno != errno.EEXIST:
                        raise

                result = run_model_injuries(draw_in, data_in, value_in, plain_in, rate_in, effect_in, draw_out, 1000)

            # format the results so that we have the identifying columns
            result['location_id'] = locn
            result['platform'] = platform

            result['year_id'] = year
            result['sex_id'] = sex

            alldata.append(result)
            help.end_timer(start)
            sys.stdout.flush()  # write to log file

        # Elapsed minutes so far, and a linear extrapolation for the rest.
        total_time = (time.time() - initime)/60.
        print('Completed {} of {} locations in {} minutes. Will take {} more minutes at this rate'.format(
            loc_pos, num_locs, total_time, (total_time/loc_pos)*(num_locs-loc_pos)))
        sys.stdout.flush()  # write to log file

    # concatenate all of the data together
    final = pd.concat(alldata)
    write_results(final, ecode, ncode, platform, year, version)
    print('Finished!')
Exemplo n.º 14
0
def lognormal():
    """Sample short-term duration draws and write treated/untreated CSVs.

    Columns 2-5 and 6-9 of the 'short-term durations' sheet hold the
    inpatient and outpatient mean/se/ll/ul; columns 10-12 hold the
    multiplier mean/ll/ul. Treated durations are divided by 365.25, sampled
    from a lognormal and capped at 1. Multipliers are sampled from a normal
    and floored at 0. Untreated = treated x multiplier, capped at 1.

    Output: ``durs_treated_test_log.csv`` and ``durs_untreated_test_log.csv``
    in the output folder. The global NumPy seed is fixed for reproducibility.
    """
    filepath = "FILEPATH"
    raw = pd.read_excel(filepath,
                        sheet_name='short-term durations',
                        header=None,
                        skiprows=9,
                        index_col=0)

    # subset to the right data; every selected column gets a new name, so
    # the labels are assigned wholesale on a copy of each slice
    inpatient = raw[[2, 3, 4, 5]].copy()
    inpatient.columns = ['mean', 'se', 'll', 'ul']
    outpatient = raw[[6, 7, 8, 9]].copy()
    outpatient.columns = ['mean', 'se', 'll', 'ul']
    mults = raw.reset_index()[[0, 10, 11, 12]].copy()
    mults.columns = ['ncode', 'mean', 'll', 'ul']

    treated = pd.concat([inpatient, outpatient],
                        keys=['inpatient', 'outpatient'],
                        names=['platform', 'ncode'])

    # fill missing standard errors from the ll-ul interval width
    treated['se'] = treated['se'].fillna(
        (treated['ul'] - treated['ll']) / 3.92)
    mults['se'] = (mults['ul'] - mults['ll']) / 3.92

    # rescale the treated statistics by 365.25
    treated = treated / 365.25

    # lognormal parameters from mean and standard error
    variance_plus_mean_sq = treated['se']**2 + treated['mean']**2
    treated['mu'] = np.log(
        (treated['mean']**2) / np.sqrt(variance_plus_mean_sq))
    cv = treated['se'] / treated['mean']
    treated['sig'] = np.sqrt(np.log(1 + cv**2))

    # reset the index so rows line up with the positional sample frames
    treated.reset_index(inplace=True)
    n_treated = len(treated)
    n_mults = len(mults)
    np.random.seed(81112)
    treated[help.drawcols()] = pd.DataFrame(
        np.random.lognormal(treated['mu'], treated['sig'],
                            size=(1000, n_treated)).T)
    mults[help.drawcols()] = pd.DataFrame(
        np.random.normal(mults['mean'], mults['se'],
                         size=(1000, n_mults)).T)

    # keep only the identifiers and the draws
    summary_cols = ['mean', 'se', 'll', 'ul']
    treated.drop(summary_cols + ['mu', 'sig'], axis=1, inplace=True)
    treated.set_index(['ncode', 'platform'], inplace=True)
    mults.drop(summary_cols, axis=1, inplace=True)
    mults.set_index(['ncode'], inplace=True)

    # floor multipliers at 0, cap durations at 1
    mults[mults < 0] = 0
    treated[treated > 1] = 1
    untreated = treated * mults
    untreated[untreated > 1] = 1

    outdir = "FILEPATH"
    outputs = (
        (treated, 'durs_treated_test_log.csv'),
        (untreated, 'durs_untreated_test_log.csv'),
    )
    for frame, name in outputs:
        frame.sort_index().to_csv(os.path.join(outdir, name))