Exemplo n.º 1
0
def custom_interpolate(df):
    """Interpolate attributable burden draws between five-year estimates.

    For each consecutive pair of years (1990-1995, 1995-2000, 2000-2005,
    2005-2010) the rows of the two endpoint years are sorted on the
    identifier columns, re-indexed from zero and passed to ``interpolate``.
    From every result only rows with ``year_id`` strictly below the
    segment's end year are kept, so a year shared by two segments is
    emitted once and 2010 itself is not part of the output.

    Args:
        df: DataFrame with a ``year_id`` column. Columns named ``draw_0``
            .. ``draw_999`` are passed on as value columns; every other
            column is treated as an identifier.

    Returns:
        The concatenation of the filtered ``interpolate`` results.
    """
    draw_cols = ['draw_{}'.format(i) for i in range(1000)]
    non_id_cols = set(draw_cols + ['year_id'])
    # Take identifiers in the frame's own column order so that the sort
    # priority below is deterministic (a set difference has arbitrary order).
    id_cols = [col for col in df.columns if col not in non_id_cols]

    year_ranges = [(1990, 1995), (1995, 2000), (2000, 2005), (2005, 2010)]
    dfs = []
    for start_year, end_year in year_ranges:
        # Both endpoint frames are sorted identically and re-indexed so
        # their rows line up positionally.
        start_df = (df.loc[df.year_id == start_year].sort_values(id_cols)
                    .reset_index(drop=True))
        end_df = (df.loc[df.year_id == end_year].sort_values(id_cols)
                  .reset_index(drop=True))
        ydf = interpolate(start_df, end_df, id_cols, 'year_id', draw_cols,
                          start_year, end_year)
        # Drop the end year; the next segment contributes it as its start.
        dfs.append(ydf.query('year_id < {}'.format(end_year)))
    return pd.concat(dfs)
Exemplo n.º 2
0
def interp_loc(modelable_entity_id, measure_id, location_id, outpath):
    """Write an annual 1980-2015 draw series for one location to CSV.

    Steps:
      1. Fetch draws for every fifth year from 1990 through 2015 via
         ``gopher.draws`` (source ``"dismod"``, age groups 2-21).
      2. Interpolate each adjacent pair of those years with
         ``maths.interpolate``, passing the 2005 draws as ``rank_df``.
      3. Fill 1980-1989 with copies of the 1990 draws, re-labelled by year.
      4. Write the combined frame to ``outpath`` with ``DataFrame.to_csv``.

    Args:
        modelable_entity_id: Entity id passed to ``gopher.draws``.
        measure_id: Measure id passed to ``gopher.draws``.
        location_id: Location id passed to ``gopher.draws``.
        outpath: Destination passed to ``DataFrame.to_csv``.

    Raises:
        AssertionError: If ``gopher.draws`` returns nothing for any of the
            requested years.
    """
    start_year = 1980
    epi_start_year = 1990
    end_year = 2015
    rank_year = 2005
    draw_cols = ['draw_%s' % i for i in range(1000)]

    # Retrieve epi draws for each estimation year
    epi_draws = []
    for y in range(epi_start_year, end_year + 1, 5):
        d = gopher.draws({'modelable_entity_ids': [modelable_entity_id]},
                         year_ids=[y],
                         location_ids=[location_id],
                         measure_ids=[measure_id],
                         verbose=False,
                         source="dismod",
                         age_group_ids=[
                             2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                             16, 17, 18, 19, 20, 21
                         ])
        assert len(d) > 0, (
            "Uh oh, couldn't find epi draws. Make sure you have "
            "proportion estimates for the supplied meids")
        epi_draws.append(d)
    epi_draws = pd.concat(epi_draws)

    # Interpolate between each adjacent pair of estimation years
    ip_epi_draws = []
    for sy in range(epi_start_year, end_year, 5):
        ey = sy + 5
        ip_draws = maths.interpolate(
            epi_draws.query('year_id==%s' % sy),
            epi_draws.query('year_id==%s' % ey),
            ['age_group_id', 'model_version_id', 'sex_id'],
            'year_id', draw_cols,
            sy,
            ey,
            rank_df=epi_draws.query('year_id==%s' % rank_year))
        # The end year of a segment is the start year of the next one; keep
        # it only for the final segment so no year appears twice.
        if ey != end_year:
            ip_draws = ip_draws[ip_draws.year_id != ey]
        ip_epi_draws.append(ip_draws)
    ip_epi_draws = pd.concat(ip_epi_draws)

    # Back-fill the years before the first estimation year with its draws.
    # The slice is selected once and each year gets its own copy via
    # ``assign`` rather than writing into a query result, which triggers
    # pandas' SettingWithCopyWarning.
    first_epi_year_draws = ip_epi_draws.query('year_id==%s' % epi_start_year)
    extrap_draws = [first_epi_year_draws.assign(year_id=y)
                    for y in range(start_year, epi_start_year)]

    epi_draws = pd.concat([ip_epi_draws] + extrap_draws)
    epi_draws.to_csv(outpath)
Exemplo n.º 3
0
def interpolate_ls(cvid, lid, sid):
    """Interpolate cause draws between modelled years and write them to HDF.

    Reads the per-year HDF files for 1990, 1995, ..., 2015 from the
    ``total`` draws directory of the given version, interpolates each
    adjacent pair of years with ``maths.interpolate`` (passing the 2005
    rows as ``rank_df``), and writes one HDF file per non-modelled year
    into the ``total_interp`` directory, which is created if missing.

    Args:
        cvid: Version id used to build the input and output directories.
        lid: Location id used in the file names.
        sid: Sex id used in the file names.

    Raises:
        OSError: If the output directory cannot be created and does not
            already exist.
    """
    modys = list(range(1990, 2016, 5))
    iplys = [y for y in range(1990, 2016) if y not in modys]
    idir = '/ihme/centralcomp/como/{cv}/draws/cause/total_interp/'.format(
        cv=cvid)
    try:
        os.makedirs(idir)
    except OSError:
        # An already-existing output directory is expected; any other
        # failure (permissions, missing mount, ...) must not be hidden.
        if not os.path.isdir(idir):
            raise

    moddfs = pd.concat([
        pd.read_hdf('/ihme/centralcomp/como/{cv}/draws/cause/total/'
                    '3_{l}_{y}_{s}.h5'.format(cv=cvid, l=lid, y=y, s=sid))
        for y in modys
    ])

    id_cols = [
        'location_id', 'year_id', 'age_group_id', 'sex_id', 'cause_id'
    ]
    time_col = 'year_id'
    value_cols = ['draw_%s' % d for d in range(1000)]

    for sy, ey in zip(modys, modys[1:]):
        # Single-argument call form prints identically on Python 2 and 3.
        print('interpolating %s %s %s %s' % (lid, sid, sy, ey))

        x = maths.interpolate(moddfs.query('year_id == %s' % sy),
                              moddfs.query('year_id == %s' % ey),
                              id_cols,
                              time_col,
                              value_cols,
                              sy,
                              ey,
                              rank_df=moddfs.query('year_id == 2005'))
        # Modelled years already exist on disk; only write the years between.
        x = x[x.year_id.isin(iplys)]
        for y in x.year_id.unique():
            fn = '{id}/3_{l}_{y}_{s}.h5'.format(id=idir, l=lid, y=y, s=sid)
            x.query('year_id == %s' % y).to_hdf(fn,
                                                key='draws',
                                                mode='w',
                                                format='table',
                                                data_columns=id_cols)
Exemplo n.º 4
0
def load_RSM(for_interp, interpd):
    """Compile risk-standardized mortality draws and interpolate them by year.

    If ``for_interp`` does not exist, the estimation years (1990, 1995,
    2000, 2005, 2010, 2016) are collected with ``data_formatter`` (using
    the module-level ``locsdf``) and stored there as an HDF table;
    otherwise that file is read back. Each adjacent pair of estimation
    years is then passed to ``interpolate``, and the combined result is
    written to ``interpd`` and returned.

    Args:
        for_interp: Path of the HDF file holding the compiled,
            not-yet-interpolated data.
        interpd: Path of the HDF file the interpolated data is written to.

    Returns:
        The concatenated interpolation results.
    """
    # compile risk-standardized mort
    if not os.path.exists(for_interp):
        dfs = []
        # list(...) keeps the concatenation valid on Python 3, where range
        # is not a list.
        for year_id in list(range(1990, 2011, 5)) + [2016]:
            print('Collecting ' + str(year_id))
            df = data_formatter(year_id, locsdf)
            dfs.append(df)
        df = pd.concat(dfs)
        df.to_hdf(for_interp,
                  key="data",
                  format="table",
                  data_columns=['location_id', 'year_id'])
    else:
        df = pd.read_hdf(for_interp)

    # interpolate
    draw_cols = ['draw_{}'.format(i) for i in range(1000)]
    id_cols = ['location_id', 'age_group_id', 'sex_id', 'measure_id',
               'metric_id', 'cause_id']
    year_ranges = [[1990, 1995], [1995, 2000], [2000, 2005], [2005, 2010],
                   [2010, 2016]]
    dfs = []
    for year_range in year_ranges:
        print(year_range)
        start_year, end_year = year_range
        # Both endpoint frames are sorted identically and re-indexed so
        # their rows line up positionally.
        start_df = (df.loc[df.year_id == start_year].sort_values(id_cols)
                    .reset_index(drop=True))
        end_df = (df.loc[df.year_id == end_year].sort_values(id_cols)
                  .reset_index(drop=True))
        ydf = interpolate(start_df, end_df, id_cols, 'year_id', draw_cols,
                          start_year, end_year)
        # Drop each segment's end year (the next segment supplies it),
        # except 2016, which closes the series.
        ydf = ydf.query('year_id < {} or year_id == 2016'.format(end_year))
        dfs.append(ydf)
    df = pd.concat(dfs)
    df.to_hdf(interpd,
              key="data",
              format="table",
              data_columns=['location_id', 'year_id'])

    return df