Exemplo n.º 1
0
def run_spacetime(location_id, lambdaa=1.0, omega=2, zeta=0.95):
    """Smooth the linear predictions for one location and write them to CSV.

    Reads ``<lindir>/linear_predictions.csv``, runs an ``st.Smoother`` once
    per sex (sex_id 1 and 2), tags each result with its ``sex_id`` and writes
    the concatenated results to ``<stdir>/<location_id>.csv``.

    Args:
        location_id: Location handed to ``Smoother.smooth``; also used as the
            output file name.
        lambdaa: Value assigned to the smoother's ``lambdaa`` attribute
            (presumably the time-weight parameter -- confirm against
            ``st.Smoother``).
        omega: Age weight (positive real number).
        zeta: Space weight (between 0 and 1).

    Raises:
        OSError: If the results directory cannot be created and does not
            already exist.
    """
    # Results dir: an already-existing directory is fine, but any other
    # failure would only resurface at ``to_csv`` after all the smoothing
    # work, so report it immediately instead of swallowing it.
    try:
        os.makedirs(stdir)
    except OSError:
        if not os.path.isdir(stdir):
            raise

    # Read in the linear-model predictions
    data = pd.read_csv("%s/linear_predictions.csv" % lindir)
    results = []
    for sex in [1, 2]:

        sdata = data[data.sex_id == sex]

        # Initialize the smoother. Prediction ages and start year are taken
        # from the full (both-sex) data set, not the per-sex subset.
        s = st.Smoother(
            sdata,
            153,
            datavar='ln_dr',
            modelvar='ln_dr_predicted',
            # snvar='sn_flag',
            pred_age_group_ids=data.age_group_id.unique(),
            pred_start_year=data.year_id.min())

        # Set parameters (omega: age weight, positive real number;
        # zeta: space weight, between 0 and 1)
        s.lambdaa = lambdaa
        s.omega = omega
        s.zeta = zeta
        # s.sn_weight = 0.2

        # Tell the smoother to calculate both time weights and age weights
        s.time_weights()
        s.age_weights()

        # Run the smoother for the requested location
        s.smooth(location_id)

        # Using the "include_mad" will calculate the global / regional /
        # national MAD estimates of the ST residuals, in case you need them
        # for the GPR step... results = s.format_output(include_mad=True)
        r = s.results
        r['sex_id'] = sex
        results.append(r)

    # Stack both sexes and write a single file per location
    results = pd.concat(results)
    results.to_csv('%s/%s.csv' % (stdir, location_id), index=False)
Exemplo n.º 2
0
    return pd.read_csv(f).set_index(idx_cols)


# Load every result file, line the frames up column-wise on their shared
# index, then turn the index back into ordinary columns.
results = pd.concat([readf(f) for f in fs], axis=1).reset_index()

# Calculate MADs, one sex at a time
forgpr = []
for sex in (1, 2):
    sdata = data[data.sex_id == sex]
    sresults = results[results.sex_id == sex].drop('sex_id', axis=1)
    s = st.Smoother(
        sdata,
        42,
        datavar='ln_dr',
        modelvar='ln_dr_predicted',
        pred_age_group_ids=data.age_group_id.unique(),
        pred_start_year=data.year_id.min(),
    )
    # Attach the previously computed results instead of re-running the
    # smoother, then let it derive the MADs from them.
    s.results = sresults
    forgpr.append(s.calculate_mad())
forgpr = pd.concat(forgpr)
"""
Convert data variances to log space

    Use standard error as the data variance.  Approximate transformed
    variance using the delta method:
        G(X) = G(mu) + (X-mu)G'(mu) (approximately)
        Var(G(X)) = Var(X)*[G'(mu)]^2 (approximately)

    Examples:
        For G(X) = Logit(X)
Exemplo n.º 3
0
def run_spacetime(location_id, df, age_start, age_end, year_start, year_end,
                  lambdaa, zeta, zeta_nodata, omega):
    """Run the space-time smoother for a single location.

    The input frame is first restricted to rows at or above the location's
    own level (levels <= 3, plus rows sharing the location's ``level_3``
    value), then smoothed with ``st.Smoother``.

    Args:
        location_id: Location to smooth; looked up in the module-level
            ``locs`` table to find its ``level`` and ``level_3`` values.
        df: Input data. Must provide the columns ``level``, ``level_3``,
            ``location_id``, ``age_group_id``, ``year_id``, ``sex_id``,
            ``data``, ``prior`` and ``cv_subgeo``.
        age_start: First age group id to predict for (inclusive).
        age_end: Last age group id to predict for (inclusive).
        year_start: First prediction year.
        year_end: Last prediction year.
        lambdaa: Value assigned to the smoother's ``lambdaa`` attribute.
        zeta: Space weight (between 0 and 1). Overridden with 0.5 when the
            location's level is greater than 3.
        zeta_nodata: Value assigned to the smoother's ``zeta_no_data``
            attribute.
        omega: Age weight (positive real number). Only applied when age
            group 22 is absent from ``df``.

    Returns:
        DataFrame with the de-duplicated columns ``location_id``,
        ``year_id``, ``age_group_id``, ``sex_id`` and ``st``.
    """
    ################################
    ## Setup
    ################################

    ## Detect level and parent
    # ``int(Series)`` is deprecated in pandas; pull the scalar out explicitly.
    is_location = locs.location_id == location_id
    national_id = int(locs.loc[is_location, 'level_3'].iloc[0])
    level = int(locs.loc[is_location, 'level'].iloc[0])

    # Making sure that only borrowing strength from higher levels
    columns_to_keep = list(df.columns.values)
    df = df[((df.level <= 3) | (df.level_3 == national_id))
            & (df.level <= level)]
    df = df[columns_to_keep]

    # Count the number of data (maximum number of data in an age group)
    counts_by_age = df.loc[df.location_id == location_id].groupby(
        'age_group_id').agg('count')
    data_count = np.max(counts_by_age['data'])

    # Flag passed to ST: 1 when the data count reaches the threshold, else 0
    zeta_threshold = 1 if data_count >= data_threshold else 0

    ## If level > 3, set zeta to 0.5
    if level > 3:
        zeta = 0.5

    # Age weighting is skipped when age group 22 is present
    # NOTE(review): presumably 22 is an all-ages aggregate -- confirm.
    use_age_weights = 22 not in pd.unique(df['age_group_id'])

    ################################
    ## Set weights
    ################################
    # Initialize the smoother
    s = st.Smoother(df,
                    location_set_version_id,
                    timevar='year_id',
                    agevar='age_group_id',
                    spacevar='location_id',
                    datavar='data',
                    modelvar='prior',
                    pred_age_group_ids=range(age_start, age_end + 1),
                    pred_start_year=year_start,
                    pred_end_year=year_end,
                    snvar='cv_subgeo')

    # Set parameters (omega: age weight, positive real number;
    # zeta: space weight, between 0 and 1)
    s.lambdaa = lambdaa
    s.zeta = zeta
    # The parameter is spelled ``zeta_nodata``; the original read the
    # undefined name ``zeta_no_data`` here.
    s.zeta_no_data = zeta_nodata
    if use_age_weights:
        s.omega = omega

    # Tell the smoother to calculate time weights and, if used, age weights
    s.time_weights()
    if use_age_weights:
        s.age_weights()

    ################################
    ## Run Smoother
    ################################
    s.smooth(locs=location_id, level=level, zeta_threshold=zeta_threshold)
    results = pd.merge(df,
                       s.long_result(),
                       on=['age_group_id', 'year_id', 'location_id'],
                       how='right')

    ################################
    ## Clean
    ################################
    cols = ['location_id', 'year_id', 'age_group_id', 'sex_id', 'st']
    results = results[cols].drop_duplicates()

    return results