import os

import pandas as pd

# 'st' is the spacetime smoothing module (e.g. `import spacetime as st`);
# stdir and lindir are module-level output/input paths defined elsewhere.


def run_spacetime(location_id, lambdaa=1.0, omega=2, zeta=0.95):
    # Results dir
    os.makedirs(stdir, exist_ok=True)

    # Read in some data...
    data = pd.read_csv("%s/linear_predictions.csv" % lindir)

    results = []
    for sex in [1, 2]:
        sdata = data[data.sex_id == sex]

        ################################
        # All country example
        ################################

        # Initialize the smoother
        s = st.Smoother(
            sdata, 153,
            datavar='ln_dr',
            modelvar='ln_dr_predicted',
            # snvar='sn_flag',
            pred_age_group_ids=data.age_group_id.unique(),
            pred_start_year=data.year_id.min())

        # Set parameters (can additionally specify omega (age weight, a
        # positive real number) and zeta (space weight, between 0 and 1))
        s.lambdaa = lambdaa
        s.omega = omega
        s.zeta = zeta
        # s.sn_weight = 0.2

        # Tell the smoother to calculate both time weights and age weights
        s.time_weights()
        s.age_weights()

        # Run the smoother
        s.smooth(location_id)

        # "include_mad" calculates the global / regional / national MAD
        # estimates of the ST residuals, in case they are needed for the
        # GPR step...
        s.format_output(include_mad=True)
        r = s.results
        r['sex_id'] = sex
        results.append(r)

    # Write the combined results to a file
    results = pd.concat(results)
    results.to_csv('%s/%s.csv' % (stdir, location_id), index=False)
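
# A minimal sketch of how run_spacetime might be launched, e.g. one cluster
# job per location. The command-line contract (a single location_id argument)
# and the parameter values are illustrative assumptions, not the pipeline's
# actual launcher.
if __name__ == '__main__':
    import sys

    run_spacetime(int(sys.argv[1]), lambdaa=1.0, omega=2, zeta=0.95)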
# Tail of the collation step: fs is the list of per-location result files
# and idx_cols the index columns, both defined earlier.
def readf(f):
    return pd.read_csv(f).set_index(idx_cols)


results = [readf(f) for f in fs]
results = pd.concat(results, axis=1)
results = results.reset_index()

# Calculate MADs
forgpr = []
for sex in [1, 2]:
    sdata = data[data.sex_id == sex]
    sresults = results[results.sex_id == sex]
    sresults = sresults.drop('sex_id', axis=1)
    s = st.Smoother(sdata, 42,
                    datavar='ln_dr',
                    modelvar='ln_dr_predicted',
                    pred_age_group_ids=data.age_group_id.unique(),
                    pred_start_year=data.year_id.min())
    s.results = sresults
    forgpr.append(s.calculate_mad())
forgpr = pd.concat(forgpr)

"""
Convert data variances to log space.

Use the standard error as the data variance. Approximate the transformed
variance using the delta method:

    G(X) ≈ G(mu) + (X - mu) * G'(mu)
    Var(G(X)) ≈ Var(X) * [G'(mu)]^2

Examples:
    For G(X) = Logit(X): G'(mu) = 1 / (mu * (1 - mu)),
        so Var(Logit(X)) ≈ Var(X) / (mu * (1 - mu))^2
    For G(X) = Log(X): G'(mu) = 1 / mu,
        so Var(Log(X)) ≈ Var(X) / mu^2
"""
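
# A minimal sketch of the delta-method transform described above. The column
# names ('data', 'variance') and the output columns are illustrative
# assumptions, not the pipeline's actual schema.
def transform_variance(df, space='log'):
    """Approximate the variance of transformed data via the delta method."""
    mu = df['data']
    if space == 'log':
        # Var(Log(X)) ≈ Var(X) / mu^2
        df['ln_variance'] = df['variance'] / mu ** 2
    elif space == 'logit':
        # Var(Logit(X)) ≈ Var(X) / (mu * (1 - mu))^2
        df['logit_variance'] = df['variance'] / (mu * (1 - mu)) ** 2
    return df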
import numpy as np
import pandas as pd

# locs (the location hierarchy), location_set_version_id, data_threshold and
# the 'st' smoothing module are module-level globals defined elsewhere.


def run_spacetime(location_id, df, age_start, age_end, year_start, year_end,
                  lambdaa, zeta, zeta_no_data, omega):

    ################################
    ## Setup
    ################################

    ## Detect level and parent
    national_id = int(locs.level_3[locs.location_id == location_id].iloc[0])
    level = int(locs.level[locs.location_id == location_id].iloc[0])

    # Make sure we only borrow strength from locations at or above this
    # level, plus subnationals within the same country
    columns_to_keep = list(df.columns.values)
    df = df[((df.level <= 3) | (df.level_3 == national_id)) &
            (df.level <= level)]
    df = df[columns_to_keep]

    # Count the data (the maximum number of data points in an age group for
    # that sex)
    data_count = df.loc[df.location_id == location_id].groupby(
        'age_group_id').agg('count')
    data_count = np.max(data_count.data)

    # If the data count is below the threshold, pass a flag to ST
    if data_count >= data_threshold:
        zeta_threshold = 1
    else:
        zeta_threshold = 0

    ## If level > 3 (a subnational location), set zeta to 0.5
    if level > 3:
        zeta = 0.5

    ################################
    ## Set weights
    ################################

    # Initialize the smoother
    s = st.Smoother(df, location_set_version_id,
                    timevar='year_id',
                    agevar='age_group_id',
                    spacevar='location_id',
                    datavar='data',
                    modelvar='prior',
                    pred_age_group_ids=range(age_start, age_end + 1),
                    pred_start_year=year_start,
                    pred_end_year=year_end,
                    snvar='cv_subgeo')

    # Set parameters (can additionally specify omega (age weight, a positive
    # real number) and zeta (space weight, between 0 and 1))
    s.lambdaa = lambdaa
    s.zeta = zeta
    s.zeta_no_data = zeta_no_data

    # Skip age weighting when the data are all-ages (age_group_id 22)
    if 22 not in pd.unique(df['age_group_id']):
        s.omega = omega

    # Tell the smoother to calculate both time weights and age weights
    s.time_weights()
    if 22 not in pd.unique(df['age_group_id']):
        s.age_weights()

    ################################
    ## Run Smoother
    ################################

    s.smooth(locs=location_id, level=level, zeta_threshold=zeta_threshold)
    results = pd.merge(df, s.long_result(),
                       on=['age_group_id', 'year_id', 'location_id'],
                       how='right')

    ################################
    ## Clean
    ################################

    cols = ['location_id', 'year_id', 'age_group_id', 'sex_id', 'st']
    results = results[cols].drop_duplicates()
    return results
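
# A minimal sketch of how lambdaa, omega and zeta typically enter the
# spacetime weights in ST-GPR-style smoothers. This is one common
# formulation shown for illustration only; the Smoother's actual kernels
# may differ in detail.
def time_weight(t, t_obs, max_time_dist, lambdaa):
    # Tricube kernel in time: smaller lambdaa localizes the kernel around
    # the observed year, larger lambdaa flattens it (more smoothing).
    a = (np.abs(t - t_obs) / (max_time_dist + 1)) ** lambdaa
    return (1 - a) ** 3


def age_weight(a, a_obs, omega):
    # Exponential decay across age groups; larger omega borrows less
    # strength from neighbouring ages.
    return 1 / np.exp(omega * np.abs(a - a_obs))


def space_weight(same_country, same_region, zeta):
    # zeta apportions weight across the location hierarchy: own-country
    # data get zeta, same-region data zeta * (1 - zeta), and everything
    # else (1 - zeta) ** 2.
    if same_country:
        return zeta
    if same_region:
        return zeta * (1 - zeta)
    return (1 - zeta) ** 2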