Exemplo n.º 1
0
    def test_monte_carlo_for_single_t(self, sim_t_fixed_data):
        """Compare MonteCarloGFormula against TimeFixedGFormula for a single t.

        Both estimators are fit with the same outcome model under the "all"
        treatment plan; the mean of the Monte Carlo predicted outcome 'Y' is
        expected to match the time-fixed marginal outcome within rtol=1e-3.
        """
        outcome_formula = 'A + W1_sq + W2 + W3'

        # Monte Carlo g-formula estimate for a single t
        mc_estimator = MonteCarloGFormula(
            sim_t_fixed_data,
            idvar='id',
            exposure='A',
            outcome='Y',
            time_out='t',
            time_in='t0',
        )
        mc_estimator.outcome_model(outcome_formula, print_results=False)
        mc_estimator.exposure_model('W1_sq', print_results=False)
        # Keep the resample size high to reduce simulation error
        mc_estimator.fit(treatment="all", sample=1000000)
        print(mc_estimator.predicted_outcomes)

        # Time-fixed g-formula estimate using the same outcome model
        fixed_estimator = TimeFixedGFormula(sim_t_fixed_data, exposure='A', outcome='Y')
        fixed_estimator.outcome_model(model=outcome_formula, print_results=False)
        fixed_estimator.fit(treatment='all')

        # Expected behavior: both estimation methods give the same result
        mc_mean_outcome = np.mean(mc_estimator.predicted_outcomes['Y'])
        npt.assert_allclose(fixed_estimator.marginal_outcome, mc_mean_outcome, rtol=1e-3)
Exemplo n.º 2
0
 def test_complete_mc_procedure_completes(self):
     """Run the complete Monte Carlo g-formula procedure on the time-varying sample data.

     Specifies exposure, outcome, two time-varying covariate models (dvl, cd4)
     and a censoring model, fits the estimator, and checks that
     ``predicted_outcomes`` is a pandas DataFrame.
     """
     df = load_sample_data(timevary=True)

     # Lagged copies of the time-varying variables. The first row of every id
     # has no previous row of its own, so it gets a fixed starting value
     # instead of the value shifted in from the preceding row.
     first_row = df.groupby('id').cumcount() == 0
     for var, start_value in (('art', 0), ('cd4', df['cd40']), ('dvl', df['dvl0'])):
         df['lag_' + var] = np.where(first_row, start_value, df[var].shift(1))

     df[['age_rs0', 'age_rs1', 'age_rs2']] = spline(df, 'age0', n_knots=4, term=2, restricted=True)  # age spline
     # Squared and cubed terms used by the model formulas below
     df['cd40_sq'] = df['cd40'] ** 2
     df['cd40_cu'] = df['cd40'] ** 3
     df['cd4_sq'] = df['cd4'] ** 2
     df['cd4_cu'] = df['cd4'] ** 3
     df['enter_sq'] = df['enter'] ** 2
     df['enter_cu'] = df['enter'] ** 3

     g = MonteCarloGFormula(df, idvar='id', exposure='art', outcome='dead', time_in='enter', time_out='out')

     # Exposure model, fit only on rows where lag_art is 0
     exp_m = '''male + age0 + age_rs0 + age_rs1 + age_rs2 + cd40 + cd40_sq + cd40_cu + dvl0 + cd4 + cd4_sq + 
             cd4_cu + dvl + enter + enter_sq + enter_cu'''
     g.exposure_model(exp_m, restriction="g['lag_art']==0")

     # Outcome model, fit only on rows where drop is 0
     out_m = '''art + male + age0 + age_rs0 + age_rs1 + age_rs2 + cd40 + cd40_sq + cd40_cu + dvl0 + cd4 + 
             cd4_sq + cd4_cu + dvl + enter + enter_sq + enter_cu'''
     g.outcome_model(out_m, restriction="g['drop']==0")

     # Time-varying covariate models: dvl (label 1), then cd4 (label 2)
     dvl_m = '''male + age0 + age_rs0 + age_rs1 + age_rs2 + cd40 + cd40_sq + cd40_cu + dvl0 + lag_cd4 + 
             lag_dvl + lag_art + enter + enter_sq + enter_cu'''
     g.add_covariate_model(label=1, covariate='dvl', model=dvl_m, var_type='binary')
     cd4_m = '''male + age0 + age_rs0 + age_rs1 + age_rs2 +  cd40 + cd40_sq + cd40_cu + dvl0 + lag_cd4 + 
             lag_dvl + lag_art + enter + enter_sq + enter_cu'''
     # Recode applied with the cd4 model: floor cd4 at 1 and refresh its polynomial terms
     cd4_recode_scheme = ("g['cd4'] = np.maximum(g['cd4'],1);"
                          "g['cd4_sq'] = g['cd4']**2;"
                          "g['cd4_cu'] = g['cd4']**3")
     g.add_covariate_model(label=2, covariate='cd4', model=cd4_m, recode=cd4_recode_scheme, var_type='continuous')

     # Censoring model
     cens_m = """male + age0 + age_rs0 + age_rs1 + age_rs2 +  cd40 + cd40_sq + cd40_cu + dvl0 + lag_cd4 +
              lag_dvl + lag_art + enter + enter_sq + enter_cu"""
     g.censoring_model(cens_m)

     g.fit(treatment="((g['art']==1) | (g['lag_art']==1))",
           lags={'art': 'lag_art',
                 'cd4': 'lag_cd4',
                 'dvl': 'lag_dvl'},
           sample=5000, t_max=None,
           in_recode=("g['enter_sq'] = g['enter']**2;"
                      "g['enter_cu'] = g['enter']**3"))

     # Compare against the class directly rather than the type of a throwaway empty frame
     assert isinstance(g.predicted_outcomes, pd.DataFrame)
Exemplo n.º 3
0
def mc_gformula_check():
    """Visually compare Monte Carlo g-formula output against the observed data.

    Fits ``MonteCarloGFormula`` on the time-varying sample data twice, first
    without and then with a censoring model, fits a Kaplan-Meier curve to
    each set of predicted outcomes and to the observed data, and plots
    ``1 - survival`` for all three ('Natural', 'Censor', 'True').
    """
    df = load_sample_data(timevary=True)

    # Lagged copies of the time-varying variables. The first row of every id
    # has no previous row of its own, so it gets a fixed starting value
    # instead of the value shifted in from the preceding row.
    first_row = df.groupby('id').cumcount() == 0
    for var, start_value in (('art', 0), ('cd4', df['cd40']), ('dvl', df['dvl0'])):
        df['lag_' + var] = np.where(first_row, start_value, df[var].shift(1))

    df[['age_rs0', 'age_rs1', 'age_rs2']] = spline(df, 'age0', n_knots=4, term=2, restricted=True)  # age spline
    df['cd40_sq'] = df['cd40'] ** 2  # cd4 baseline cubic
    df['cd40_cu'] = df['cd40'] ** 3
    df['cd4_sq'] = df['cd4'] ** 2  # cd4 current cubic
    df['cd4_cu'] = df['cd4'] ** 3
    df['enter_sq'] = df['enter'] ** 2  # entry time cubic
    df['enter_cu'] = df['enter'] ** 3

    g = MonteCarloGFormula(df, idvar='id', exposure='art', outcome='dead', time_in='enter', time_out='out')

    # Exposure model, fit only on rows where lag_art is 0
    exp_m = '''male + age0 + age_rs0 + age_rs1 + age_rs2 + cd40 + cd40_sq + cd40_cu + dvl0 + cd4 + cd4_sq + 
            cd4_cu + dvl + enter + enter_sq + enter_cu'''
    g.exposure_model(exp_m, restriction="g['lag_art']==0")

    # Outcome model, fit only on rows where drop is 0
    out_m = '''art + male + age0 + age_rs0 + age_rs1 + age_rs2 + cd40 + cd40_sq + cd40_cu + dvl0 + cd4 + 
            cd4_sq + cd4_cu + dvl + enter + enter_sq + enter_cu'''
    g.outcome_model(out_m, restriction="g['drop']==0")

    # Time-varying covariate models: dvl (label 1), then cd4 (label 2)
    dvl_m = '''male + age0 + age_rs0 + age_rs1 + age_rs2 + cd40 + cd40_sq + cd40_cu + dvl0 + lag_cd4 + 
            lag_dvl + lag_art + enter + enter_sq + enter_cu'''
    g.add_covariate_model(label=1, covariate='dvl', model=dvl_m, var_type='binary')
    cd4_m = '''male + age0 + age_rs0 + age_rs1 + age_rs2 +  cd40 + cd40_sq + cd40_cu + dvl0 + lag_cd4 + 
            lag_dvl + lag_art + enter + enter_sq + enter_cu'''
    # Recode applied with the cd4 model: floor cd4 at 1 and refresh its polynomial terms
    cd4_recode_scheme = ("g['cd4'] = np.maximum(g['cd4'],1);"
                         "g['cd4_sq'] = g['cd4']**2;"
                         "g['cd4_cu'] = g['cd4']**3")
    g.add_covariate_model(label=2, covariate='cd4', model=cd4_m, recode=cd4_recode_scheme, var_type='continuous')

    # Both fits below use exactly the same arguments; only the presence of a
    # censoring model on the estimator differs between them.
    fit_kwargs = dict(
        treatment="((g['art']==1) | (g['lag_art']==1))",
        lags={'art': 'lag_art',
              'cd4': 'lag_cd4',
              'dvl': 'lag_dvl'},
        sample=10000, t_max=None,
        in_recode=("g['enter_sq'] = g['enter']**2;"
                   "g['enter_cu'] = g['enter']**3"),
    )

    # First fit: no censoring model specified
    g.fit(**fit_kwargs)
    gf = g.predicted_outcomes
    kmn = KaplanMeierFitter()
    kmn.fit(durations=gf['out'], event_observed=gf['dead'])

    # Kaplan-Meier on the observed data, using the entry times
    kmo = KaplanMeierFitter()
    kmo.fit(durations=df['out'], event_observed=df['dead'], entry=df['enter'])

    # Second fit: censoring model added
    cens_m = """male + age0 + age_rs0 + age_rs1 + age_rs2 +  cd40 + cd40_sq + cd40_cu + dvl0 + lag_cd4 +
             lag_dvl + lag_art + enter + enter_sq + enter_cu"""
    g.censoring_model(cens_m)
    g.fit(**fit_kwargs)
    gf = g.predicted_outcomes
    kmc = KaplanMeierFitter()
    kmc.fit(durations=gf['out'], event_observed=gf['dead'])

    plt.step(kmn.event_table.index, 1 - kmn.survival_function_, c='g', where='post', label='Natural')
    # Each curve is drawn against its own fitter's event times. The censored fit
    # comes from a separate simulation run, so its times need not match kmn's.
    plt.step(kmc.event_table.index, 1 - kmc.survival_function_, c='orange', where='post', label='Censor')
    plt.step(kmo.event_table.index, 1 - kmo.survival_function_, c='k', where='post', label='True')
    plt.legend()
    plt.show()
Exemplo n.º 4
0
df['cd4_cu'] = df['cd4']**3
df['enter_sq'] = df['enter']**2  # entry time cubic
df['enter_cu'] = df['enter']**3

mcgf = MonteCarloGFormula(
    df,  # Data set
    idvar='id',  # ID variable
    exposure='art',  # Exposure
    outcome='dead',  # Outcome
    time_in='enter',  # Start of study period
    time_out='out')  # End of time per study period
# Pooled Logistic Model: Treatment
exp_m = (
    'male + age0 + age_rs0 + age_rs1 + age_rs2 + cd40 + cd40_sq + cd40_cu + dvl0 + '
    'cd4 + cd4_sq + cd4_cu + dvl + enter + enter_sq + enter_cu')
mcgf.exposure_model(exp_m, print_results=False, restriction="g['lag_art']==0"
                    )  # Restricts to only untreated (for ITT assumption)
# Pooled Logistic Model: Outcome
out_m = (
    'art + male + age0 + age_rs0 + age_rs1 + age_rs2 + cd40 + cd40_sq + cd40_cu + dvl0 + '
    'cd4 + cd4_sq + cd4_cu + dvl + enter + enter_sq + enter_cu')
mcgf.outcome_model(
    out_m, print_results=False,
    restriction="g['drop']==0")  # Restricting to only uncensored individuals
# Pooled Logistic Model: Detectable viral load
dvl_m = (
    'male + age0 + age_rs0 + age_rs1 + age_rs2 + cd40 + cd40_sq + cd40_cu + dvl0 + '
    'lag_cd4 + lag_dvl + lag_art + enter + enter_sq + enter_cu')
mcgf.add_covariate_model(
    label=1,  # Order to fit time-varying models in
    covariate='dvl',  # Time-varying confounder
    print_results=False,