def test_stabilized_weights(self, data):
    """Stabilized weights fitted on the ``data`` fixture equal the reference values."""
    expected_weights = [1.5, 1.5, 2 / 3, 2 / 3, 2 / 3, 3 / 4, 3 / 4, 3 / 4, 3 / 4, 2]

    estimator = IPTW(data, treatment='A', stabilized=True)
    estimator.regression_models(model_denominator='L', print_results=False)
    estimator.fit()

    npt.assert_allclose(estimator.Weight, expected_weights)
def test_match_sas_unstabilized(self, sdata):
    """Compare unstabilized IPTW results with the stored SAS reference values.

    Fits unstabilized weights for ``art``, checks the sum of the weights over
    the complete rows of ``sdata`` against ``sas_w_sum``, then fits a weighted
    identity-link binomial GEE of ``dead`` on ``art`` and checks the ``art``
    coefficient and its confidence limits against the SAS numbers.
    """
    sas_w_sum = 1038.051
    sas_rd = -0.081519085
    sas_rd_ci = -0.156199938, -0.006838231

    model = 'male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'
    ipt = IPTW(sdata, treatment='art', stabilized=False)
    ipt.regression_models(model)
    ipt.fit()
    sdata['iptw'] = ipt.Weight
    npt.assert_allclose(np.sum(sdata.dropna()['iptw']), sas_w_sum, rtol=1e-4)

    # Estimating GEE
    ind = sm.cov_struct.Independence()
    # NOTE(review): the link is passed as a class, not an instance; which form
    # is accepted depends on the statsmodels version -- confirm against the
    # version this suite supports.
    f = sm.families.family.Binomial(sm.families.links.identity)
    linrisk = smf.gee('dead ~ art', sdata['id'], sdata, cov_struct=ind, family=f,
                      weights=sdata['iptw']).fit()

    # Formula-API results are indexed by term name, so an integer key such as
    # ``params[1]`` only works through pandas' deprecated positional fallback.
    # ``.iloc`` selects the same second row ('art') explicitly.
    limits = linrisk.conf_int()
    npt.assert_allclose(linrisk.params.iloc[1], sas_rd, rtol=1e-5)
    npt.assert_allclose((limits.iloc[1, 0], limits.iloc[1, 1]), sas_rd_ci, rtol=1e-4)
def test_unstabilized_weights_w_weights(self, data):
    """Unstabilized weights computed with a constant ``weights`` column of 2.

    A ``weights`` column is added to the ``data`` fixture and passed to IPTW
    through the ``weights`` argument; the fitted weights are compared with the
    reference values.
    """
    expected_weights = [6, 6, 8 / 3, 8 / 3, 8 / 3, 3, 3, 3, 3, 8]

    data['weights'] = 2
    estimator = IPTW(data, treatment='A', weights='weights', stabilized=False)
    estimator.regression_models(model_denominator='L', print_results=False)
    estimator.fit()

    npt.assert_allclose(estimator.Weight, expected_weights)
def test_match_sas_smr_u_stabilized(self, sdata):
    """Compare stabilized, unexposed-standardized IPTW results with SAS values.

    Fits IPTW with ``standardize='unexposed'`` and ``stabilized=True``, then
    fits a weighted identity-link binomial GEE of ``dead`` on ``art`` and
    checks the ``art`` coefficient and its confidence limits against the
    stored SAS numbers.
    """
    sas_rd = -0.080048197
    sas_rd_ci = -0.153567335, -0.006529058

    model = 'male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'
    ipt = IPTW(sdata, treatment='art', standardize='unexposed', stabilized=True)
    ipt.regression_models(model)
    ipt.fit()
    sdata['iptw'] = ipt.Weight

    # Estimating GEE
    ind = sm.cov_struct.Independence()
    # NOTE(review): the link is passed as a class, not an instance; which form
    # is accepted depends on the statsmodels version -- confirm against the
    # version this suite supports.
    f = sm.families.family.Binomial(sm.families.links.identity)
    linrisk = smf.gee('dead ~ art', sdata['id'], sdata, cov_struct=ind, family=f,
                      weights=sdata['iptw']).fit()

    # Formula-API results are indexed by term name, so an integer key such as
    # ``params[1]`` only works through pandas' deprecated positional fallback.
    # ``.iloc`` selects the same second row ('art') explicitly.
    limits = linrisk.conf_int()
    npt.assert_allclose(linrisk.params.iloc[1], sas_rd, rtol=1e-5)
    npt.assert_allclose((limits.iloc[1, 0], limits.iloc[1, 1]), sas_rd_ci, rtol=1e-4)
def test_match_r_stddiff(self):
    """Standardized mean differences on a small simulated data set.

    Builds a 15-row frame with a binary (with one missing value), a continuous,
    a discrete and a categorical covariate, fits stabilized IPTW on it and
    compares the unweighted and weighted standardized mean differences with
    stored reference values.
    """
    # Simulated data for variable detection and standardized differences
    df = pd.DataFrame({
        'treat': [1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0],
        'bin': [0, 1, 0, np.nan, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1],
        'con': [0.1, 0.0, 1.0, 1.1, 2.2, 1.3, 0.1, 0.5,
                0.9, 0.5, 0.3, 0.2, 0.7, 0.9, 1.4],
        'dis': [0, 1, 3, 2, 1, 0, 0, 0, 0, 0, 1, 3, 2, 2, 1],
        'cat': [1, 2, 3, 1, 1, 2, 3, 1, 3, 2, 1, 2, 3, 2, 1],
    })

    estimator = IPTW(df, treatment='treat', stabilized=True)
    estimator.regression_models('bin + con + dis + C(cat)')
    estimator.fit()
    smd = estimator.standardized_mean_differences()

    # for unweighted
    expected_unweighted = np.array([0.342997, 0.0, 0.06668, -0.513553])
    npt.assert_allclose(np.array(smd['smd_u']), expected_unweighted, rtol=1e-4)

    # TODO need to find an R package or something that calculates weighted SMD
    # currently compares to my own calculations
    # for weighted
    expected_weighted = np.array([0.206072, -0.148404, 0.035752, 0.085844])
    npt.assert_allclose(np.array(smd['smd_w']), expected_weighted, rtol=1e-4)
def test_probability_calc(self, data):
    """Numerator and denominator probabilities equal the reference values."""
    estimator = IPTW(data, treatment='A', stabilized=True)
    estimator.regression_models(model_denominator='L', print_results=False)
    estimator.fit()

    # Descriptive local names; avoids rebinding the conventional ``pd`` alias.
    numerator = estimator.ProbabilityNumerator
    denominator = estimator.ProbabilityDenominator

    npt.assert_allclose(numerator, [0.5] * 10)
    npt.assert_allclose(
        denominator,
        [1 / 3, 1 / 3, 0.75, 0.75, 0.75, 1 / 3, 1 / 3, 1 / 3, 1 / 3, 0.75],
    )
def test_positivity_calculator(self, data):
    """``positivity()`` stores the expected mean, SD, minimum and maximum."""
    estimator = IPTW(data, treatment='A', stabilized=True)
    estimator.regression_models(model_denominator='L', print_results=False)
    estimator.fit()
    estimator.positivity()

    expected_mean = 1
    expected_sd = 0.456435
    expected_min = 2 / 3
    expected_max = 2

    npt.assert_allclose(estimator._pos_avg, expected_mean)
    # Only the standard deviation is compared with a relaxed tolerance.
    npt.assert_allclose(estimator._pos_sd, expected_sd, rtol=1e-5)
    npt.assert_allclose(estimator._pos_min, expected_min)
    npt.assert_allclose(estimator._pos_max, expected_max)
def test_standardized_differences(self, sdata):
    """Standardized mean differences on ``sdata`` equal the reference values."""
    estimator = IPTW(sdata, treatment='art', stabilized=True)
    estimator.regression_models('male + age0 + cd40 + dvl0')
    estimator.fit()
    smd = estimator.standardized_mean_differences()

    # for unweighted
    expected_unweighted = np.array([-0.015684, 0.022311, -0.4867, -0.015729])
    npt.assert_allclose(np.array(smd['smd_u']), expected_unweighted, rtol=1e-4)

    # TODO find R package to test these weighted SMD's
    # for weighted
    expected_weighted = np.array([-0.097789, -0.012395, -0.018591, 0.050719])
    npt.assert_allclose(np.array(smd['smd_w']), expected_weighted, rtol=1e-4)
def test_custom_models(self, sdata):
    """Smoke test: IPTW accepts a custom denominator model.

    An L1-penalised ``LogisticRegression`` is supplied through
    ``custom_model_denominator``; the resulting weights are then used in a
    weighted GEE fit. No numeric result is asserted -- the test passes when
    the whole pipeline runs without raising.
    """
    model = 'male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'
    # The solver is pinned because the L1 penalty is not supported by the
    # ``lbfgs`` solver that newer scikit-learn releases use by default;
    # ``liblinear`` supports it and was the earlier default.
    logd = LogisticRegression(penalty='l1', C=1.0, solver='liblinear',
                              random_state=203)
    ipt = IPTW(sdata, treatment='art', standardize='unexposed', stabilized=True)
    ipt.regression_models(model, custom_model_denominator=logd)
    ipt.fit()
    sdata['iptw'] = ipt.Weight

    # Estimating GEE
    ind = sm.cov_struct.Independence()
    # NOTE(review): the link is passed as a class, not an instance; which form
    # is accepted depends on the statsmodels version -- confirm against the
    # version this suite supports.
    f = sm.families.family.Binomial(sm.families.links.identity)
    smf.gee('dead ~ art', sdata['id'], sdata, cov_struct=ind, family=f,
            weights=sdata['iptw']).fit()
def causal_check():
    """Render the IPTW and SurvivalGFormula plots for visual inspection.

    Each figure is displayed with ``plt.show()``; nothing is returned and
    nothing is asserted.
    """
    # Check IPTW plots
    data = load_sample_data(False)
    spline_targets = (
        (['cd4_rs1', 'cd4_rs2'], 'cd40'),
        (['age_rs1', 'age_rs2'], 'age0'),
    )
    for new_columns, source_column in spline_targets:
        data[new_columns] = spline(data, source_column, n_knots=3, term=2,
                                   restricted=True)

    ipt = IPTW(data, treatment='art', stabilized=True)
    ipt.regression_models(
        'male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0')
    ipt.fit()

    ipt.plot_love()
    plt.tight_layout()
    plt.show()

    # KDE and box plots, each on the default scale and then on the logit scale
    for draw in (ipt.plot_kde, ipt.plot_boxplot):
        draw()
        plt.show()
        draw(measure='logit')
        plt.show()

    # Check SurvivalGFormula plots
    wide = load_sample_data(False).drop(columns=['cd4_wk45'])
    wide['t'] = np.round(wide['t']).astype(int)

    # Repeat every row ``t`` times, then number the copies 1..t within each id
    long = pd.DataFrame(np.repeat(wide.values, wide['t'], axis=0),
                        columns=wide.columns)
    long['t'] = long.groupby('id')['t'].cumcount() + 1

    # Event indicator: 1 on the final row of an id with dead == 1, else 0
    died = long['dead'] == 1
    final_row_of_id = long['id'] != long['id'].shift(-1)
    long.loc[died & final_row_of_id, 'd'] = 1
    long['d'] = long['d'].fillna(0)

    long['t_sq'] = long['t'] ** 2
    long['t_cu'] = long['t'] ** 3

    sgf = SurvivalGFormula(long, idvar='id', exposure='art', outcome='d', time='t')
    sgf.outcome_model(
        model='art + male + age0 + cd40 + dvl0 + t + t_sq + t_cu')
    sgf.fit(treatment='all')

    sgf.plot()
    plt.show()
    sgf.plot(c='r', linewidth=3, alpha=0.8)
    plt.show()
def causal_check():
    """Render the IPTW diagnostic plots for visual inspection.

    Each figure is displayed with ``plt.show()``; nothing is returned and
    nothing is asserted.
    """
    # 9) Check IPTW plots
    data = load_sample_data(False)
    spline_targets = (
        (['cd4_rs1', 'cd4_rs2'], 'cd40'),
        (['age_rs1', 'age_rs2'], 'age0'),
    )
    for new_columns, source_column in spline_targets:
        data[new_columns] = spline(data, source_column, n_knots=3, term=2,
                                   restricted=True)

    ipt = IPTW(data, treatment='art', stabilized=True)
    ipt.regression_models('male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0')
    ipt.fit()

    ipt.plot_love()
    plt.tight_layout()
    plt.show()

    # KDE and box plots, each on the default scale and then on the logit scale
    for draw in (ipt.plot_kde, ipt.plot_boxplot):
        draw()
        plt.show()
        draw(measure='logit')
        plt.show()
def test_match_iptw_continuous(self, cdata):
    """StochasticIPTW at p=1 versus p=0 agrees with the IPTW marginal model.

    On the complete rows of ``cdata``, fits unstabilized IPTW weights and a
    weighted Gaussian GEE of ``cd4_wk45`` on ``art``. The ``art`` coefficient
    is then compared with the difference between the StochasticIPTW marginal
    outcomes at ``p=1.0`` and ``p=0.0``.
    """
    model = 'male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'
    cdata = cdata.dropna().copy()

    # Estimating Marginal Structural Model
    ipt = IPTW(cdata, treatment='art', stabilized=False)
    ipt.regression_models(model)
    ipt.fit()
    cdata['iptw'] = ipt.Weight

    ind = sm.cov_struct.Independence()
    f = sm.families.family.Gaussian()
    linrisk = smf.gee('cd4_wk45 ~ art', cdata['id'], cdata, cov_struct=ind, family=f,
                      weights=cdata['iptw']).fit()

    # Estimating 'Stochastic Treatment'
    sipw = StochasticIPTW(cdata, treatment='art', outcome='cd4_wk45')
    # Reuse the same formula so both estimators share one treatment model.
    sipw.treatment_model(model=model, print_results=False)
    sipw.fit(p=1.0)
    r_all = sipw.marginal_outcome
    sipw.fit(p=0.0)
    r_non = sipw.marginal_outcome

    # ``params`` is indexed by term name, so an integer key only works through
    # pandas' deprecated positional fallback; ``.iloc[1]`` selects the same
    # second entry ('art') explicitly.
    npt.assert_allclose(linrisk.params.iloc[1], r_all - r_non, atol=1e-4)
def test_unstabilized_positivity_warning(self, data):
    """``positivity()`` emits a ``UserWarning`` when the weights are unstabilized."""
    estimator = IPTW(data, treatment='A', stabilized=False)
    estimator.regression_models(model_denominator='L', print_results=False)
    estimator.fit()

    with pytest.warns(UserWarning):
        estimator.positivity()