def test_error_summary(self, r_network):
    """``summary()`` is expected to raise ``ValueError`` when ``fit()`` was never called."""
    estimator = NetworkTMLE(network=r_network, exposure='A', outcome='Y')
    estimator.exposure_model('W')
    estimator.exposure_map_model('W', distribution=None)
    estimator.outcome_model('A + W')

    # All three nuisance models are specified, but no fit() precedes summary().
    with pytest.raises(ValueError):
        estimator.summary()
def test_marginal_vector_length_stoch(self, r_network):
    """``marginals_vector`` has one entry per sample requested from ``fit()``."""
    n_samples = 10

    estimator = NetworkTMLE(network=r_network, exposure='A', outcome='Y')
    estimator.exposure_model('W + W_sum')
    estimator.exposure_map_model('A + W + W_sum', distribution=None)
    estimator.outcome_model('A + W + A_sum + W_sum')

    # Fixed seed keeps the run reproducible.
    estimator.fit(p=0.4, samples=n_samples, seed=20110129)

    assert len(estimator.marginals_vector) == n_samples
def test_error_fit_gsmodel(self, r_network):
    """``fit()`` is expected to raise ``ValueError`` when no exposure-map model was specified."""
    estimator = NetworkTMLE(network=r_network, exposure='A', outcome='Y')
    estimator.exposure_model('W')
    # Deliberately NOT calling:
    #     estimator.exposure_map_model('W', distribution=None)
    # NOTE(review): the missing gs-model is presumably what triggers the error - confirm.
    estimator.outcome_model('A + W')

    with pytest.raises(ValueError):
        estimator.fit(p=0.0, samples=10)
def test_check_denominator_est(self, r_network):
    """``_denominator_estimated_`` is ``False`` before ``fit()`` and ``True`` afterwards."""
    estimator = NetworkTMLE(network=r_network, exposure='A', outcome='Y')
    estimator.exposure_model('W + W_sum')
    estimator.exposure_map_model('A + W + W_sum', distribution=None)
    estimator.outcome_model('A + W + A_sum + W_sum')

    # Specifying the models alone must not flip the flag.
    assert estimator._denominator_estimated_ is False

    estimator.fit(p=0.4, samples=5, seed=20110129)

    assert estimator._denominator_estimated_ is True
def test_qmodel_params6(self, sm_network):
    """Outcome-model coefficients and predictions match SAS (Poisson, degree-restricted)."""
    # Reference values from SAS.
    # NOTE(review): the original comment said "SAS linear regression", but the
    # model fitted below uses distribution='poisson' - confirm the SAS procedure.
    expected_coefs = [-1.2612, 0.6328, -0.1340]
    expected_fitted = [0.2167149, 0.2167149, 0.2833184,
                       0.5334486, 0.2833184, 0.4665515]

    estimator = NetworkTMLE(
        network=sm_network,
        exposure='A',
        outcome='C',
        degree_restrict=[0, 2],  # restricted analysis
    )
    estimator.outcome_model('A + A_sum', distribution='poisson')

    fitted_coefs = estimator._outcome_model.params
    fitted_values = estimator._Qinit_

    npt.assert_allclose(expected_coefs, fitted_coefs, atol=1e-4)
    npt.assert_allclose(expected_fitted, fitted_values, atol=1e-6)
def test_qmodel_params5(self, sm_network):
    """Outcome-model coefficients and predictions match SAS (degree-restricted)."""
    # Reference values from a SAS linear regression.
    expected_coefs = [0.3718, 0.2436, -0.0128, -0.2179]
    expected_fitted = [0.3461641, 0.1282299, 0.1538692,
                       0.6153769, 0.3718034, 0.3846231]

    estimator = NetworkTMLE(
        network=sm_network,
        exposure='A',
        outcome='C',
        degree_restrict=[0, 2],  # restricted analysis
    )
    estimator.outcome_model('A + A_sum + W')

    fitted_coefs = estimator._outcome_model.params
    fitted_values = estimator._Qinit_

    npt.assert_allclose(expected_coefs, fitted_coefs, atol=1e-4)
    npt.assert_allclose(expected_fitted, fitted_values, atol=1e-6)
def test_qmodel_params3(self, sm_network):
    """Outcome-model coefficients and predictions match SAS (Poisson, full network)."""
    # Reference values from SAS.
    # NOTE(review): the original comment said "SAS linear regression", but the
    # model fitted below uses distribution='poisson' - confirm the SAS procedure.
    expected_coefs = [-1.1691, 0.7686, -0.0028, -0.6053]
    expected_fitted = [0.3637460, 0.3089358, 0.6681595,
                       0.3089358, 0.1686493, 0.1695826,
                       0.6700057, 0.3106454, 0.3647510]

    estimator = NetworkTMLE(network=sm_network, exposure='A', outcome='C')
    estimator.outcome_model('A + A_sum + W', distribution='poisson')

    fitted_coefs = estimator._outcome_model.params
    fitted_values = estimator._Qinit_

    npt.assert_allclose(expected_coefs, fitted_coefs, atol=1e-4)
    npt.assert_allclose(expected_fitted, fitted_values, atol=1e-6)
def test_qmodel_params2(self, sm_network):
    """Outcome-model coefficients and predictions match SAS (full network)."""
    # Reference values from a SAS linear regression.
    expected_coefs = [0.3598, 0.2806, -0.0187, -0.2100]
    expected_fitted = [0.3929295, 0.3223863, 0.6216814,
                       0.3223863, 0.1123546, 0.1497950,
                       0.6404016, 0.3598267, 0.4116497]

    estimator = NetworkTMLE(network=sm_network, exposure='A', outcome='C')
    estimator.outcome_model('A + A_sum + W')

    fitted_coefs = estimator._outcome_model.params
    fitted_values = estimator._Qinit_

    npt.assert_allclose(expected_coefs, fitted_coefs, atol=1e-4)
    npt.assert_allclose(expected_fitted, fitted_values, atol=1e-6)
def test_gmodel_params(self, r_network):
    """Fitted treatment-model coefficients match SAS for several gs-model specifications.

    The same estimator object is re-specified and re-fit for each configuration;
    the fitted models are read from ``_treatment_models`` after every ``fit()``.
    """
    tolerance = 1e-4
    # The reference coefficients for the first treatment model are the same in
    # every configuration that checks it.
    sas_gi = [-1.2043, 1.4001, 0.6412]

    estimator = NetworkTMLE(network=r_network, exposure='A', outcome='Y')

    # --- Nonparametric g-model (distribution=None) ---
    estimator.exposure_model('W + W_sum')
    estimator.exposure_map_model('A + W + W_sum', distribution=None)
    estimator.outcome_model('A + A_sum + W + W_sum')
    estimator.fit(p=0.5, samples=1)

    nonparametric_reference = [
        sas_gi,
        [-1.8720, 0.1960, 0.0815, 1.6364],
        [-2.7908, -0.2574, -0.1038, 1.7206, -0.2127],
    ]
    for position, sas_coefs in enumerate(nonparametric_reference):
        fitted = estimator._treatment_models[position].params
        npt.assert_allclose(fitted, sas_coefs, atol=tolerance)

    # --- Poisson gs-model ---
    estimator.exposure_model('W + W_sum')
    estimator.exposure_map_model('A + W + W_sum', distribution='poisson', measure='sum')
    estimator.fit(p=0.5, samples=1)

    poisson_reference = [
        sas_gi,
        [-1.5670, 0.0150, 0.0201, 1.0277],
    ]
    for position, sas_coefs in enumerate(poisson_reference):
        fitted = estimator._treatment_models[position].params
        npt.assert_allclose(fitted, sas_coefs, atol=tolerance)

    # --- Linear gs-model, measure='sum' ---
    estimator.exposure_model('W + W_sum')
    estimator.exposure_map_model('A + W + W_sum', distribution='normal', measure='sum')
    estimator.fit(p=0.5, samples=1)

    normal_sum_reference = [
        sas_gi,
        [0.18303, 0.01088, 0.00271, 0.53839],
    ]
    for position, sas_coefs in enumerate(normal_sum_reference):
        fitted = estimator._treatment_models[position].params
        npt.assert_allclose(fitted, sas_coefs, atol=tolerance)

    # --- Linear gs-model, measure='mean' (only the second model is checked) ---
    estimator.exposure_model('W + W_sum')
    estimator.exposure_map_model('A + W + W_sum', distribution='normal', measure='mean')
    estimator.fit(p=0.5, samples=1)

    sas_gs_mean = [0.12776, 0.02769, 0.01849, 0.31258]
    fitted = estimator._treatment_models[1].params
    npt.assert_allclose(fitted, sas_gs_mean, atol=tolerance)
def test_qmodel_params4(self, sm_network):
    """Outcome-model coefficients and predictions match SAS (degree-restricted).

    The outcome model ``'A_sum'`` is fitted for outcome ``'Y'`` with
    ``degree_restrict=[0, 2]``; the fitted parameters and the initial
    predictions (``_Qinit_``) are compared against SAS reference values.
    """
    # Reference values from a SAS logit model.
    sas_params = [-0.9628, 0.3087]
    sas_preds = [0.4144844, 0.4144844, 0.2763229, 0.2763229, 0.2763229, 0.3420625]

    tmle = NetworkTMLE(network=sm_network, exposure='A', outcome='Y',
                       degree_restrict=[0, 2])  # Restricted
    tmle.outcome_model('A_sum')

    est_params = tmle._outcome_model.params
    est_preds = tmle._Qinit_

    # A leftover debugging print of est_preds used to live here; it has been
    # removed because assert_allclose already reports the values on failure.
    npt.assert_allclose(sas_params, est_params, atol=1e-4)
    npt.assert_allclose(sas_preds, est_preds)
def test_qmodel_params1(self, sm_network):
    """Outcome-model coefficients and predictions match SAS (full network)."""
    # Reference values from a SAS logit model.
    expected_coefs = [-1.5109, -0.9583, 0.3694, 1.5332]
    expected_fitted = [0.45083299, 0.31601450, 0.10911248,
                       0.31601450, 0.68157767, 0.50558116,
                       0.07804636, 0.18081217, 0.36200818]

    estimator = NetworkTMLE(network=sm_network, exposure='A', outcome='Y')
    estimator.outcome_model('A + A_sum + W')

    fitted_coefs = estimator._outcome_model.params
    fitted_values = estimator._Qinit_

    npt.assert_allclose(expected_coefs, fitted_coefs, atol=1e-4)
    # Predictions are compared at assert_allclose's default tolerances.
    npt.assert_allclose(expected_fitted, fitted_values)
def test_error_p_bound(self, r_network):
    """``fit()`` is expected to raise ``ValueError`` when ``p`` contains the value 1.5."""
    estimator = NetworkTMLE(network=r_network, exposure='A', outcome='Y')
    estimator.exposure_model('W')
    estimator.exposure_map_model('W', distribution=None)
    estimator.outcome_model('A + W')

    # Scalar 'p'
    with pytest.raises(ValueError):
        estimator.fit(p=1.5, samples=10)

    # Vector 'p': nine entries, all 0.1 except the second, which is 1.5
    p_vector = [0.1] * 9
    p_vector[1] = 1.5
    with pytest.raises(ValueError):
        estimator.fit(p=p_vector, samples=100)
def test_procedure_vs_sas(self, r_network):
    """Outcome-model and treatment-model coefficients match the reference values."""
    tolerance = 1e-4
    estimator = NetworkTMLE(network=r_network, exposure='A', outcome='Y')

    # Q-model: Poisson outcome model compared with the SAS coefficients
    expected_q = [-2.3922, 0.8113, 1.0667, 1.5355, 1.2313]
    estimator.outcome_model('A + W + A_sum + W_sum', distribution='poisson')
    fitted_q = estimator._outcome_model.params
    npt.assert_allclose(expected_q, fitted_q, atol=tolerance)

    # g-model denominator: expected coefficients, one list per entry of
    # _treatment_models, in order
    expected_g = (
        [-1.2043, 1.4001, 0.6412],
        [-1.8720, 0.1960, 0.0815, 1.6364],
        [-2.7908, -0.2574, -0.1038, 1.7206, -0.2127],
    )
    estimator.exposure_model('W + W_sum')
    estimator.exposure_map_model('A + W + W_sum', distribution=None)
    estimator.fit(p=0.5, samples=10)

    for position, expected_coefs in enumerate(expected_g):
        fitted_g = estimator._treatment_models[position].params
        npt.assert_allclose(fitted_g, expected_coefs, atol=tolerance)
results.loc[i, 'var_' + str(p)] = np.nan results.loc[i, 'lcl_' + str(p)] = np.nan results.loc[i, 'ucl_' + str(p)] = np.nan else: # Network TMLE ntmle = NetworkTMLE(H, exposure=exposure, outcome=outcome, degree_restrict=degree_restrict) ntmle.define_threshold(variable='diet', threshold=3, definition='sum') ntmle.exposure_model(gin_model) ntmle.exposure_map_model(gsn_model, measure=measure_gs, distribution=distribution_gs) ntmle.outcome_model(qn_model, distribution='normal') for p in prop_treated: # loops through all treatment plans try: if shift: z = odds_to_probability(np.exp(log_odds + p)) ntmle.fit(p=z, bound=0.01) else: ntmle.fit(p=p, bound=0.01) results.loc[i, 'bias_' + str(p)] = ntmle.marginal_outcome - truth[p] results.loc[i, 'var_' + str(p)] = ntmle.conditional_variance results.loc[i, 'lcl_' + str(p)] = ntmle.conditional_ci[0] results.loc[i, 'ucl_' + str(p)] = ntmle.conditional_ci[1] except: results.loc[i, 'bias_' + str(p)] = np.nan results.loc[i, 'var_' + str(p)] = np.nan
######################################## # Running simulation ######################################## for i in range(n_mc): # Generating Data H = obs_net_gen(G) df = network_to_df(H) results.loc[i, 'inc_'+exposure] = np.mean(df[exposure]) results.loc[i, 'inc_'+outcome] = np.mean(df[outcome]) if estimator == 'tmle': # Network TMLE ntmle = NetworkTMLE(H, exposure=exposure, outcome=outcome) ntmle.exposure_model(gi_model) ntmle.exposure_map_model(gs_model) ntmle.outcome_model(qi_model) for p in prop_treated: # loops through all treatment plans try: ntmle.fit(p=p, bound=0.01) results.loc[i, 'bias_'+str(p)] = ntmle.marginal_outcome - truth[p] results.loc[i, 'var_'+str(p)] = ntmle.conditional_variance results.loc[i, 'lcl_'+str(p)] = ntmle.conditional_ci[0] results.loc[i, 'ucl_'+str(p)] = ntmle.conditional_ci[1] except: results.loc[i, 'bias_'+str(p)] = np.nan results.loc[i, 'var_'+str(p)] = np.nan results.loc[i, 'lcl_'+str(p)] = np.nan results.loc[i, 'ucl_'+str(p)] = np.nan elif estimator == 'iptw': niptw = NetworkIPTW(H, exposure=exposure, outcome=outcome, verbose=False) niptw.exposure_model(gi_model)
results.loc[i, 'bias_' + str(p)] = np.nan results.loc[i, 'var_' + str(p)] = np.nan results.loc[i, 'lcl_' + str(p)] = np.nan results.loc[i, 'ucl_' + str(p)] = np.nan else: # Network TMLE ntmle = NetworkTMLE(H, exposure=exposure, outcome=outcome, degree_restrict=degree_restrict) ntmle.exposure_model(gin_model) ntmle.exposure_map_model(gsn_model, measure=measure_gs, distribution=distribution_gs) ntmle.outcome_model(qn_model) for p in prop_treated: # loops through all treatment plans try: if shift: z = odds_to_probability(np.exp(log_odds + p)) ntmle.fit(p=z, bound=0.01) else: ntmle.fit(p=p, bound=0.01) results.loc[i, 'bias_' + str(p)] = ntmle.marginal_outcome - truth[p] results.loc[i, 'var_' + str(p)] = ntmle.conditional_variance results.loc[i, 'lcl_' + str(p)] = ntmle.conditional_ci[0] results.loc[i, 'ucl_' + str(p)] = ntmle.conditional_ci[1] except: results.loc[i, 'bias_' + str(p)] = np.nan results.loc[i, 'var_' + str(p)] = np.nan
# Split the whitespace-separated neighbour string of every row into tokens.
df['NETID_split'] = df['Net_str'].str.split()

G = nx.DiGraph()
G.add_nodes_from(df['IDs'])

# Adding edges: the first character of each token is dropped and the remainder
# is parsed as the integer ID of the edge's target node.
for i, c in zip(df['IDs'], df['NETID_split']):
    # Only rows whose split produced a list contribute edges
    # (presumably rows with a missing Net_str yield a non-list value - confirm).
    if isinstance(c, list):
        for j in c:
            G.add_edge(i, int(j[1:]))

# Adding attributes
for node in G.nodes():
    # Select the node's row(s) once instead of re-evaluating the mask per attribute.
    node_rows = df.loc[df['IDs'] == node]
    # np.int was removed in NumPy 1.24 (AttributeError); use the builtin int.
    # .item() raises unless exactly one row matches, so a duplicated or missing
    # ID still fails loudly rather than silently picking a row.
    G.nodes[node]['W'] = int(node_rows['W1'].item())
    G.nodes[node]['A'] = int(node_rows['A'].item())
    G.nodes[node]['Y'] = int(node_rows['Y'].item())

# Run the same analysis under two values of p, building a fresh estimator for each.
for p in (0.35, 0.65):
    tmle = NetworkTMLE(network=G, exposure='A', outcome='Y', verbose=True)
    tmle.exposure_model('W + W_sum')
    tmle.exposure_map_model('A + W + W_sum', measure=None, distribution=None)
    tmle.outcome_model('A + A_sum + W + W_sum')
    tmle.fit(p=p, samples=1000, bound=0.005)
    tmle.summary(decimal=6)