def ihdp_miwae(set_id_range=range(1, 1001), prop_miss=0.1, seed=0,
               d_miwae=3, n_epochs=602, sig_prior=1, add_wy=False,
               method="glm", **kwargs):
    """Estimate treatment effects on IHDP replicates from MIWAE latent codes.

    For every ``set_id`` in ``set_id_range`` the matching IHDP CSV file is
    read: column 0 is used as the treatment ``w``, column 1 as the outcome
    ``y`` and columns 5 onward as the covariate matrix. The covariates are
    amputed with ``ampute`` and, on the first processed replicate only, a
    MIWAE model is fitted; its outputs ``zhat`` and ``zhat_mul`` are reused
    for every later replicate.

    Args:
        set_id_range: iterable of replicate ids used to build the CSV path.
        prop_miss: forwarded to ``ampute``.
        seed: forwarded to ``ampute``.
        d_miwae, n_epochs, sig_prior, add_wy: forwarded to ``miwae``; when
            ``add_wy`` is true, ``w`` and ``y`` are passed to ``miwae`` too.
        method: forwarded to ``tau_dr`` and ``tau_residuals``.
        **kwargs: accepted and ignored.

    Returns:
        pandas.DataFrame with one row per replicate holding the settings,
        the estimates computed on ``zhat`` (``res_tau_*``), the averages of
        the estimates computed on each element of ``zhat_mul``
        (``res_mul_tau_*``) and two ``dcor_*`` columns that are always NaN
        here.
    """
    from miwae import miwae

    columns = [
        'prop_miss', 'd_miwae', 'sig_prior', 'n_epochs', 'add_wy', 'set_id',
        'res_tau_dr', 'res_tau_ols', 'res_tau_ols_ps', 'res_tau_resid',
        'res_tau_diffmu',
        'res_mul_tau_dr', 'res_mul_tau_ols', 'res_mul_tau_ols_ps',
        'res_mul_tau_resid', 'res_mul_tau_diffmu',
        'dcor_zhat', 'dcor_zhat_mul',
    ]

    l_scores = []
    zhat = None
    zhat_mul = None
    for set_id in set_id_range:
        X = pd.read_csv('./data/IHDP/csv/R_ate_ihdp_npci_' + str(set_id) + '.csv')
        w = np.array(X.iloc[:, 0]).reshape((-1, 1))
        y = np.array(X.iloc[:, 1]).reshape((-1, 1))
        X = np.array(X.iloc[:, 5:])

        X_miss = ampute(X, prop_miss=prop_miss, seed=seed)

        # Fit MIWAE once, on the first replicate that is processed. The
        # previous `set_id == 1` test left `zhat` undefined whenever the
        # range did not start with 1.
        # NOTE(review): with add_wy=True the model only ever sees w/y of that
        # first replicate -- confirm this reuse is intended.
        if zhat is None:
            if add_wy:
                xhat, zhat, zhat_mul = miwae(X_miss, d_miwae=d_miwae,
                                             sig_prior=sig_prior,
                                             n_epochs=n_epochs,
                                             add_wy=add_wy, w=w, y=y)
            else:
                xhat, zhat, zhat_mul = miwae(X_miss, d_miwae=d_miwae,
                                             sig_prior=sig_prior,
                                             n_epochs=n_epochs,
                                             add_wy=add_wy)
            # Shapes of the MIWAE outputs: xhat, zhat, zhat_mul.
            print(xhat.shape, zhat.shape, zhat_mul.shape)

        print(set_id)

        # Tau estimated on Zhat=E[Z|X]
        ps_hat, y0_hat, y1_hat = get_ps_y01_hat(zhat, w, y)
        res_tau_ols = tau_ols(zhat, w, y)
        res_tau_ols_ps = tau_ols_ps(zhat, w, y)
        res_tau_dr = tau_dr(y, w, y0_hat, y1_hat, ps_hat, method)
        res_tau_diffmu = np.mean(y1_hat - y0_hat)
        lr = LinearRegression()
        lr.fit(zhat, y)
        y_hat = lr.predict(zhat)
        res_tau_resid = tau_residuals(y, w, y_hat, ps_hat, method)

        # Tau estimated on Zhat^(b), b=1,...,B sampled from posterior
        res_mul_tau_dr = []
        res_mul_tau_ols = []
        res_mul_tau_ols_ps = []
        res_mul_tau_resid = []
        res_mul_tau_diffmu = []
        for zhat_b in zhat_mul:
            ps_hat, y0_hat, y1_hat = get_ps_y01_hat(zhat_b, w, y)
            res_mul_tau_dr.append(
                tau_dr(y, w, y0_hat, y1_hat, ps_hat, method))
            res_mul_tau_ols.append(tau_ols(zhat_b, w, y))
            res_mul_tau_ols_ps.append(tau_ols_ps(zhat_b, w, y))
            res_mul_tau_diffmu.append(np.mean(y1_hat - y0_hat))
            lr = LinearRegression()
            # Fit on the same draw that is predicted on (matches exp_miwae).
            lr.fit(zhat_b, y)
            y_hat = lr.predict(zhat_b)
            # Append: assigning here used to replace the list, so only the
            # last draw contributed to the reported mean.
            res_mul_tau_resid.append(
                tau_residuals(y, w, y_hat, ps_hat, method))

        res_mul_tau_dr = np.mean(res_mul_tau_dr)
        res_mul_tau_ols = np.mean(res_mul_tau_ols)
        res_mul_tau_ols_ps = np.mean(res_mul_tau_ols_ps)
        res_mul_tau_resid = np.mean(res_mul_tau_resid)
        res_mul_tau_diffmu = np.mean(res_mul_tau_diffmu)

        # Not computed in this function; kept so the columns line up.
        dcor_zhat = np.nan
        dcor_zhat_mul = np.nan

        l_scores.append([
            prop_miss, d_miwae, sig_prior, n_epochs, add_wy, set_id,
            res_tau_dr, res_tau_ols, res_tau_ols_ps, res_tau_resid,
            res_tau_diffmu,
            res_mul_tau_dr, res_mul_tau_ols, res_mul_tau_ols_ps,
            res_mul_tau_resid, res_mul_tau_diffmu,
            dcor_zhat, dcor_zhat_mul,
        ])

    score_data = pd.DataFrame(l_scores, columns=columns)
    return score_data
def exp_miwae(model="dlvm", n=1000, d=3, p=100, prop_miss=0.1, citcio=False,
              seed=0, d_miwae=3, n_epochs=602, sig_prior=1, add_wy=False,
              num_samples_zmul=200, method="glm", **kwargs):
    """Run one simulated experiment using MIWAE latent codes.

    Data are generated with ``gen_lrmf`` or ``gen_dlvm`` according to
    ``model``, the covariates are amputed with ``ampute``, MIWAE is fitted
    on the incomplete covariates, and the treatment-effect estimators are
    evaluated on ``zhat`` and on every element of ``zhat_mul``.

    Args:
        model: ``"lrmf"`` or ``"dlvm"``.
        n, d, p, citcio: forwarded to the data generator.
        prop_miss, seed: forwarded to the data generator and to ``ampute``.
        d_miwae, n_epochs, sig_prior, add_wy, num_samples_zmul: forwarded to
            ``miwae``; when ``add_wy`` is true, ``w`` and ``y`` are passed
            to ``miwae`` as well.
        method: forwarded to ``tau_dr`` and ``tau_residuals``.
        **kwargs: accepted and ignored.

    Returns:
        Tuple ``(res_tau_dr, res_tau_ols, res_tau_ols_ps, res_tau_resid,
        res_mul_tau_dr, res_mul_tau_ols, res_mul_tau_ols_ps,
        res_mul_tau_resid, dcor_zhat, dcor_zhat_mul)``. The ``res_mul_*``
        entries are means over ``zhat_mul``. The two ``dcor_*`` entries are
        NaN when ``Z`` and ``zhat`` have a different number of columns.

    Raises:
        NotImplementedError: if ``model`` is neither ``"lrmf"`` nor
            ``"dlvm"``.
    """
    from miwae import miwae

    if model == "lrmf":
        Z, X, w, y, ps = gen_lrmf(n=n, d=d, p=p, citcio=citcio,
                                  prop_miss=prop_miss, seed=seed)
    elif model == "dlvm":
        Z, X, w, y, ps = gen_dlvm(n=n, d=d, p=p, citcio=citcio,
                                  prop_miss=prop_miss, seed=seed)
    else:
        raise NotImplementedError(
            "Other data generating models not implemented here yet.")

    X_miss = ampute(X, prop_miss=prop_miss, seed=seed)

    # w and y are handed to MIWAE only when add_wy is requested.
    # NOTE(review): the latent dimension is passed as `d=` here while
    # ihdp_miwae passes `d_miwae=` -- confirm against miwae's signature.
    wy_args = {'w': w, 'y': y} if add_wy else {}
    xhat, zhat, zhat_mul = miwae(X_miss, d=d_miwae, sig_prior=sig_prior,
                                 num_samples_zmul=num_samples_zmul,
                                 n_epochs=n_epochs, add_wy=add_wy,
                                 **wy_args)

    # Shapes of the MIWAE outputs: xhat, zhat, zhat_mul,
    # e.g. (1000, 200) (1000, 3) (200, 1000, 3)
    print(xhat.shape, zhat.shape, zhat_mul.shape)

    # Tau estimated on Zhat=E[Z|X]
    ps_hat, y0_hat, y1_hat = get_ps_y01_hat(zhat, w, y)
    res_tau_ols = tau_ols(zhat, w, y)
    res_tau_ols_ps = tau_ols_ps(zhat, w, y)
    res_tau_dr = tau_dr(y, w, y0_hat, y1_hat, ps_hat, method)
    lr = LinearRegression()
    lr.fit(zhat, y)
    y_hat = lr.predict(zhat)
    res_tau_resid = tau_residuals(y, w, y_hat, ps_hat, method)

    # Tau estimated on Zhat^(b), b=1,...,B sampled from posterior
    res_mul_tau_dr = []
    res_mul_tau_ols = []
    res_mul_tau_ols_ps = []
    res_mul_tau_resid = []
    for zhat_b in zhat_mul:
        ps_hat, y0_hat, y1_hat = get_ps_y01_hat(zhat_b, w, y)
        res_mul_tau_dr.append(tau_dr(y, w, y0_hat, y1_hat, ps_hat, method))
        res_mul_tau_ols.append(tau_ols(zhat_b, w, y))
        res_mul_tau_ols_ps.append(tau_ols_ps(zhat_b, w, y))
        lr = LinearRegression()
        lr.fit(zhat_b, y)
        y_hat = lr.predict(zhat_b)
        res_mul_tau_resid.append(
            tau_residuals(y, w, y_hat, ps_hat, method))

    res_mul_tau_dr = np.mean(res_mul_tau_dr)
    res_mul_tau_ols = np.mean(res_mul_tau_ols)
    res_mul_tau_ols_ps = np.mean(res_mul_tau_ols_ps)
    res_mul_tau_resid = np.mean(res_mul_tau_resid)

    # Default to NaN so the return below never hits an unbound name when the
    # latent dimensions differ and the comparison is skipped.
    dcor_zhat = np.nan
    dcor_zhat_mul = np.nan
    if Z.shape[1] == zhat.shape[1]:
        dcor_zhat = dcor(Z, zhat)
        dcor_zhat_mul = np.mean([dcor(Z, zhat_b) for zhat_b in zhat_mul])

    return (res_tau_dr, res_tau_ols, res_tau_ols_ps, res_tau_resid,
            res_mul_tau_dr, res_mul_tau_ols, res_mul_tau_ols_ps,
            res_mul_tau_resid, dcor_zhat, dcor_zhat_mul)
def ihdp_baseline(set_id=1, prop_miss=0.1, seed=0, full_baseline=False,
                  add_wy=False, sig_prior=1, d_miwae=10, n_epochs=10,
                  method="glm", **kwargs):
    """Compute baseline treatment-effect estimates on one IHDP replicate.

    The CSV for ``set_id`` is read: column 0 is used as the treatment ``w``,
    column 1 as the outcome ``y`` and columns 5 onward as covariates. The
    estimators are evaluated on the complete covariates (``'X'``) and on
    the mean-imputed amputed covariates (``'X_imp_mean'``). With
    ``full_baseline`` they are also evaluated on an ``IterativeImputer``
    completion (``'X_imp'``) and on the output of ``get_U_softimpute``
    (``'Z_mf'``).

    Args:
        set_id: replicate id used to build the CSV path.
        prop_miss, seed: forwarded to ``ampute``.
        full_baseline: whether to add the ``'X_imp'`` and ``'Z_mf'`` entries.
        add_wy, sig_prior, d_miwae, n_epochs: unused in this function.
        method: forwarded to ``tau_dr`` and ``tau_residuals``.
        **kwargs: accepted and ignored.

    Returns:
        dict mapping each baseline name to the tuple
        ``(res_tau_dr, res_tau_ols, res_tau_ols_ps, res_tau_resid)``.
    """
    X = pd.read_csv('./data/IHDP/csv/R_ate_ihdp_npci_' + str(set_id) + '.csv')
    w = np.array(X.iloc[:, 0]).reshape((-1, 1))
    y = np.array(X.iloc[:, 1]).reshape((-1, 1))
    X = np.array(X.iloc[:, 5:])

    X_miss = ampute(X, prop_miss=prop_miss, seed=seed)

    # Mean imputation. Only a missing SimpleImputer is tolerated; previously
    # any failure silently left an all-zeros matrix as the "imputed" data.
    try:
        from sklearn.impute import SimpleImputer
    except ImportError:
        # Fallback for scikit-learn versions without SimpleImputer.
        # Assumes missing entries are encoded as NaN, which is also
        # SimpleImputer's default -- TODO confirm against ampute.
        col_means = np.nanmean(X_miss, axis=0)
        X_imp_mean = np.where(np.isnan(X_miss), col_means, X_miss)
    else:
        X_imp_mean = SimpleImputer().fit_transform(X_miss)

    algo_name = ['X', 'X_imp_mean']
    algo_ = [X, X_imp_mean]

    if full_baseline:
        # complete the baseline
        Z_mf = get_U_softimpute(X_miss)
        # The import location of IterativeImputer depends on the sklearn
        # version; only the import is guarded so fit errors surface directly.
        try:
            from sklearn.impute import IterativeImputer
        except ImportError:
            from sklearn.experimental import enable_iterative_imputer  # noqa: F401
            from sklearn.impute import IterativeImputer
        X_imp = IterativeImputer().fit_transform(X_miss)
        algo_name += ['X_imp', 'Z_mf']  # , 'Z_perm']
        algo_ += [X_imp, Z_mf]  # , Z_perm]

    tau = dict()
    for name, zhat in zip(algo_name, algo_):
        if name == 'X_mi':
            # No entry is currently registered under this name; the branch
            # is kept for a multiple-imputation baseline.
            res_tau_dr, res_tau_ols, res_tau_ols_ps, res_tau_resid = tau_mi(
                zhat, w, y, method=method)
        else:
            ps_hat, y0_hat, y1_hat = get_ps_y01_hat(zhat, w, y)
            res_tau_ols = tau_ols(zhat, w, y)
            res_tau_ols_ps = tau_ols_ps(zhat, w, y)
            res_tau_dr = tau_dr(y, w, y0_hat, y1_hat, ps_hat, method)
            lr = LinearRegression()
            lr.fit(zhat, y)
            y_hat = lr.predict(zhat)
            res_tau_resid = tau_residuals(y, w, y_hat, ps_hat, method)

        tau[name] = res_tau_dr, res_tau_ols, res_tau_ols_ps, res_tau_resid

    return tau
def exp_baseline(model="dlvm", n=1000, d=3, p=100, prop_miss=0.1,
                 citcio=False, seed=0, full_baseline=False, method="glm",
                 **kwargs):
    """Compute baseline treatment-effect estimates on simulated data.

    Data are generated with ``gen_lrmf`` or ``gen_dlvm`` according to
    ``model``. The estimators are evaluated on the generated ``Z`` (``'Z'``)
    and on the complete covariates (``'X'``). With ``full_baseline`` they
    are also evaluated on the output of ``get_U_softimpute`` applied to the
    amputed covariates (``'Z_mf'``).

    Args:
        model: ``"lrmf"`` or ``"dlvm"``.
        n, d, p, citcio: forwarded to the data generator.
        prop_miss, seed: forwarded to the data generator and to ``ampute``.
        full_baseline: whether to add the ``'Z_mf'`` entry.
        method: forwarded to ``tau_dr`` and ``tau_residuals``.
        **kwargs: accepted and ignored.

    Returns:
        dict mapping each baseline name to the tuple
        ``(res_tau_dr, res_tau_ols, res_tau_ols_ps, res_tau_resid)``.

    Raises:
        NotImplementedError: if ``model`` is neither ``"lrmf"`` nor
            ``"dlvm"``.
    """
    if model == "lrmf":
        Z, X, w, y, ps = gen_lrmf(n=n, d=d, p=p, citcio=citcio,
                                  prop_miss=prop_miss, seed=seed)
    elif model == "dlvm":
        Z, X, w, y, ps = gen_dlvm(n=n, d=d, p=p, citcio=citcio,
                                  prop_miss=prop_miss, seed=seed)
    else:
        raise NotImplementedError(
            "Other data generating models not implemented here yet.")

    X_miss = ampute(X, prop_miss=prop_miss, seed=seed)

    # X_imp_mean and Z_perm are not part of the comparison at the moment
    # (see the commented-out entries below). They are still computed so the
    # entries can be re-enabled, and because the permutation advances the
    # global NumPy random state.
    from sklearn.impute import SimpleImputer
    X_imp_mean = SimpleImputer().fit_transform(X_miss)

    Z_perm = np.random.permutation(Z)
    # Z_rnd = np.random.randn(Z.shape[0], Z.shape[1])

    algo_name = ['Z', 'X']  # , 'X_imp_mean']
    algo_ = [Z, X]  # , X_imp_mean]

    if full_baseline:
        # complete the baseline
        Z_mf = get_U_softimpute(X_miss)
        # The import location of IterativeImputer depends on the sklearn
        # version; only the import is guarded so that a failing fit is not
        # retried and its error is not masked.
        try:
            from sklearn.impute import IterativeImputer
        except ImportError:
            from sklearn.experimental import enable_iterative_imputer  # noqa: F401
            from sklearn.impute import IterativeImputer
        # Not part of the comparison at the moment (see commented entries).
        X_imp = IterativeImputer().fit_transform(X_miss)
        algo_name += ['Z_mf']  # ['X_imp','Z_mf']#, 'Z_perm']
        algo_ += [Z_mf]  # [X_imp, Z_mf]#, Z_perm]

    tau = dict()
    for name, zhat in zip(algo_name, algo_):
        if name == 'X_mi':
            # No entry is currently registered under this name; the branch
            # is kept for a multiple-imputation baseline.
            res_tau_dr, res_tau_ols, res_tau_ols_ps, res_tau_resid = tau_mi(
                zhat, w, y, method=method)
        else:
            ps_hat, y0_hat, y1_hat = get_ps_y01_hat(zhat, w, y)
            res_tau_ols = tau_ols(zhat, w, y)
            res_tau_ols_ps = tau_ols_ps(zhat, w, y)
            res_tau_dr = tau_dr(y, w, y0_hat, y1_hat, ps_hat, method)
            lr = LinearRegression()
            lr.fit(zhat, y)
            y_hat = lr.predict(zhat)
            res_tau_resid = tau_residuals(y, w, y_hat, ps_hat, method)

        tau[name] = res_tau_dr, res_tau_ols, res_tau_ols_ps, res_tau_resid

    return tau