Exemplo n.º 1
0
def exp_cevae(model="dlvm",
              n=1000,
              d=3,
              p=100,
              prop_miss=0.1,
              citcio=False,
              seed=0,
              d_cevae=20,
              n_epochs=402,
              method="glm",
              **kwargs):
    """Run one CEVAE experiment on simulated data and return two DR estimates.

    Data are drawn with ``gen_lrmf`` or ``gen_dlvm``, the covariates are
    amputed with ``ampute`` and mean-imputed, and ``cevae_tf`` is fitted on
    the imputed covariates to obtain ``y0_hat`` and ``y1_hat``. Those
    predictions are passed to ``tau_dr`` twice: once with a constant
    propensity score of 0.5 and once with the ``ps`` returned by the data
    generator (presumably the true propensity score -- confirm against the
    generators).

    Args:
        model: ``"lrmf"`` or ``"dlvm"``; selects the data generator.
        n, d, p, citcio: Forwarded unchanged to the data generator.
        prop_miss: Forwarded to the data generator and to ``ampute``.
        seed: Forwarded to the data generator and to ``ampute``.
        d_cevae, n_epochs: Forwarded to ``cevae_tf``.
        method: Forwarded to ``tau_dr``.
        **kwargs: Accepted and ignored.

    Returns:
        Tuple ``(res_tau_dr, res_tau_dr_true_ps)``.

    Raises:
        NotImplementedError: If ``model`` is neither ``"lrmf"`` nor ``"dlvm"``.
    """
    # Imported here because the CEVAE code may run against a different
    # sklearn version than the rest of the experiments.
    from cevae_tf import cevae_tf

    # ``sklearn.preprocessing.Imputer`` only exists in old sklearn releases and
    # ``sklearn.impute.SimpleImputer`` only in newer ones; both impute with the
    # column mean by default, so use whichever is installed.
    try:
        from sklearn.impute import SimpleImputer as _MeanImputer
    except ImportError:
        from sklearn.preprocessing import Imputer as _MeanImputer

    if model == "lrmf":
        generate = gen_lrmf
    elif model == "dlvm":
        generate = gen_dlvm
    else:
        raise NotImplementedError(
            "Other data generating models not implemented here yet.")

    # The first returned value (Z) is not needed by this experiment.
    _, X, w, y, ps = generate(n=n,
                              d=d,
                              p=p,
                              citcio=citcio,
                              prop_miss=prop_miss,
                              seed=seed)

    X_miss = ampute(X, prop_miss=prop_miss, seed=seed)
    X_imp = _MeanImputer().fit_transform(X_miss)

    y0_hat, y1_hat = cevae_tf(X_imp, w, y, d_cevae=d_cevae, n_epochs=n_epochs)

    # No propensity model is fitted here: a constant 0.5 is used instead.
    ps_hat = np.ones(len(y0_hat)) / 2
    res_tau_dr = tau_dr(y, w, y0_hat, y1_hat, ps_hat, method)
    res_tau_dr_true_ps = tau_dr(y, w, y0_hat, y1_hat, ps, method)

    return res_tau_dr, res_tau_dr_true_ps
Exemplo n.º 2
0
def ihdp_baseline(set_id=1,
                  prop_miss=0.1,
                  seed=0,
                  d_cevae=20,
                  n_epochs=402,
                  method="glm",
                  **kwargs):
    """Compute baseline treatment-effect estimates on one IHDP data set.

    Reads ``./data/IHDP/csv/ihdp_npci_<set_id>.csv``, takes column 0 as ``w``,
    column 1 as ``y`` and columns 5 onwards as the covariates ``X``
    (presumably treatment, outcome and confounders -- confirm against the
    data files). ``X`` is amputed with ``ampute`` and mean-imputed, and the
    estimators are evaluated on both the complete and the imputed covariates.

    Args:
        set_id: Index of the IHDP csv file to load.
        prop_miss: Forwarded to ``ampute``.
        seed: Forwarded to ``ampute``.
        d_cevae, n_epochs: Accepted but not used by this function.
        method: Forwarded to ``tau_dr`` (and ``tau_mi``).
        **kwargs: Accepted and ignored.

    Returns:
        Dict mapping ``'X'`` and ``'X_imp_mean'`` to the tuple
        ``(res_tau_dr, res_tau_ols, res_tau_ols_ps)``.
    """
    X = pd.read_csv('./data/IHDP/csv/ihdp_npci_' + str(set_id) + '.csv')
    w = np.array(X.iloc[:, 0]).reshape((-1, 1))
    y = np.array(X.iloc[:, 1]).reshape((-1, 1))

    X = np.array(X.iloc[:, 5:])

    X_miss = ampute(X, prop_miss=prop_miss, seed=seed)

    # Mean imputation with whichever imputer the installed sklearn provides.
    # Only a missing import triggers the fallback; any other failure is
    # raised instead of silently reporting estimates on an all-zero matrix.
    try:
        from sklearn.impute import SimpleImputer as _MeanImputer
    except ImportError:
        from sklearn.preprocessing import Imputer as _MeanImputer
    X_imp_mean = _MeanImputer().fit_transform(X_miss)

    # The former MICE step was removed: it called ``IterativeImputer()()``
    # (an instance is not callable) and its result was never used.

    tau = dict()
    for name, zhat in zip(['X', 'X_imp_mean'], [X, X_imp_mean]):

        if name == 'X_mi':
            # Not reachable with the names listed above; kept so that a
            # multiple-imputation entry can be added to the list directly.
            res_tau_dr, res_tau_ols, res_tau_ols_ps = tau_mi(zhat,
                                                             w,
                                                             y,
                                                             method=method)

        else:
            ps_hat, y0_hat, y1_hat = get_ps_y01_hat(zhat, w, y)
            res_tau_ols = tau_ols(zhat, w, y)
            res_tau_ols_ps = tau_ols_ps(zhat, w, y)
            res_tau_dr = tau_dr(y, w, y0_hat, y1_hat, ps_hat, method)

        tau[name] = res_tau_dr, res_tau_ols, res_tau_ols_ps

    return tau
Exemplo n.º 3
0
def ihdp_miwae(set_id_range=range(1, 1001),
               prop_miss=0.1,
               seed=0,
               d_miwae=3,
               n_epochs=602,
               sig_prior=1,
               add_wy=False,
               method="glm",
               **kwargs):
    """Evaluate MIWAE-based treatment-effect estimators over IHDP data sets.

    For every ``set_id`` the file
    ``./data/IHDP/csv/R_ate_ihdp_npci_<set_id>.csv`` is read (column 0 as
    ``w``, column 1 as ``y``, columns 5 onwards as covariates) and the
    covariates are amputed with ``ampute``. ``miwae`` is fitted once, on the
    first data set processed, and its outputs ``zhat`` and ``zhat_mul`` are
    reused for every following data set.

    Args:
        set_id_range: Iterable of data-set indices to process.
        prop_miss: Forwarded to ``ampute``.
        seed: Forwarded to ``ampute``.
        d_miwae, n_epochs, sig_prior, add_wy: Forwarded to ``miwae``; when
            ``add_wy`` is true, ``w`` and ``y`` are passed to it as well.
        method: Forwarded to ``tau_dr`` and ``tau_residuals``.
        **kwargs: Accepted and ignored.

    Returns:
        ``pandas.DataFrame`` with one row per data set holding the settings,
        the estimates computed on ``zhat``, the averages of the estimates
        computed on each element of ``zhat_mul``, and two ``dcor_*`` columns
        that are always NaN.
    """
    from miwae import miwae

    # Outputs of the single MIWAE fit; filled in on the first iteration.
    zhat = None
    zhat_mul = None

    l_scores = []
    for set_id in set_id_range:
        X = pd.read_csv('./data/IHDP/csv/R_ate_ihdp_npci_' + str(set_id) +
                        '.csv')
        w = np.array(X.iloc[:, 0]).reshape((-1, 1))
        y = np.array(X.iloc[:, 1]).reshape((-1, 1))

        X = np.array(X.iloc[:, 5:])

        X_miss = ampute(X, prop_miss=prop_miss, seed=seed)

        if zhat is None:
            # Fit on the first data set seen rather than on ``set_id == 1``,
            # so that a range which does not start at 1 does not fail.
            # NOTE(review): with add_wy=True the embedding uses the first
            # data set's w and y only -- confirm that this reuse is intended.
            miwae_kwargs = dict(d_miwae=d_miwae,
                                sig_prior=sig_prior,
                                n_epochs=n_epochs,
                                add_wy=add_wy)
            if add_wy:
                miwae_kwargs.update(w=w, y=y)
            xhat, zhat, zhat_mul = miwae(X_miss, **miwae_kwargs)
            print(xhat.shape, zhat.shape, zhat_mul.shape)
        print(set_id)

        # Estimates computed on zhat.
        ps_hat, y0_hat, y1_hat = get_ps_y01_hat(zhat, w, y)
        res_tau_ols = tau_ols(zhat, w, y)
        res_tau_ols_ps = tau_ols_ps(zhat, w, y)
        res_tau_dr = tau_dr(y, w, y0_hat, y1_hat, ps_hat, method)
        res_tau_diffmu = np.mean(y1_hat - y0_hat)

        lr = LinearRegression()
        lr.fit(zhat, y)
        y_hat = lr.predict(zhat)
        res_tau_resid = tau_residuals(y, w, y_hat, ps_hat, method)

        # Estimates computed on each element of zhat_mul, averaged below.
        res_mul_tau_dr = []
        res_mul_tau_ols = []
        res_mul_tau_ols_ps = []
        res_mul_tau_resid = []
        res_mul_tau_diffmu = []
        for zhat_b in zhat_mul:
            ps_hat, y0_hat, y1_hat = get_ps_y01_hat(zhat_b, w, y)
            res_mul_tau_dr.append(tau_dr(y, w, y0_hat, y1_hat, ps_hat, method))
            res_mul_tau_ols.append(tau_ols(zhat_b, w, y))
            res_mul_tau_ols_ps.append(tau_ols_ps(zhat_b, w, y))
            res_mul_tau_diffmu.append(np.mean(y1_hat - y0_hat))
            # Fit and predict on the same draw, like the other per-draw
            # estimators in this loop.
            lr = LinearRegression()
            lr.fit(zhat_b, y)
            y_hat = lr.predict(zhat_b)
            # Append: assigning here would replace the list, and the average
            # below would then only reflect the last draw.
            res_mul_tau_resid.append(
                tau_residuals(y, w, y_hat, ps_hat, method))

        res_mul_tau_dr = np.mean(res_mul_tau_dr)
        res_mul_tau_ols = np.mean(res_mul_tau_ols)
        res_mul_tau_ols_ps = np.mean(res_mul_tau_ols_ps)
        res_mul_tau_resid = np.mean(res_mul_tau_resid)
        res_mul_tau_diffmu = np.mean(res_mul_tau_diffmu)

        # Not computed in this function; kept as NaN placeholder columns.
        dcor_zhat = np.nan
        dcor_zhat_mul = np.nan

        score = [
            prop_miss, d_miwae, sig_prior, n_epochs, add_wy, set_id,
            res_tau_dr, res_tau_ols, res_tau_ols_ps, res_tau_resid,
            res_tau_diffmu, res_mul_tau_dr, res_mul_tau_ols,
            res_mul_tau_ols_ps, res_mul_tau_resid, res_mul_tau_diffmu,
            dcor_zhat, dcor_zhat_mul
        ]
        l_scores.append(score)

    score_data = pd.DataFrame(
        l_scores,
        columns=[
            'prop_miss', 'd_miwae', 'sig_prior', 'n_epochs', 'add_wy',
            'set_id', 'res_tau_dr', 'res_tau_ols', 'res_tau_ols_ps',
            'res_tau_resid', 'res_tau_diffmu', 'res_mul_tau_dr',
            'res_mul_tau_ols', 'res_mul_tau_ols_ps', 'res_mul_tau_resid',
            'res_mul_tau_diffmu', 'dcor_zhat', 'dcor_zhat_mul'
        ])

    return score_data
Exemplo n.º 4
0
def ihdp_baseline(set_id=1,
                  prop_miss=0.1,
                  seed=0,
                  full_baseline=False,
                  add_wy=False,
                  sig_prior=1,
                  d_miwae=10,
                  n_epochs=10,
                  method="glm",
                  **kwargs):
    """Compute baseline treatment-effect estimates on one IHDP data set.

    Reads ``./data/IHDP/csv/R_ate_ihdp_npci_<set_id>.csv``, takes column 0 as
    ``w``, column 1 as ``y`` and columns 5 onwards as the covariates ``X``.
    ``X`` is amputed with ``ampute``; the estimators are evaluated on the
    complete covariates and on their mean-imputed version, and, when
    ``full_baseline`` is true, also on an ``IterativeImputer`` imputation and
    on the output of ``get_U_softimpute``.

    Args:
        set_id: Index of the IHDP csv file to load.
        prop_miss: Forwarded to ``ampute``.
        seed: Forwarded to ``ampute``.
        full_baseline: If true, add the ``'X_imp'`` and ``'Z_mf'`` baselines.
        add_wy, sig_prior, d_miwae, n_epochs: Accepted but not used by this
            function.
        method: Forwarded to ``tau_dr`` and ``tau_residuals`` (and ``tau_mi``).
        **kwargs: Accepted and ignored.

    Returns:
        Dict mapping each baseline name to the tuple
        ``(res_tau_dr, res_tau_ols, res_tau_ols_ps, res_tau_resid)``.
    """
    X = pd.read_csv('./data/IHDP/csv/R_ate_ihdp_npci_' + str(set_id) + '.csv')
    w = np.array(X.iloc[:, 0]).reshape((-1, 1))
    y = np.array(X.iloc[:, 1]).reshape((-1, 1))

    X = np.array(X.iloc[:, 5:])

    X_miss = ampute(X, prop_miss=prop_miss, seed=seed)

    # Mean imputation with whichever imputer the installed sklearn provides.
    # Only a missing import triggers the fallback; any other failure is
    # raised instead of silently reporting estimates on an all-zero matrix.
    try:
        from sklearn.impute import SimpleImputer as _MeanImputer
    except ImportError:
        from sklearn.preprocessing import Imputer as _MeanImputer
    X_imp_mean = _MeanImputer().fit_transform(X_miss)

    # The former unconditional MICE step was removed: it called
    # ``IterativeImputer()()`` (an instance is not callable) and its result
    # was never used; the full baseline below performs that imputation.

    algo_name = ['X', 'X_imp_mean']
    algo_ = [X, X_imp_mean]

    if full_baseline:
        # complete the baseline
        Z_mf = get_U_softimpute(X_miss)
        try:
            from sklearn.impute import IterativeImputer
        except ImportError:
            # Some sklearn versions expose IterativeImputer only after the
            # experimental flag module has been imported.
            from sklearn.experimental import enable_iterative_imputer  # noqa: F401
            from sklearn.impute import IterativeImputer
        X_imp = IterativeImputer().fit_transform(X_miss)

        algo_name += ['X_imp', 'Z_mf']
        algo_ += [X_imp, Z_mf]

    tau = dict()
    for name, zhat in zip(algo_name, algo_):

        if name == 'X_mi':
            # Not reachable with the names built above; kept so that a
            # multiple-imputation entry can be added to the list directly.
            res_tau_dr, res_tau_ols, res_tau_ols_ps, res_tau_resid = tau_mi(
                zhat, w, y, method=method)

        else:
            ps_hat, y0_hat, y1_hat = get_ps_y01_hat(zhat, w, y)
            res_tau_ols = tau_ols(zhat, w, y)
            res_tau_ols_ps = tau_ols_ps(zhat, w, y)
            res_tau_dr = tau_dr(y, w, y0_hat, y1_hat, ps_hat, method)
            lr = LinearRegression()
            lr.fit(zhat, y)
            y_hat = lr.predict(zhat)
            res_tau_resid = tau_residuals(y, w, y_hat, ps_hat, method)

        tau[name] = res_tau_dr, res_tau_ols, res_tau_ols_ps, res_tau_resid

    return tau
Exemplo n.º 5
0
def ihdp_miwae(set_id=1,
               prop_miss=0.1,
               seed=0,
               d_miwae=3,
               n_epochs=602,
               sig_prior=1,
               add_wy=False,
               method="glm",
               **kwargs):
    """Evaluate MIWAE-based treatment-effect estimators on one IHDP data set.

    Loads ``./data/IHDP/csv/ihdp_npci_<set_id>.csv`` (column 0 as ``w``,
    column 1 as ``y``, columns 5 onwards as covariates), amputes the
    covariates with ``ampute`` and fits ``miwae`` on the result. The
    estimators are computed once on ``zhat`` and once per element of
    ``zhat_mul``; the per-element values are averaged.

    Returns:
        Tuple ``(res_tau_dr, res_tau_ols, res_tau_ols_ps, res_mul_tau_dr,
        res_mul_tau_ols, res_mul_tau_ols_ps, dcor_zhat, dcor_zhat_mul)``;
        the last two entries are always NaN here.
    """
    from miwae import miwae

    frame = pd.read_csv('./data/IHDP/csv/ihdp_npci_' + str(set_id) + '.csv')
    w = np.array(frame.iloc[:, 0]).reshape((-1, 1))
    y = np.array(frame.iloc[:, 1]).reshape((-1, 1))
    covariates = np.array(frame.iloc[:, 5:])

    X_miss = ampute(covariates, prop_miss=prop_miss, seed=seed)

    # w and y are handed to miwae only when add_wy is requested.
    miwae_args = dict(d_miwae=d_miwae,
                      sig_prior=sig_prior,
                      n_epochs=n_epochs,
                      add_wy=add_wy)
    if add_wy:
        miwae_args.update(w=w, y=y)
    xhat, zhat, zhat_mul = miwae(X_miss, **miwae_args)

    # Original author's example shapes: (1000, 200) (1000, 3) (200, 1000, 3)
    print(xhat.shape, zhat.shape, zhat_mul.shape)

    # Estimates computed on zhat.
    ps_hat, y0_hat, y1_hat = get_ps_y01_hat(zhat, w, y)
    res_tau_ols = tau_ols(zhat, w, y)
    res_tau_ols_ps = tau_ols_ps(zhat, w, y)
    res_tau_dr = tau_dr(y, w, y0_hat, y1_hat, ps_hat, method)

    # Estimates computed on every element of zhat_mul.
    dr_draws, ols_draws, ols_ps_draws = [], [], []
    for zhat_b in zhat_mul:
        ps_b, y0_b, y1_b = get_ps_y01_hat(zhat_b, w, y)
        dr_draws.append(tau_dr(y, w, y0_b, y1_b, ps_b, method))
        ols_draws.append(tau_ols(zhat_b, w, y))
        ols_ps_draws.append(tau_ols_ps(zhat_b, w, y))

    res_mul_tau_dr = np.mean(dr_draws)
    res_mul_tau_ols = np.mean(ols_draws)
    res_mul_tau_ols_ps = np.mean(ols_ps_draws)

    # Not computed in this function; NaN placeholders.
    dcor_zhat = dcor_zhat_mul = np.nan

    return (res_tau_dr, res_tau_ols, res_tau_ols_ps, res_mul_tau_dr,
            res_mul_tau_ols, res_mul_tau_ols_ps, dcor_zhat, dcor_zhat_mul)
Exemplo n.º 6
0
def exp_miwae(model="dlvm",
              n=1000,
              d=3,
              p=100,
              prop_miss=0.1,
              citcio=False,
              seed=0,
              d_miwae=3,
              n_epochs=602,
              sig_prior=1,
              add_wy=False,
              num_samples_zmul=200,
              method="glm",
              **kwargs):
    """Run one MIWAE experiment on simulated data.

    Data are drawn with ``gen_lrmf`` or ``gen_dlvm``, the covariates are
    amputed with ``ampute`` and ``miwae`` is fitted on them. The estimators
    are computed once on ``zhat`` and once per element of ``zhat_mul``; the
    per-element values are averaged. ``dcor`` is evaluated between the
    generated ``Z`` and the MIWAE outputs.

    Args:
        model: ``"lrmf"`` or ``"dlvm"``; selects the data generator.
        n, d, p, citcio: Forwarded unchanged to the data generator.
        prop_miss: Forwarded to the data generator and to ``ampute``.
        seed: Forwarded to the data generator and to ``ampute``.
        d_miwae: Passed to ``miwae`` as ``d``.
        n_epochs, sig_prior, add_wy, num_samples_zmul: Forwarded to
            ``miwae``; when ``add_wy`` is true, ``w`` and ``y`` are passed
            to it as well.
        method: Forwarded to ``tau_dr`` and ``tau_residuals``.
        **kwargs: Accepted and ignored.

    Returns:
        Tuple ``(res_tau_dr, res_tau_ols, res_tau_ols_ps, res_tau_resid,
        res_mul_tau_dr, res_mul_tau_ols, res_mul_tau_ols_ps,
        res_mul_tau_resid, dcor_zhat, dcor_zhat_mul)``. ``dcor_zhat`` is NaN
        when ``Z`` and ``zhat`` do not have the same number of columns.

    Raises:
        NotImplementedError: If ``model`` is neither ``"lrmf"`` nor ``"dlvm"``.
    """
    from miwae import miwae

    if model == "lrmf":
        Z, X, w, y, ps = gen_lrmf(n=n,
                                  d=d,
                                  p=p,
                                  citcio=citcio,
                                  prop_miss=prop_miss,
                                  seed=seed)
    elif model == "dlvm":
        Z, X, w, y, ps = gen_dlvm(n=n,
                                  d=d,
                                  p=p,
                                  citcio=citcio,
                                  prop_miss=prop_miss,
                                  seed=seed)
    else:
        raise NotImplementedError(
            "Other data generating models not implemented here yet.")

    X_miss = ampute(X, prop_miss=prop_miss, seed=seed)

    if add_wy:
        xhat, zhat, zhat_mul = miwae(X_miss,
                                     d=d_miwae,
                                     sig_prior=sig_prior,
                                     num_samples_zmul=num_samples_zmul,
                                     n_epochs=n_epochs,
                                     add_wy=add_wy,
                                     w=w,
                                     y=y)
    else:
        xhat, zhat, zhat_mul = miwae(X_miss,
                                     d=d_miwae,
                                     sig_prior=sig_prior,
                                     num_samples_zmul=num_samples_zmul,
                                     n_epochs=n_epochs,
                                     add_wy=add_wy)

    # Original author's example shapes: (1000, 200) (1000, 3) (200, 1000, 3)
    print(xhat.shape, zhat.shape, zhat_mul.shape)

    # Estimates computed on zhat.
    ps_hat, y0_hat, y1_hat = get_ps_y01_hat(zhat, w, y)
    res_tau_ols = tau_ols(zhat, w, y)
    res_tau_ols_ps = tau_ols_ps(zhat, w, y)
    res_tau_dr = tau_dr(y, w, y0_hat, y1_hat, ps_hat, method)
    lr = LinearRegression()
    lr.fit(zhat, y)
    y_hat = lr.predict(zhat)
    res_tau_resid = tau_residuals(y, w, y_hat, ps_hat, method)

    # Estimates computed on every element of zhat_mul, averaged below.
    res_mul_tau_dr = []
    res_mul_tau_ols = []
    res_mul_tau_ols_ps = []
    res_mul_tau_resid = []
    for zhat_b in zhat_mul:
        ps_hat, y0_hat, y1_hat = get_ps_y01_hat(zhat_b, w, y)
        res_mul_tau_dr.append(tau_dr(y, w, y0_hat, y1_hat, ps_hat, method))
        res_mul_tau_ols.append(tau_ols(zhat_b, w, y))
        res_mul_tau_ols_ps.append(tau_ols_ps(zhat_b, w, y))
        lr = LinearRegression()
        lr.fit(zhat_b, y)
        y_hat = lr.predict(zhat_b)
        res_mul_tau_resid.append(tau_residuals(y, w, y_hat, ps_hat, method))

    res_mul_tau_dr = np.mean(res_mul_tau_dr)
    res_mul_tau_ols = np.mean(res_mul_tau_ols)
    res_mul_tau_ols_ps = np.mean(res_mul_tau_ols_ps)
    res_mul_tau_resid = np.mean(res_mul_tau_resid)

    # Default to NaN so the name is always bound: previously the return
    # statement raised whenever the column counts of Z and zhat differed.
    dcor_zhat = np.nan
    if Z.shape[1] == zhat.shape[1]:
        dcor_zhat = dcor(Z, zhat)

    dcor_zhat_mul = []
    for zhat_b in zhat_mul:
        dcor_zhat_mul.append(dcor(Z, zhat_b))
    dcor_zhat_mul = np.mean(dcor_zhat_mul)

    return (res_tau_dr, res_tau_ols, res_tau_ols_ps, res_tau_resid,
            res_mul_tau_dr, res_mul_tau_ols, res_mul_tau_ols_ps,
            res_mul_tau_resid, dcor_zhat, dcor_zhat_mul)
Exemplo n.º 7
0
def exp_baseline(model="dlvm",
                 n=1000,
                 d=3,
                 p=100,
                 prop_miss=0.1,
                 citcio=False,
                 seed=0,
                 full_baseline=False,
                 method="glm",
                 **kwargs):
    """Compute baseline treatment-effect estimates on simulated data.

    Data are drawn with ``gen_lrmf`` or ``gen_dlvm`` and the estimators are
    evaluated on the generated ``Z`` and on the complete covariates ``X``.
    When ``full_baseline`` is true, the covariates amputed with ``ampute``
    are passed to ``get_U_softimpute`` and its output is added as the
    ``'Z_mf'`` baseline.

    Args:
        model: ``"lrmf"`` or ``"dlvm"``; selects the data generator.
        n, d, p, citcio: Forwarded unchanged to the data generator.
        prop_miss: Forwarded to the data generator and to ``ampute``.
        seed: Forwarded to the data generator and to ``ampute``.
        full_baseline: If true, add the ``'Z_mf'`` baseline.
        method: Forwarded to ``tau_dr`` and ``tau_residuals`` (and ``tau_mi``).
        **kwargs: Accepted and ignored.

    Returns:
        Dict mapping each baseline name to the tuple
        ``(res_tau_dr, res_tau_ols, res_tau_ols_ps, res_tau_resid)``.

    Raises:
        NotImplementedError: If ``model`` is neither ``"lrmf"`` nor ``"dlvm"``.
    """
    if model == "lrmf":
        Z, X, w, y, ps = gen_lrmf(n=n,
                                  d=d,
                                  p=p,
                                  citcio=citcio,
                                  prop_miss=prop_miss,
                                  seed=seed)
    elif model == "dlvm":
        Z, X, w, y, ps = gen_dlvm(n=n,
                                  d=d,
                                  p=p,
                                  citcio=citcio,
                                  prop_miss=prop_miss,
                                  seed=seed)
    else:
        raise NotImplementedError(
            "Other data generating models not implemented here yet.")

    X_miss = ampute(X, prop_miss=prop_miss, seed=seed)

    # The mean and iterative imputations computed here previously were never
    # added to the baselines below, so they were removed (together with the
    # bare ``except`` that wrapped the latter).

    # Unused by the active baselines, but the call draws from NumPy's global
    # RNG; it is kept so that later random draws are not shifted.
    Z_perm = np.random.permutation(Z)

    algo_name = ['Z', 'X']
    algo_ = [Z, X]

    if full_baseline:
        # complete the baseline
        Z_mf = get_U_softimpute(X_miss)

        algo_name += ['Z_mf']
        algo_ += [Z_mf]

    tau = dict()
    for name, zhat in zip(algo_name, algo_):

        if name == 'X_mi':
            # Not reachable with the names built above; kept so that a
            # multiple-imputation entry can be added to the list directly.
            res_tau_dr, res_tau_ols, res_tau_ols_ps, res_tau_resid = tau_mi(
                zhat, w, y, method=method)

        else:
            ps_hat, y0_hat, y1_hat = get_ps_y01_hat(zhat, w, y)
            res_tau_ols = tau_ols(zhat, w, y)
            res_tau_ols_ps = tau_ols_ps(zhat, w, y)
            res_tau_dr = tau_dr(y, w, y0_hat, y1_hat, ps_hat, method)
            lr = LinearRegression()
            lr.fit(zhat, y)
            y_hat = lr.predict(zhat)
            res_tau_resid = tau_residuals(y, w, y_hat, ps_hat, method)

        tau[name] = res_tau_dr, res_tau_ols, res_tau_ols_ps, res_tau_resid

    return tau
Exemplo n.º 8
0
def tb_miwae(seed=0,
             d_miwae=3,
             n_epochs=602,
             sig_prior=1,
             add_wy=False,
             method="glm",
             **kwargs):
    """Evaluate MIWAE-based treatment-effect estimators on the TB data set.

    Loads ``./data/tb/tb_tbi_17conf.csv`` (column 0 as ``w``, column 1 as
    ``y``, columns 2 onwards as covariates) and fits ``miwae`` directly on
    the covariates; no amputation is applied here. The estimators are
    computed once on ``zhat`` and once per element of ``zhat_mul``; the
    per-element values are averaged. ``seed`` is accepted but not used.

    Returns:
        Tuple ``(res_tau_dr, res_tau_ols, res_tau_ols_ps, res_mul_tau_dr,
        res_mul_tau_ols, res_mul_tau_ols_ps, dcor_zhat, dcor_zhat_mul)``;
        the last two entries are always NaN here.
    """
    from miwae import miwae

    frame = pd.read_csv('./data/tb/tb_tbi_17conf.csv')
    w = np.array(frame.iloc[:, 0]).reshape((-1, 1))
    y = np.array(frame.iloc[:, 1]).reshape((-1, 1))
    covariates = np.array(frame.iloc[:, 2:])

    # w and y are handed to miwae only when add_wy is requested.
    miwae_args = dict(d_miwae=d_miwae,
                      sig_prior=sig_prior,
                      n_epochs=n_epochs,
                      add_wy=add_wy)
    if add_wy:
        miwae_args.update(w=w, y=y)
    xhat, zhat, zhat_mul = miwae(covariates, **miwae_args)

    # Original author's example shapes: (1000, 200) (1000, 3) (200, 1000, 3)
    print(xhat.shape, zhat.shape, zhat_mul.shape)

    # Estimates computed on zhat.
    ps_hat, y0_hat, y1_hat = get_ps_y01_hat(zhat, w, y)
    res_tau_ols = tau_ols(zhat, w, y)
    res_tau_ols_ps = tau_ols_ps(zhat, w, y)
    res_tau_dr = tau_dr(y, w, y0_hat, y1_hat, ps_hat, method)

    # Estimates computed on every element of zhat_mul.
    dr_draws, ols_draws, ols_ps_draws = [], [], []
    for zhat_b in zhat_mul:
        ps_b, y0_b, y1_b = get_ps_y01_hat(zhat_b, w, y)
        dr_draws.append(tau_dr(y, w, y0_b, y1_b, ps_b, method))
        ols_draws.append(tau_ols(zhat_b, w, y))
        ols_ps_draws.append(tau_ols_ps(zhat_b, w, y))

    res_mul_tau_dr = np.mean(dr_draws)
    res_mul_tau_ols = np.mean(ols_draws)
    res_mul_tau_ols_ps = np.mean(ols_ps_draws)

    # Not computed in this function; NaN placeholders.
    dcor_zhat = dcor_zhat_mul = np.nan

    return (res_tau_dr, res_tau_ols, res_tau_ols_ps, res_mul_tau_dr,
            res_mul_tau_ols, res_mul_tau_ols_ps, dcor_zhat, dcor_zhat_mul)