Example #1
def main(args, repetition=1, path="./IHDP/"):
    pyro.enable_validation(__debug__)
    # CUDA is assumed available (the dataset below is loaded with cuda=True).
    torch.set_default_tensor_type('torch.cuda.FloatTensor')

    # Generate synthetic data.
    pyro.set_rng_seed(args.seed)
    train, test, contfeats, binfeats = IHDP(path=path,
                                            reps=repetition,
                                            cuda=True)
    (x_train, t_train, y_train), true_ite_train = train
    (x_test, t_test, y_test), true_ite_test = test

    ym, ys = y_train.mean(), y_train.std()
    y_train = (y_train - ym) / ys

    # Train.
    pyro.set_rng_seed(args.seed)
    pyro.clear_param_store()
    tedvae = TEDVAE(feature_dim=args.feature_dim,
                    continuous_dim=contfeats,
                    binary_dim=binfeats,
                    latent_dim=args.latent_dim,
                    latent_dim_t=args.latent_dim_t,
                    latent_dim_y=args.latent_dim_y,
                    hidden_dim=args.hidden_dim,
                    num_layers=args.num_layers,
                    num_samples=10)
    tedvae.fit(x_train,
               t_train,
               y_train,
               num_epochs=args.num_epochs,
               batch_size=args.batch_size,
               learning_rate=args.learning_rate,
               learning_rate_decay=args.learning_rate_decay,
               weight_decay=args.weight_decay)

    # Evaluate.
    est_ite = tedvae.ite(x_test, ym, ys)
    est_ite_train = tedvae.ite(x_train, ym, ys)

    pehe = np.sqrt(
        np.mean(np.square(true_ite_test.squeeze() - est_ite.cpu().numpy())))
    pehe_train = np.sqrt(
        np.mean(
            np.square(true_ite_train.squeeze() -
                      est_ite_train.cpu().numpy())))
    print("PEHE_train = {:0.3g}".format(pehe_train))
    print("PEHE = {:0.3g}".format(pehe))
    return pehe, pehe_train
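
Both PEHE values above are root-mean-squared errors between the true and estimated individual treatment effects. A minimal standalone sketch of the metric (the helper name sqrt_pehe mirrors Example #5 below; it is not part of this example):

import numpy as np

def sqrt_pehe(true_ite, est_ite):
    # Precision in Estimation of Heterogeneous Effects: RMSE between the
    # true and estimated individual treatment effects.
    return np.sqrt(np.mean(np.square(true_ite - est_ite)))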
Example #2
parser = ArgumentParser()
parser.add_argument('-reps', type=int, default=10)
parser.add_argument('-earl', type=int, default=10)
parser.add_argument('-lr', type=float, default=0.001)
parser.add_argument('-opt', choices=['adam', 'adamax'], default='adam')
parser.add_argument('-epochs', type=int, default=100)
parser.add_argument('-print_every', type=int, default=10)
args = parser.parse_args()

args.true_post = True

ed.set_seed(1)
np.random.seed(1)
tf.set_random_seed(1)

dataset = IHDP(replications=args.reps)
scores = np.zeros((args.reps, 3))
scores_test = np.zeros((args.reps, 3))

M = None  # batch size during training
d = 20  # latent space dimension
lamba = 1e-4  # weight decay
nh, h = 5, 200  # number and size of hidden layers

for i, (train, valid, test, contfeats,
        binfeats) in enumerate(dataset.get_train_valid_test()):
    print('\nReplication {}/{}'.format(i + 1, args.reps))
    (xtr, ttr, ytr), (y_cftr, mu0tr, mu1tr) = train
    (xva, tva, yva), (y_cfva, mu0va, mu1va) = valid
    (xte, tte, yte), (y_cfte, mu0te, mu1te) = test
    evaluator_test = Evaluator(yte, tte, y_cf=y_cfte, mu0=mu0te, mu1=mu1te)
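
Each split unpacks as ((x, t, y), (y_cf, mu0, mu1)), where mu0 and mu1 are the noiseless potential outcomes, so the true individual treatment effect is simply their difference (a one-line illustration, not from the snippet):

# True ITE per unit: difference of the noiseless potential outcomes.
true_ite_test = mu1te - mu0te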
Example #3
pnoise_size = args.pn_size
pnoise_scale = args.pn_scale
adv_scale = args.adv_scale

data_path = args.data_path
save_model = args.save_model
if not save_model:
    save_model = 'models/' + exp_name
os.makedirs(save_model, exist_ok=True)  # also creates the 'models/' parent if needed
load_model = args.load_model

# data_pref = '_'.join([ str(i) for i in [pnoise_type, pnoise_size, pnoise_scale, ""]]) if pnoise_type is not None else ""
data_pref = args.data_pref
if task == 'ihdp':
    dataset = IHDP(replications=args.reps, data_pref=data_pref, data_ratio=args.data_ratio)
elif task == 'twins':
    dataset = TWINS(replications=args.reps, data_pref=data_pref)
elif task == 'jobs':
    dataset = JOBS(replications=args.reps, data_pref=data_pref)
else:
    raise ValueError('Unknown task: {}'.format(task))
scores = np.zeros((args.reps, 3))
scores_test = np.zeros((args.reps, 3))

M = None  # batch size during training
d = args.latent_dim  # latent dimension
lamba = args.lamba  # weight decay
nh, h = args.nh, args.h_dim  # number and size of hidden layers
batch_size = args.batch_size
epochs = args.epochs
lr = args.lr
drop_ratio = args.drop_ratio
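
This excerpt reads its hyperparameters from args without showing the parser; a plausible minimal sketch of the corresponding flags (names mirror the attribute accesses above; all defaults are illustrative, not from the original script):

from argparse import ArgumentParser

parser = ArgumentParser()
parser.add_argument('-reps', type=int, default=10)
parser.add_argument('-latent_dim', type=int, default=20)   # latent dimension d
parser.add_argument('-lamba', type=float, default=1e-4)    # weight decay
parser.add_argument('-nh', type=int, default=3)            # number of hidden layers
parser.add_argument('-h_dim', type=int, default=200)       # hidden layer size
parser.add_argument('-batch_size', type=int, default=100)
parser.add_argument('-epochs', type=int, default=100)
parser.add_argument('-lr', type=float, default=0.001)
parser.add_argument('-drop_ratio', type=float, default=0.0)
args = parser.parse_args()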
Example #4
                yerr=std_a1,
                ecolor='b',
                alpha=0.6,
                label='a=1')
        plt.title(key)
        plt.xlabel('Y values')
        plt.ylabel('Prediction value count')
        plt.legend()
        subidx += 1
    plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=2.2)
    plt.savefig('results/compareY.png')


if __name__ == "__main__":
    directory = './models/test_models/'
    dataset = IHDP(replications=parameters.reps)
    # store result data
    data_results = defaultdict(list)
    # loop through different data generation instances (replications)
    for rep_i, data in enumerate(dataset.get_train_valid_test()):
        print('Data repetition %i' % rep_i)

        # loop through different files of trained CEVAEs
        for cevae_file in os.listdir(directory):

            # only select models trained on this data replication (the
            # filename's fifth character encodes the replication index);
            # in the standard case there is just one such model
            if cevae_file.endswith('.pt') and int(cevae_file[4]) == rep_i:

                # Recover arguments from filename
                for model_par in cevae_file.split('/')[-1][:-4].split('_'):
                    # split into key and value, dtype depending on variable
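
The loop above is cut off mid-parse; one plausible completion splits each underscore-separated token into a leading alphabetic key and a numeric value (a sketch under that assumption, with a hypothetical helper name, not the original code):

import re

def parse_token(token):
    # e.g. 'lr0.0005' -> ('lr', 0.0005), 'zDim5' -> ('zDim', 5)
    key, raw = re.match(r'([A-Za-z]+)([\d.]*)', token).groups()
    if not raw:
        return key, None
    return key, int(raw) if raw.isdigit() else float(raw)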
Example #5
from argparse import ArgumentParser

from initialisation import init_qz
from datasets import IHDP
from evaluation import Evaluator, get_y0_y1
from networks import p_x_z, p_t_z, p_y_zt, q_t_x, q_y_xt, q_z_tyx

import numpy as np
import matplotlib.pyplot as plt
from collections import defaultdict

import torch
from torch.distributions import normal
from torch import optim

dataset = IHDP()

def sqrt_pehe(mu1, mu0, ypred1, ypred0):
    return np.sqrt(np.mean(np.square((mu1 - mu0) - (ypred1 - ypred0))))

def get_train_data(train, valid, test, contfeats, binfeats):
    # read out data
    (xtr, ttr, ytr), (y_cftr, mu0tr, mu1tr) = train
    (xva, tva, yva), (y_cfva, mu0va, mu1va) = valid
    (xte, tte, yte), (y_cfte, mu0te, mu1te) = test

    # reorder features with binary first and continuous after
    perm = binfeats + contfeats
    xtr, xva, xte = xtr[:, perm], xva[:, perm], xte[:, perm]
    # concatenate train and valid for training
    xalltr = np.concatenate([xtr, xva], axis=0)
    talltr = np.concatenate([ttr, tva], axis=0)
    yalltr = np.concatenate([ytr, yva], axis=0)
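
With the arrays above, sqrt_pehe takes the noiseless potential outcomes and the model's predicted outcomes under each treatment; for instance (toy arrays, purely illustrative):

mu1, mu0 = np.array([3.0, 2.5]), np.array([1.0, 1.5])        # true outcomes
ypred1, ypred0 = np.array([2.8, 2.6]), np.array([1.2, 1.4])  # predictions
print(sqrt_pehe(mu1, mu0, ypred1, ypred0))  # RMSE of the implied ITEs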
Example #6
parser.add_argument('-reps', type=int, default=10)
parser.add_argument('-earl', type=int, default=10)
parser.add_argument('-lr', type=float, default=0.001)
parser.add_argument('-opt', choices=['adam', 'adamax'], default='adam')
parser.add_argument('-epochs', type=int, default=100)
parser.add_argument('-print_every', type=int, default=10)
parser.add_argument('-data', type=str, default='ihdp')
args = parser.parse_args()

args.true_post = True

data_name = args.data
if data_name == 'twins':
    dataset = TWINS(replications=args.reps)
elif data_name == 'ihdp':
    dataset = IHDP(replications=args.reps)
else:
    raise ValueError('Unknown dataset: {}'.format(data_name))

dimx = dataset.dimx
scores = np.zeros((args.reps, 3))
scores_test = np.zeros((args.reps, 3))
rmses = np.zeros((args.reps, 3))

M = None  # batch size during training
d = 20  # latent dimension
lamba = 1e-4  # weight decay
nh, h = 3, 200  # number and size of hidden layers

for i, (train, valid, test, contfeats,
        binfeats) in enumerate(dataset.get_train_valid_test()):
    print('\nReplication {}/{}'.format(i + 1, args.reps))
    (xtr, ttr, ytr), (y_cftr, mu0tr, mu1tr) = train
Example #7
parser.add_argument('-repTrain', type=int, default=2)
parser.add_argument('-zDim', type=int, default=5)
parser.add_argument('-hDimQ', type=int, default=10)
parser.add_argument('-hDimTar', type=int, default=10)
parser.add_argument('-epochs', type=int, default=400)
parser.add_argument('-batch', type=int, default=50)
parser.add_argument('-lr', type=float, default=0.0005)
parser.add_argument('-decay', type=float, default=0.)
parser.add_argument('-printEvery', type=int, default=1)
parser.add_argument('-nSamplesZ', type=int, default=2)
parser.add_argument('-comment', type=str, default='None')
# tStart: number of initial epochs during which only the t distribution is
# trained, via loss weighting (sketched after this example)
parser.add_argument('-tStart', type=int, default=80)
args = parser.parse_args()

dataset = IHDP(replications=args.reps)
# number of continuous features, see datasets/columns file
x_con_n = 5
x_bin_n = 19
x_dim = [x_bin_n, x_con_n]

# Loop for replications DGP
for rep_i, (train, valid, test) in enumerate(dataset.get_train_valid_test()):
    print('\nReplication %i/%i' % (rep_i + 1, args.reps))
    # loop for replications CEVAE training on same data
    for train_i in range(args.repTrain):

        # read out data
        (xtr, atr, ttr, ytr), (y_cftr, mutr, ztr, t_cftr) = train
        (xva, ava, tva, yva), (y_cfva, muva, zva, t_cfva) = valid
        (xte, ate, tte, yte), (y_cfte, mute, zte, t_cfte) = test
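
The -tStart flag is described above as weighting the loss so that only the treatment distribution is trained during the first epochs; a minimal sketch of such a schedule (the loss term names are hypothetical, not from the snippet):

def rest_loss_weight(epoch, t_start):
    # Warm-up: before t_start, only the t-distribution term contributes.
    return 0.0 if epoch < t_start else 1.0

# Hypothetical use inside the training loop:
# total_loss = loss_t + rest_loss_weight(epoch, args.tStart) * loss_rest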