def main(args, reptition=1, path="./IHDP/"):
    """Train a TEDVAE on one IHDP replication and report sqrt-PEHE.

    Parameters
    ----------
    args : argparse.Namespace
        Hyper-parameters: seed, feature_dim, latent_dim, latent_dim_t,
        latent_dim_y, hidden_dim, num_layers, num_epochs, batch_size,
        learning_rate, learning_rate_decay, weight_decay.
    reptition : int
        IHDP replication index forwarded to the data loader.
        (Misspelled name kept for backward compatibility with keyword callers.)
    path : str
        Directory containing the IHDP data files.

    Returns
    -------
    (pehe, pehe_train) : tuple of float
        Square-root PEHE on the test set and on the training set.
    """
    pyro.enable_validation(__debug__)
    # if args.cuda: torch.set_default_tensor_type('torch.cuda.FloatTensor')

    # Load one replication of the IHDP benchmark; splits also carry the
    # true individual treatment effects for evaluation.
    pyro.set_rng_seed(args.seed)
    train, test, contfeats, binfeats = IHDP(path=path, reps=reptition, cuda=True)
    (x_train, t_train, y_train), true_ite_train = train
    (x_test, t_test, y_test), true_ite_test = test

    # Standardise the outcome for training; ym/ys are reused later to
    # de-standardise the ITE estimates.
    ym, ys = y_train.mean(), y_train.std()
    y_train = (y_train - ym) / ys

    # Train (re-seed so model init is reproducible independent of data loading).
    pyro.set_rng_seed(args.seed)
    pyro.clear_param_store()
    tedvae = TEDVAE(feature_dim=args.feature_dim,
                    continuous_dim=contfeats,
                    binary_dim=binfeats,
                    latent_dim=args.latent_dim,
                    latent_dim_t=args.latent_dim_t,
                    latent_dim_y=args.latent_dim_y,
                    hidden_dim=args.hidden_dim,
                    num_layers=args.num_layers,
                    num_samples=10)
    tedvae.fit(x_train, t_train, y_train,
               num_epochs=args.num_epochs,
               batch_size=args.batch_size,
               learning_rate=args.learning_rate,
               learning_rate_decay=args.learning_rate_decay,
               weight_decay=args.weight_decay)

    # Evaluate: compute each (true - estimated) error vector once instead of
    # repeating the .cpu().numpy() conversion, then PEHE = sqrt(mean(err^2)).
    est_ite = tedvae.ite(x_test, ym, ys)
    est_ite_train = tedvae.ite(x_train, ym, ys)
    err_test = true_ite_test.squeeze() - est_ite.cpu().numpy()
    err_train = true_ite_train.squeeze() - est_ite_train.cpu().numpy()
    pehe = np.sqrt(np.mean(np.square(err_test)))
    pehe_train = np.sqrt(np.mean(np.square(err_train)))

    print("PEHE_train = {:0.3g}".format(pehe_train))
    print("PEHE = {:0.3g}".format(pehe))
    return pehe, pehe_train
# --- Experiment driver (script top level) ---
# NOTE(review): this chunk is truncated — the replication loop body
# continues beyond what is visible here.
parser = ArgumentParser()
parser.add_argument('-reps', type=int, default=10)
parser.add_argument('-earl', type=int, default=10)  # presumably early-stopping patience — TODO confirm
parser.add_argument('-lr', type=float, default=0.001)
parser.add_argument('-opt', choices=['adam', 'adamax'], default='adam')
parser.add_argument('-epochs', type=int, default=100)
parser.add_argument('-print_every', type=int, default=10)
args = parser.parse_args()
args.true_post = True

# Seed every framework in play (Edward, NumPy, TensorFlow) for reproducibility.
ed.set_seed(1)
np.random.seed(1)
tf.set_random_seed(1)

dataset = IHDP(replications=args.reps)
# One row of three metrics per replication (validation and test).
scores = np.zeros((args.reps, 3))
scores_test = np.zeros((args.reps, 3))

M = None
d = 20  # latent space dimension
lamba = 1e-4  # weight decay
nh, h = 5, 200  # number and size of hidden layers

for i, (train, valid, test, contfeats, binfeats) in enumerate(dataset.get_train_valid_test()):
    print('\nReplication {}/{}'.format(i + 1, args.reps))
    # Each split unpacks to (covariates, treatment, outcome) plus
    # (counterfactual outcome, mu0, mu1) — the noiseless potential outcomes
    # are consumed by the Evaluator below.
    (xtr, ttr, ytr), (y_cftr, mu0tr, mu1tr) = train
    (xva, tva, yva), (y_cfva, mu0va, mu1va) = valid
    (xte, tte, yte), (y_cfte, mu0te, mu1te) = test
    evaluator_test = Evaluator(yte, tte, y_cf=y_cfte, mu0=mu0te, mu1=mu1te)
# --- Experiment configuration (script top level) ---
# Pull hyper-parameters and paths out of the parsed CLI arguments.
pnoise_size = args.pn_size
pnoise_scale = args.pn_scale
adv_scale = args.adv_scale
data_path = args.data_path

# Where to store trained models; default to models/<experiment name>.
save_model = args.save_model
if not save_model:
    save_model = 'models/' + exp_name
# FIX: os.makedirs creates intermediate directories — plain os.mkdir raises
# FileNotFoundError when 'models/' itself does not exist yet; exist_ok=True
# also removes the check-then-create race of the original exists() guard.
os.makedirs(save_model, exist_ok=True)
load_model = args.load_model

# data_pref = '_'.join([ str(i) for i in [pnoise_type, pnoise_size, pnoise_scale, ""]]) if pnoise_type is not None else ""
data_pref = args.data_pref

# Select the benchmark dataset by task name.
if task == 'ihdp':
    dataset = IHDP(replications=args.reps, data_pref=data_pref, data_ratio=args.data_ratio)
elif task == 'twins':
    dataset = TWINS(replications=args.reps, data_pref=data_pref)
elif task == 'jobs':
    dataset = JOBS(replications=args.reps, data_pref=data_pref)

# One row of three metrics per replication (validation and test).
scores = np.zeros((args.reps, 3))
scores_test = np.zeros((args.reps, 3))

M = None  # batch size during training
d = args.latent_dim  # latent dimension
lamba = args.lamba  # weight decay
nh, h = args.nh, args.h_dim  # number and size of hidden layers
batch_size = args.batch_size
epochs = args.epochs
lr = args.lr
drop_ratio = args.drop_ratio
# NOTE(review): fragment — this chunk begins inside a plotting call
# (plt.errorbar-style kwargs) whose opening line is not visible here, and
# the innermost loop at the end is truncated. Indentation reconstructed.
        yerr=std_a1, ecolor='b', alpha=0.6, label='a=1')
    plt.title(key)
    plt.xlabel('Y values')
    plt.ylabel('Prediction value count')
    plt.legend()
    subidx += 1  # advance to the next subplot position

plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=2.2)
plt.savefig('results/compareY.png')


if __name__ == "__main__":
    directory = './models/test_models/'
    dataset = IHDP(replications=parameters.reps)
    # store result data
    data_results = defaultdict(list)
    # loop through different data generation instances (replications)
    for rep_i, data in enumerate(dataset.get_train_valid_test()):
        print('Data repetition %i' % rep_i)
        # loop through different files of trained CEVAEs
        for cevae_file in os.listdir(directory):
            # only select models trained on this dataset, standard case just 1
            # NOTE(review): cevae_file[4] is assumed to be the replication
            # digit in the filename convention — TODO confirm against saver.
            if cevae_file.endswith('.pt') and int(cevae_file[4]) == rep_i:
                # Recover arguments from filename
                for model_par in cevae_file.split('/')[-1][:-4].split('_'):
                    # split into key and value, dtype depending on variable
from argparse import ArgumentParser
from initialisation import init_qz
from datasets import IHDP
from evaluation import Evaluator, get_y0_y1
from networks import p_x_z, p_t_z, p_y_zt, q_t_x, q_y_xt, q_z_tyx
import numpy as np
import matplotlib.pyplot as plt
from collections import defaultdict
import torch
from torch.distributions import normal
from torch import optim

# Module-level dataset handle shared by the functions below.
dataset = IHDP()


def sqrt_pehe(mu1, mu0, ypred1, ypred0):
    """Square-root PEHE: RMSE between the true effect (mu1 - mu0) and the
    predicted effect (ypred1 - ypred0)."""
    return np.sqrt(np.mean(np.square((mu1 - mu0) - (ypred1 - ypred0))))


def get_train_data(train, valid, test, contfeats, binfeats):
    # NOTE(review): truncated — the body continues past the trailing open
    # np.concatenate( call below; do not treat this as the full function.
    # read out data: each split is ((x, t, y), (y_cf, mu0, mu1)).
    (xtr, ttr, ytr), (y_cftr, mu0tr, mu1tr) = train
    (xva, tva, yva), (y_cfva, mu0va, mu1va) = valid
    (xte, tte, yte), (y_cfte, mu0te, mu1te) = test
    # reorder features with binary first and continuous after
    perm = binfeats + contfeats
    xtr, xva, xte = xtr[:, perm], xva[:, perm], xte[:, perm]
    # concatenate train and valid for training
    xalltr, talltr, yalltr = np.concatenate([xtr, xva], axis=0), np.concatenate([ttr, tva], axis=0), np.concatenate(
# NOTE(review): fragment — `parser` is created before this chunk and the
# replication loop body continues past the end of it. The bare print
# statement below means this file targets Python 2.
parser.add_argument('-reps', type=int, default=10)
parser.add_argument('-earl', type=int, default=10)  # presumably early-stopping patience — TODO confirm
parser.add_argument('-lr', type=float, default=0.001)
parser.add_argument('-opt', choices=['adam', 'adamax'], default='adam')
parser.add_argument('-epochs', type=int, default=100)
parser.add_argument('-print_every', type=int, default=10)
parser.add_argument('-data', type=str, default='ihdp')
args = parser.parse_args()
args.true_post = True

# Pick the benchmark dataset by name.
data_name = args.data
if data_name == 'twins':
    dataset = TWINS(replications=args.reps)
elif data_name == 'ihdp':
    dataset = IHDP(replications=args.reps)
dimx = dataset.dimx

# One row of three metrics per replication.
scores = np.zeros((args.reps, 3))
scores_test = np.zeros((args.reps, 3))
rmses = np.zeros((args.reps, 3))

M = None  # batch size during training
d = 20  # latent dimension
lamba = 1e-4  # weight decay
nh, h = 3, 200  # number and size of hidden layers

for i, (train, valid, test, contfeats, binfeats) in enumerate(dataset.get_train_valid_test()):
    # Python 2 print statement — kept byte-identical.
    print '\nReplication {}/{}'.format(i + 1, args.reps)
    # Split unpacks to (covariates, treatment, outcome) plus
    # (counterfactual outcome, mu0, mu1).
    (xtr, ttr, ytr), (y_cftr, mu0tr, mu1tr) = train
# NOTE(review): fragment — `parser` is created before this chunk and the
# inner training loop body continues past the end of it.
parser.add_argument('-repTrain', type=int, default=2)
parser.add_argument('-zDim', type=int, default=5)
parser.add_argument('-hDimQ', type=int, default=10)
parser.add_argument('-hDimTar', type=int, default=10)
parser.add_argument('-epochs', type=int, default=400)
parser.add_argument('-batch', type=int, default=50)
parser.add_argument('-lr', type=float, default=0.0005)
parser.add_argument('-decay', type=float, default=0.)
parser.add_argument('-printEvery', type=int, default=1)
parser.add_argument('-nSamplesZ', type=int, default=2)
parser.add_argument('-comment', type=str, default='None')
# tStart: number of epochs only training t dist by weighting loss
parser.add_argument('-tStart', type=int, default=80)
args = parser.parse_args()

dataset = IHDP(replications=args.reps)
# number of continuous features, see datasets/columns file
x_con_n = 5
x_bin_n = 19
x_dim = [x_bin_n, x_con_n]

# Loop for replications DGP
for rep_i, (train, valid, test) in enumerate(dataset.get_train_valid_test()):
    print('\nReplication %i/%i' % (rep_i + 1, args.reps))
    # loop for replications CEVAE training on same data
    for train_i in range(args.repTrain):
        # read out data — splits appear to unpack to (x, a, t, y) and
        # (counterfactual y, mu, latent z, counterfactual t); the meaning of
        # `a` is not visible here — TODO confirm against the dataset class.
        (xtr, atr, ttr, ytr), (y_cftr, mutr, ztr, t_cftr) = train
        (xva, ava, tva, yva), (y_cfva, muva, zva, t_cfva) = valid
        (xte, ate, tte, yte), (y_cfte, mute, zte, t_cfte) = test