def run(args, outdir):
    """
    Run training for standard NN4 architecture.

    For each of ``args.experiments`` replications the function slices one
    replication out of the loaded dataset, splits it into train/valid/test,
    standardises the factual outcome ``yf`` with the training split's mean and
    standard deviation, trains the network for ``args.iterations`` epochs and
    evaluates it on the within-sample (train + valid) and test splits.

    Files written (all names are appended to ``outdir`` by plain string
    concatenation): ``log.txt``, the per-replication ``yf`` means/stds
    (``*_means_stds_ihdp_*_.nd``), the exported network and a
    ``*-total-scores-*`` text summary.

    :param args: argument namespace. Reads ``iterations``, ``learning_rate``,
        ``hidden_size``, ``experiments``, ``learning_rate_factor``,
        ``learning_rate_steps``, ``epoch_output_iter``, ``num_workers``,
        ``batch_size_per_unit``, ``seed``, ``data_dir``, ``data_train`` and
        ``architecture``.
    :param outdir: output path prefix used for every file written.
    :return: dict with the test-set ``"ite"``, ``"ate"`` and ``"pehe"`` scores
        formatted as ``"mean ± sem"`` strings and ``"mean_duration"``, the mean
        training time per replication in seconds (rounded to 2 decimals).
    """
    # Hyperparameters
    epochs = int(args.iterations)
    learning_rate = float(args.learning_rate)
    hidden_size = int(args.hidden_size)
    train_experiments = int(args.experiments)
    learning_rate_factor = float(args.learning_rate_factor)
    # The scheduler changes the learning rate every `learning_rate_steps` updates.
    learning_rate_steps = int(args.learning_rate_steps)
    epoch_output_iter = int(args.epoch_output_iter)
    # NOTE(review): args.weight_decay is not applied; the Adam optimizer below
    # is built without `wd`. Confirm whether weight decay is intended.

    # Logging: start from an empty log file.
    logfile = outdir + 'log.txt'
    with open(logfile, 'w'):
        pass

    # Set GPUs/CPUs
    num_gpus = mx.context.num_gpus()
    num_workers = int(args.num_workers)  # replace num_workers with the number of cores
    ctx = [mx.gpu(i) for i in range(num_gpus)] if num_gpus > 0 else [mx.cpu()]
    batch_size_per_unit = int(args.batch_size_per_unit)  # mini-batch size
    batch_size = batch_size_per_unit * max(num_gpus, 1)

    # Set seeds
    seed = int(args.seed)
    for c in ctx:
        mx.random.seed(seed, c)
    np.random.seed(seed)

    # Feed Forward Neural Network Model (4 hidden layers)
    net = ff4_relu_architecture(hidden_size)

    # Load dataset (path is relative to the working directory of a terminal run)
    train_dataset = load_data(args.data_dir + args.data_train)

    # Instantiate net
    net.initialize(init=init.Xavier(), ctx=ctx)
    net.hybridize()  # hybridize for better performance  # TODO decide upon

    # Metric, Loss and Optimizer
    # NOTE(review): the net, scheduler and trainer are created once and shared
    # by all replications, so parameters and the learning-rate schedule carry
    # over from one replication to the next. Confirm this is intended.
    rmse_metric = mx.metric.RMSE()
    l2_loss = gluon.loss.L2Loss()
    scheduler = mx.lr_scheduler.FactorScheduler(step=learning_rate_steps,
                                                factor=learning_rate_factor,
                                                base_lr=learning_rate)
    optimizer = mx.optimizer.Adam(learning_rate=learning_rate,
                                  lr_scheduler=scheduler)
    trainer = gluon.Trainer(net.collect_params(), optimizer=optimizer)

    # Per-replication results: columns are RMSE ITE, ATE, PEHE.
    train_scores = np.zeros((train_experiments, 3))
    train_durations = np.zeros((train_experiments, 1))
    test_scores = np.zeros((train_experiments, 3))

    # Per-replication yf mean/std, saved afterwards for inference.
    yf_means = np.array([])
    yf_stds = np.array([])

    # Train
    for train_experiment in range(train_experiments):
        # Create training dataset for this replication
        x = train_dataset['x'][:, :, train_experiment]
        t = np.reshape(train_dataset['t'][:, train_experiment], (-1, 1))
        yf = train_dataset['yf'][:, train_experiment]
        ycf = train_dataset['ycf'][:, train_experiment]
        mu0 = train_dataset['mu0'][:, train_experiment]
        mu1 = train_dataset['mu1'][:, train_experiment]

        train, valid, test, _ = split_data_in_train_valid_test(
            x, t, yf, ycf, mu0, mu1)

        # Evaluators are built before normalisation, i.e. on the original scale.
        # With-in sample = train + valid.
        train_evaluator = Evaluator(
            np.concatenate([train['t'], valid['t']]),
            np.concatenate([train['yf'], valid['yf']]),
            y_cf=np.concatenate([train['ycf'], valid['ycf']], axis=0),
            mu0=np.concatenate([train['mu0'], valid['mu0']], axis=0),
            mu1=np.concatenate([train['mu1'], valid['mu1']], axis=0))
        test_evaluator = Evaluator(test['t'], test['yf'], test['ycf'],
                                   test['mu0'], test['mu1'])

        # Normalize yf with the training split statistics
        # todo normalize option as others, or always default norm?
        yf_m, yf_std = np.mean(train['yf'], axis=0), np.std(train['yf'], axis=0)
        train['yf'] = (train['yf'] - yf_m) / yf_std
        valid['yf'] = (valid['yf'] - yf_m) / yf_std
        test['yf'] = (test['yf'] - yf_m) / yf_std

        # Save mean and std
        yf_means = np.append(yf_means, yf_m)
        yf_stds = np.append(yf_stds, yf_std)

        # Train dataset: features are the covariates with the treatment appended.
        factual_features = np.hstack((train['x'], train['t']))
        train_factual_dataset = gluon.data.ArrayDataset(
            mx.nd.array(factual_features), mx.nd.array(train['yf']))

        # With-in sample covariates
        train_rmse_ite_dataset = gluon.data.ArrayDataset(
            mx.nd.array(np.concatenate([train['x'], valid['x']])))

        # Valid dataset
        valid_factual_features = np.hstack((valid['x'], valid['t']))
        valid_factual_dataset = gluon.data.ArrayDataset(
            mx.nd.array(valid_factual_features), mx.nd.array(valid['yf']))

        # Test dataset
        test_rmse_ite_dataset = gluon.data.ArrayDataset(
            mx.nd.array(test['x']))  # todo rename, rmse_ite has nothing to do

        # DataLoaders: only the training loader is shuffled.
        train_factual_loader = gluon.data.DataLoader(train_factual_dataset,
                                                     batch_size=batch_size,
                                                     shuffle=True,
                                                     num_workers=num_workers)
        train_rmse_ite_loader = gluon.data.DataLoader(train_rmse_ite_dataset,
                                                      batch_size=batch_size,
                                                      shuffle=False,
                                                      num_workers=num_workers)
        valid_factual_loader = gluon.data.DataLoader(valid_factual_dataset,
                                                     batch_size=batch_size,
                                                     shuffle=False,
                                                     num_workers=num_workers)
        test_rmse_ite_loader = gluon.data.DataLoader(test_rmse_ite_dataset,
                                                     batch_size=batch_size,
                                                     shuffle=False,
                                                     num_workers=num_workers)

        num_batch = len(train_factual_loader)

        train_start = time.time()

        # Train model; epochs start at 1 for easier learning rate calculation.
        for epoch in range(1, epochs + 1):
            train_loss = 0
            rmse_metric.reset()

            for batch_f_features, batch_yf in train_factual_loader:
                # Get data and labels into slices and copy each slice into a context.
                batch_f_features = gluon.utils.split_and_load(batch_f_features,
                                                              ctx_list=ctx,
                                                              even_split=False)
                batch_yf = gluon.utils.split_and_load(batch_yf,
                                                      ctx_list=ctx,
                                                      even_split=False)

                # Forward
                with autograd.record():
                    outputs = [net(features) for features in batch_f_features]
                    loss = [
                        l2_loss(yhat, y) for yhat, y in zip(outputs, batch_yf)
                    ]

                # Backward
                for slice_loss in loss:
                    slice_loss.backward()

                # Optimize
                trainer.step(batch_size)

                # Average of the per-context mean losses for this batch.
                train_loss += sum(
                    slice_loss.mean().asscalar() for slice_loss in loss) / len(loss)
                rmse_metric.update(batch_yf, outputs)

            if epoch % epoch_output_iter == 0 or epoch == 1:
                _, train_rmse_factual = rmse_metric.get()
                train_loss /= num_batch
                _, valid_rmse_factual = test_net(net, valid_factual_loader, ctx)

                log(
                    logfile,
                    '[Epoch %d/%d] Train-rmse-factual: %.3f, Loss: %.3f | Valid-rmse-factual: %.3f | learning-rate: '
                    '%.3E' % (epoch, epochs, train_rmse_factual, train_loss,
                              valid_rmse_factual, trainer.learning_rate))

        train_durations[train_experiment, :] = time.time() - train_start

        # Test model: predictions are mapped back to the original yf scale
        # before being scored.
        y_t0, y_t1 = predict_treated_and_controlled(net, train_rmse_ite_loader,
                                                    ctx)
        y_t0, y_t1 = y_t0 * yf_std + yf_m, y_t1 * yf_std + yf_m
        train_score = train_evaluator.get_metrics(y_t1, y_t0)
        train_scores[train_experiment, :] = train_score

        y_t0, y_t1 = predict_treated_and_controlled(net, test_rmse_ite_loader,
                                                    ctx)
        y_t0, y_t1 = y_t0 * yf_std + yf_m, y_t1 * yf_std + yf_m
        test_score = test_evaluator.get_metrics(y_t1, y_t0)
        test_scores[train_experiment, :] = test_score

        log(
            logfile,
            '[Train Replication {}/{}]: train RMSE ITE: {:0.3f}, train ATE: {:0.3f}, train PEHE: {:0.3f},'
            ' test RMSE ITE: {:0.3f}, test ATE: {:0.3f}, test PEHE: {:0.3f}'.format(
                train_experiment + 1, train_experiments, train_score[0],
                train_score[1], train_score[2], test_score[0], test_score[1],
                test_score[2]))

    # Save means and stds NDArray values for inference
    mx.nd.save(
        outdir + args.architecture.lower() + '_means_stds_ihdp_' +
        str(train_experiments) + '_.nd', {
            "means": mx.nd.array(yf_means),
            "stds": mx.nd.array(yf_stds)
        })

    # Export trained model
    net.export(outdir + args.architecture.lower() + "-ihdp-predictions-" +
               str(train_experiments),
               epoch=epochs)

    log(logfile,
        '\n{} architecture total scores:'.format(args.architecture.upper()))

    # Train and test scores: mean and standard error over replications.
    means, stds = np.mean(train_scores, axis=0), sem(train_scores,
                                                     axis=0,
                                                     ddof=0)
    r_pehe_mean = np.mean(np.sqrt(train_scores[:, 2]), axis=0)
    r_pehe_std = sem(np.sqrt(train_scores[:, 2]), axis=0, ddof=0)
    train_total_scores_str = (
        'train RMSE ITE: {:.2f} ± {:.2f}, train ATE: {:.2f} ± {:.2f}, train PEHE: {:.2f} ± {:.2f}, '
        'train root PEHE: {:.2f} ± {:.2f}'
        ''.format(means[0], stds[0], means[1], stds[1], means[2], stds[2],
                  r_pehe_mean, r_pehe_std))

    means, stds = np.mean(test_scores, axis=0), sem(test_scores,
                                                    axis=0,
                                                    ddof=0)
    r_pehe_mean = np.mean(np.sqrt(test_scores[:, 2]), axis=0)
    r_pehe_std = sem(np.sqrt(test_scores[:, 2]), axis=0, ddof=0)
    test_total_scores_str = (
        'test RMSE ITE: {:.2f} ± {:.2f}, test ATE: {:.2f} ± {:.2f}, test PEHE: {:.2f} ± {:.2f}, '
        'test root PEHE: {:.2f} ± {:.2f}'
        ''.format(means[0], stds[0], means[1], stds[1], means[2], stds[2],
                  r_pehe_mean, r_pehe_std))

    log(logfile, train_total_scores_str)
    log(logfile, test_total_scores_str)

    mean_duration = float("{0:.2f}".format(
        np.mean(train_durations, axis=0)[0]))

    with open(outdir + args.architecture.lower() + "-total-scores-" +
              str(train_experiments),
              "w",
              encoding="utf8") as text_file:
        print(train_total_scores_str,
              "\n",
              test_total_scores_str,
              file=text_file)

    # `means`/`stds` hold the test-set aggregates at this point.
    return {
        "ite": "{:.2f} ± {:.2f}".format(means[0], stds[0]),
        "ate": "{:.2f} ± {:.2f}".format(means[1], stds[1]),
        "pehe": "{:.2f} ± {:.2f}".format(means[2], stds[2]),
        "mean_duration": mean_duration
    }
def run_test(args):
    """
    Run testing for standard NN4 architecture.

    Loads the test dataset, the per-replication training ``yf`` means/stds and
    an exported network, then scores every test replication and prints the
    per-replication and aggregated (mean ± standard error) metrics.

    :param args: argument namespace. Reads ``num_workers``,
        ``batch_size_per_unit``, ``data_dir``, ``data_test``, ``means_stds``
        (file with the saved ``"means"``/``"stds"`` NDArrays), ``symbol`` and
        ``params`` (exported network files).
    :return: None; results are printed to stdout.
    """
    # Set GPUs/CPUs
    num_gpus = mx.context.num_gpus()
    num_workers = int(args.num_workers)  # replace num_workers with the number of cores
    ctx = [mx.gpu(i) for i in range(num_gpus)] if num_gpus > 0 else [mx.cpu()]
    batch_size_per_unit = int(args.batch_size_per_unit)  # mini-batch size
    batch_size = batch_size_per_unit * max(num_gpus, 1)

    # Load test dataset. The training dataset is not needed here: only its
    # saved yf means/stds are used, so it is no longer read from disk.
    test_dataset = load_data('../' + args.data_dir + args.data_test)

    # Load training means and stds
    train_means_stds = mx.nd.load(args.means_stds)
    train_means = train_means_stds['means']
    train_stds = train_means_stds['stds']

    # Import the exported network; warnings raised while importing are
    # silenced only for the duration of this block.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        net = gluon.nn.SymbolBlock.imports(args.symbol, ['data'],
                                           args.params,
                                           ctx=ctx)

    # Number of test experiments: limited by both the replications available
    # in the test data and the number of saved training means.
    test_experiments = min(test_dataset['x'].shape[2], len(train_means))

    # Per-replication results: columns are RMSE ITE, ATE, PEHE.
    test_scores = np.zeros((test_experiments, 3))

    # Test model
    for test_experiment in range(test_experiments):
        # Create testing dataset for this replication
        x = test_dataset['x'][:, :, test_experiment]
        t = np.reshape(test_dataset['t'][:, test_experiment], (-1, 1))
        yf = test_dataset['yf'][:, test_experiment]
        ycf = test_dataset['ycf'][:, test_experiment]
        mu0 = test_dataset['mu0'][:, test_experiment]
        mu1 = test_dataset['mu1'][:, test_experiment]

        test_evaluator = Evaluator(t, yf, ycf, mu0, mu1)

        # Retrieve the training mean and std saved for this replication
        train_yf_m = train_means[test_experiment].asnumpy()
        train_yf_std = train_stds[test_experiment].asnumpy()

        # Test dataset and DataLoader (covariates only)
        test_rmse_ite_dataset = gluon.data.ArrayDataset(mx.nd.array(x))
        test_rmse_ite_loader = gluon.data.DataLoader(test_rmse_ite_dataset,
                                                     batch_size=batch_size,
                                                     shuffle=False,
                                                     num_workers=num_workers)

        # Predict, then undo the training-time yf normalisation before scoring.
        y_t0, y_t1 = predict_treated_and_controlled(net, test_rmse_ite_loader,
                                                    ctx)
        y_t0 = y_t0 * train_yf_std + train_yf_m
        y_t1 = y_t1 * train_yf_std + train_yf_m
        test_score = test_evaluator.get_metrics(y_t1, y_t0)
        test_scores[test_experiment, :] = test_score

        print(
            '[Test Replication {}/{}]:\tRMSE ITE: {:0.3f},\t\t ATE: {:0.3f},\t\t PEHE: {:0.3f}'
            .format(test_experiment + 1, test_experiments, test_score[0],
                    test_score[1], test_score[2]))

    # Aggregate over replications: mean and standard error.
    means, stds = np.mean(test_scores, axis=0), sem(test_scores,
                                                    axis=0,
                                                    ddof=0)
    print(
        'test RMSE ITE: {:.3f} ± {:.3f}, test ATE: {:.3f} ± {:.3f}, test PEHE: {:.3f} ± {:.3f}'
        ''.format(means[0], stds[0], means[1], stds[1], means[2], stds[2]))
def run(outdir):
    """
    Runs a set of training and validation experiments and stores result in a
    directory.

    For each of ``FLAGS.experiments`` replications a CFR network is trained on
    the factual outcomes with an additional Wasserstein imbalance penalty
    (scaled by ``FLAGS.p_alpha``) between the treated and control
    representations, and then evaluated on the within-sample (train + valid)
    and test splits.

    Files written (all names are appended to ``outdir`` by plain string
    concatenation): ``log.txt``, ``config.txt``, the TSNE plot of the first
    replication, the per-replication ``yf`` means/stds (only when
    ``FLAGS.normalize_input`` is set), the exported network and a
    ``*-total-scores-*`` text summary.

    :param outdir: output path prefix used for every file written.
    :return: dict with the test-set ``"ite"``, ``"ate"`` and ``"pehe"`` scores
        formatted as ``"mean ± sem"`` strings and ``"mean_duration"``, the mean
        training time per replication in seconds (rounded to 2 decimals).
    """
    # Set up paths and start from an empty log file.
    logfile = outdir + 'log.txt'
    with open(logfile, 'w'):
        pass

    # Hyperparameters
    epochs = int(FLAGS.iterations)
    learning_rate = float(FLAGS.learning_rate)
    train_experiments = int(FLAGS.experiments)
    learning_rate_factor = float(FLAGS.learning_rate_factor)
    # The scheduler changes the learning rate every `learning_rate_steps` updates.
    learning_rate_steps = int(FLAGS.learning_rate_steps)
    # NOTE(review): FLAGS.weight_decay is logged below but not applied; the
    # Adam optimizer is built without `wd`. Confirm whether this is intended.

    data_train = FLAGS.data_dir + FLAGS.data_train
    data_train_valid = FLAGS.data_dir + FLAGS.data_test

    # Set GPUs/CPUs. A single context is used; the batch size is still scaled
    # by the number of visible GPUs, as before.
    num_gpus = mx.context.num_gpus()
    num_workers = int(FLAGS.num_workers)  # replace num_workers with the number of cores
    ctx = mx.gpu() if num_gpus > 0 else mx.cpu()
    batch_size_per_unit = int(FLAGS.batch_size_per_unit)  # mini-batch size
    batch_size = batch_size_per_unit * max(num_gpus, 1)

    # Set random seeds
    random.seed(FLAGS.seed)
    np.random.seed(FLAGS.seed)
    mx.random.seed(FLAGS.seed)

    # Save parameters
    save_config(outdir + 'config.txt', FLAGS)

    log(
        logfile, 'Training with hyperparameters: alpha=%.2g, lambda=%.2g' %
        (FLAGS.p_alpha, FLAGS.weight_decay))

    # Load dataset
    train_dataset = load_data(data_train, normalize=FLAGS.normalize_input)

    log(logfile, 'Training data: ' + data_train)
    log(logfile, 'Valid data:     ' + data_train_valid)
    log(
        logfile, 'Loaded data with shape [%d,%d]' %
        (train_dataset['n'], train_dataset['dim']))

    # CFR Neural Network Architecture for ITE estimation
    net = CFRNet(FLAGS.dim_rep, FLAGS.dim_hyp, FLAGS.weight_init_scale,
                 train_dataset['dim'], FLAGS.batch_norm)

    # Instantiate net
    net.initialize(ctx=ctx)
    net.hybridize()  # hybridize for better performance

    # Metric, Loss and Optimizer
    rmse_metric = mx.metric.RMSE()
    l2_loss = gluon.loss.L2Loss()
    wass_loss = WassersteinLoss(
        lam=FLAGS.wass_lambda,
        its=FLAGS.wass_iterations,
        square=True,
        backpropT=FLAGS.wass_bpg)  # Change too at hybrid_test_net_with_cfr
    scheduler = mx.lr_scheduler.FactorScheduler(step=learning_rate_steps,
                                                factor=learning_rate_factor,
                                                base_lr=learning_rate)
    optimizer = mx.optimizer.Adam(learning_rate=learning_rate,
                                  lr_scheduler=scheduler)
    trainer = gluon.Trainer(net.collect_params(), optimizer=optimizer)

    # Per-replication results: columns are RMSE ITE, ATE, PEHE.
    train_scores = np.zeros((train_experiments, 3))
    train_durations = np.zeros((train_experiments, 1))
    test_scores = np.zeros((train_experiments, 3))

    # Per-replication yf mean/std, saved afterwards for inference.
    yf_means = np.array([])
    yf_stds = np.array([])

    # Learned representations of the first replication's last epoch, collected
    # for the TSNE visualization.
    first_exp_reps = []

    # Train
    for train_experiment in range(train_experiments):
        # Create training dataset for this replication
        x = train_dataset['x'][:, :, train_experiment]
        t = np.reshape(train_dataset['t'][:, train_experiment], (-1, 1))
        yf = train_dataset['yf'][:, train_experiment]
        ycf = train_dataset['ycf'][:, train_experiment]
        mu0 = train_dataset['mu0'][:, train_experiment]
        mu1 = train_dataset['mu1'][:, train_experiment]

        train, valid, test, _ = split_data_in_train_valid_test(
            x, t, yf, ycf, mu0, mu1)

        # Evaluators are built before normalisation, i.e. on the original scale.
        # With-in sample = train + valid.
        train_evaluator = Evaluator(
            np.concatenate([train['t'], valid['t']]),
            np.concatenate([train['yf'], valid['yf']]),
            y_cf=np.concatenate([train['ycf'], valid['ycf']], axis=0),
            mu0=np.concatenate([train['mu0'], valid['mu0']], axis=0),
            mu1=np.concatenate([train['mu1'], valid['mu1']], axis=0))
        test_evaluator = Evaluator(test['t'], test['yf'], test['ycf'],
                                   test['mu0'], test['mu1'])

        # Normalize yf with the training split statistics
        if FLAGS.normalize_input:
            yf_m, yf_std = np.mean(train['yf'], axis=0), np.std(train['yf'],
                                                                axis=0)
            train['yf'] = (train['yf'] - yf_m) / yf_std
            valid['yf'] = (valid['yf'] - yf_m) / yf_std
            test['yf'] = (test['yf'] - yf_m) / yf_std

            # Save mean and std
            yf_means = np.append(yf_means, yf_m)
            yf_stds = np.append(yf_stds, yf_std)

        # Train dataset: features are the covariates with the treatment
        # appended as the last column.
        factual_features = np.hstack((train['x'], train['t']))
        train_factual_dataset = gluon.data.ArrayDataset(
            mx.nd.array(factual_features), mx.nd.array(train['yf']))

        # With-in sample covariates
        train_rmse_ite_dataset = gluon.data.ArrayDataset(
            mx.nd.array(np.concatenate([train['x'], valid['x']])))

        # Valid dataset
        valid_factual_features = np.hstack((valid['x'], valid['t']))
        valid_factual_dataset = gluon.data.ArrayDataset(
            mx.nd.array(valid_factual_features), mx.nd.array(valid['yf']))

        # Test dataset
        test_rmse_ite_dataset = gluon.data.ArrayDataset(
            mx.nd.array(test['x']))  # todo rename, rmse_ite has nothing to do

        # DataLoaders: only the training loader is shuffled.
        train_factual_loader = gluon.data.DataLoader(train_factual_dataset,
                                                     batch_size=batch_size,
                                                     shuffle=True,
                                                     num_workers=num_workers)
        train_rmse_ite_loader = gluon.data.DataLoader(train_rmse_ite_dataset,
                                                      batch_size=batch_size,
                                                      shuffle=False,
                                                      num_workers=num_workers)
        valid_factual_loader = gluon.data.DataLoader(valid_factual_dataset,
                                                     batch_size=batch_size,
                                                     shuffle=False,
                                                     num_workers=num_workers)
        test_rmse_ite_loader = gluon.data.DataLoader(test_rmse_ite_dataset,
                                                     batch_size=batch_size,
                                                     shuffle=False,
                                                     num_workers=num_workers)

        number_of_batches = len(train_factual_loader)

        # Compute treatment probability
        treatment_probability = np.mean(train['t'])

        train_start = time.time()

        # Train model; epochs start at 1 for easier learning rate calculation.
        for epoch in range(1, epochs + 1):
            train_loss = 0
            rmse_metric.reset()
            obj_loss = 0
            imb_err = 0

            for i, (batch_f_features,
                    batch_yf) in enumerate(train_factual_loader):
                # Copy data and labels into the context.
                batch_f_features = batch_f_features.as_in_context(ctx)
                batch_yf = batch_yf.as_in_context(ctx)

                # Last column is the treatment, the rest are covariates.
                batch_x = batch_f_features[:, :-1]
                batch_t = batch_f_features[:, -1]

                # Get treatment and control indices. Batch_size must be enough
                # to have at least one t=1 sample.
                t1_idx = np.where(batch_t == 1)[0]
                t0_idx = np.where(batch_t == 0)[0]

                if t1_idx.shape[0] == 0:
                    log(
                        logfile, 'Encountered no treatment samples at batch ' +
                        str(i) + '.')

                # Compute sample reweighing:
                #   w = t / (2 p) + (1 - t) / (2 (1 - p)),  p = P(t = 1).
                # The control term previously divided by (2 * 1 - p) because of
                # a missing pair of parentheses.
                if FLAGS.reweight_sample:
                    w_t = batch_t / (2 * treatment_probability)
                    w_c = (1 - batch_t) / (2 * (1 - treatment_probability))
                    sample_weight = w_t + w_c
                    t1_weight = sample_weight[t1_idx]
                    t0_weight = sample_weight[t0_idx]
                else:
                    # No reweighing: let the loss use its default (unweighted)
                    # behaviour. The previous scalar 1.0 could not be indexed
                    # and raised a TypeError.
                    t1_weight = None
                    t0_weight = None

                # Initialize per-sample outputs and losses in batch order.
                outputs = np.zeros(batch_yf.shape)
                loss = np.zeros(batch_yf.shape)

                # Forward (Factual)
                with autograd.record():
                    t1_o, t0_o, rep_o = net(batch_x, mx.nd.array(t1_idx),
                                            mx.nd.array(t0_idx))

                    risk = 0

                    t1_o_loss = l2_loss(t1_o, batch_yf[t1_idx], t1_weight)
                    np.put(loss, t1_idx, t1_o_loss.asnumpy())
                    np.put(outputs, t1_idx, t1_o.asnumpy())
                    risk = risk + t1_o_loss.sum()

                    t0_o_loss = l2_loss(t0_o, batch_yf[t0_idx], t0_weight)
                    np.put(loss, t0_idx, t0_o_loss.asnumpy())
                    np.put(outputs, t0_idx, t0_o.asnumpy())
                    risk = risk + t0_o_loss.sum()

                    # Optionally scale each representation row by its L2 norm.
                    if FLAGS.normalization == 'divide':
                        h_rep_norm = rep_o / mx_safe_sqrt(
                            mx.nd.sum(
                                mx.nd.square(rep_o), axis=1, keepdims=True))
                    else:
                        h_rep_norm = 1.0 * rep_o

                    # Imbalance between treated and control representations.
                    imb_dist = wass_loss(h_rep_norm[t1_idx],
                                         h_rep_norm[t0_idx])

                    imb_error = FLAGS.p_alpha * imb_dist

                    tot_error = risk

                    if FLAGS.p_alpha > 0:
                        tot_error = tot_error + imb_error

                # Save last epoch of first experiment reps for TSNE vis.
                if train_experiment == 0 and epoch == epochs:
                    first_exp_reps.extend(rep_o)

                # Backward
                tot_error.backward()

                # Optimize
                trainer.step(batch_size)

                train_loss += loss.mean()
                rmse_metric.update(batch_yf, mx.nd.array(outputs))

                obj_loss += tot_error.asscalar()
                imb_err += imb_error.asscalar()

            if epoch % FLAGS.epoch_output_iter == 0 or epoch == 1:
                _, train_rmse_factual = rmse_metric.get()
                train_loss /= number_of_batches
                (_, valid_rmse_factual), _, _ = hybrid_test_net_with_cfr(
                    net, valid_factual_loader, ctx, FLAGS,
                    np.mean(valid['t']))

                log(
                    logfile,
                    '[Epoch %d/%d] Train-rmse-factual: %.3f | Loss: %.3f | learning-rate: '
                    '%.3E | ObjLoss: %.3f | ImbErr: %.3f | Valid-rmse-factual: %.3f'
                    % (epoch, epochs, train_rmse_factual, train_loss,
                       trainer.learning_rate, obj_loss, imb_err,
                       valid_rmse_factual))

        # Plot first experiment learned TSNE visualization
        if train_experiment == 0:
            tsne_plot_pca(data=train['x'],
                          label=train['t'],
                          learned_representation=np.asarray(
                              [ind.asnumpy() for ind in first_exp_reps]),
                          outdir=outdir + FLAGS.architecture.lower())

        train_durations[train_experiment, :] = time.time() - train_start

        # Test model: predictions are mapped back to the original yf scale
        # (when normalised) before being scored.
        y_t0, y_t1 = hybrid_predict_treated_and_controlled_with_cfr(
            net, train_rmse_ite_loader, ctx)
        if FLAGS.normalize_input:
            y_t0, y_t1 = y_t0 * yf_std + yf_m, y_t1 * yf_std + yf_m
        train_score = train_evaluator.get_metrics(y_t1, y_t0)
        train_scores[train_experiment, :] = train_score

        y_t0, y_t1 = hybrid_predict_treated_and_controlled_with_cfr(
            net, test_rmse_ite_loader, ctx)
        if FLAGS.normalize_input:
            y_t0, y_t1 = y_t0 * yf_std + yf_m, y_t1 * yf_std + yf_m
        test_score = test_evaluator.get_metrics(y_t1, y_t0)
        test_scores[train_experiment, :] = test_score

        log(
            logfile,
            '[Train Replication {}/{}]: train RMSE ITE: {:0.3f}, train ATE: {:0.3f}, train PEHE: {:0.3f},'
            ' test RMSE ITE: {:0.3f}, test ATE: {:0.3f}, test PEHE: {:0.3f}'.format(
                train_experiment + 1, train_experiments, train_score[0],
                train_score[1], train_score[2], test_score[0], test_score[1],
                test_score[2]))

    # Save means and stds NDArray values for inference
    if FLAGS.normalize_input:
        mx.nd.save(
            outdir + FLAGS.architecture.lower() + '_means_stds_ihdp_' +
            str(train_experiments) + '_.nd', {
                "means": mx.nd.array(yf_means),
                "stds": mx.nd.array(yf_stds)
            })

    # Export trained models
    # See mxnet.apache.org/api/python/docs/tutorials/packages/gluon/blocks/save_load_params.html
    net.export(outdir + FLAGS.architecture.lower() + "-ihdp-predictions-" +
               str(train_experiments))  # hybrid

    log(logfile,
        '\n{} architecture total scores:'.format(FLAGS.architecture.upper()))

    # Train and test scores: mean and standard error over replications.
    means, stds = np.mean(train_scores, axis=0), sem(train_scores,
                                                     axis=0,
                                                     ddof=0)
    r_pehe_mean = np.mean(np.sqrt(train_scores[:, 2]), axis=0)
    r_pehe_std = sem(np.sqrt(train_scores[:, 2]), axis=0, ddof=0)
    train_total_scores_str = (
        'train RMSE ITE: {:.2f} ± {:.2f}, train ATE: {:.2f} ± {:.2f}, train PEHE: {:.2f} ± {:.2f}, '
        'train root PEHE: {:.2f} ± {:.2f}'
        ''.format(means[0], stds[0], means[1], stds[1], means[2], stds[2],
                  r_pehe_mean, r_pehe_std))

    means, stds = np.mean(test_scores, axis=0), sem(test_scores,
                                                    axis=0,
                                                    ddof=0)
    r_pehe_mean = np.mean(np.sqrt(test_scores[:, 2]), axis=0)
    r_pehe_std = sem(np.sqrt(test_scores[:, 2]), axis=0, ddof=0)
    test_total_scores_str = (
        'test RMSE ITE: {:.2f} ± {:.2f}, test ATE: {:.2f} ± {:.2f}, test PEHE: {:.2f} ± {:.2f}, '
        'test root PEHE: {:.2f} ± {:.2f}'
        ''.format(means[0], stds[0], means[1], stds[1], means[2], stds[2],
                  r_pehe_mean, r_pehe_std))

    log(logfile, train_total_scores_str)
    log(logfile, test_total_scores_str)

    mean_duration = float("{0:.2f}".format(
        np.mean(train_durations, axis=0)[0]))

    with open(outdir + FLAGS.architecture.lower() + "-total-scores-" +
              str(train_experiments),
              "w",
              encoding="utf8") as text_file:
        print(train_total_scores_str,
              "\n",
              test_total_scores_str,
              file=text_file)

    # `means`/`stds` hold the test-set aggregates at this point.
    return {
        "ite": "{:.2f} ± {:.2f}".format(means[0], stds[0]),
        "ate": "{:.2f} ± {:.2f}".format(means[1], stds[1]),
        "pehe": "{:.2f} ± {:.2f}".format(means[2], stds[2]),
        "mean_duration": mean_duration
    }
def mx_run_out_of_sample_test(outdir):
    """
    Runs a set of test experiments and stores result in a directory.

    Imports the exported CFR network from ``FLAGS.results_dir``, scores every
    replication of the test dataset and logs the per-replication and the
    aggregated (mean ± standard error) metrics to ``outdir + 'log.txt'``.

    If the network cannot be imported, the full traceback is written to
    ``outdir + 'error.txt'``, a short message is printed and the process exits
    with status -1.

    :param outdir: output path prefix; file names are appended by plain string
        concatenation.
    :return: None.
    """
    # Logging: start from an empty log file.
    logfile = outdir + 'log.txt'
    with open(logfile, 'w'):
        pass

    # Set GPUs/CPUs. A single context is used; the batch size is still scaled
    # by the number of visible GPUs, as before.
    num_gpus = mx.context.num_gpus()
    num_workers = int(FLAGS.num_workers)  # replace num_workers with the number of cores
    ctx = mx.gpu() if num_gpus > 0 else mx.cpu()
    batch_size_per_unit = int(FLAGS.batch_size_per_unit)  # mini-batch size
    batch_size = batch_size_per_unit * max(num_gpus, 1)

    # Load test dataset
    test_dataset = load_data(FLAGS.data_dir + FLAGS.data_test,
                             normalize=FLAGS.normalize_input)

    # Import CFRNet
    try:
        # NOTE: this changes the process-wide warning filter and is not
        # restored afterwards (behaviour kept as-is).
        warnings.simplefilter("ignore")
        net_prefix = (FLAGS.results_dir + "/" + FLAGS.architecture.lower() +
                      "-ihdp-predictions-" + str(FLAGS.experiments) + "-")
        net = gluon.nn.SymbolBlock.imports(net_prefix + "symbol.json",
                                           ['data0', 'data1', 'data2'],
                                           net_prefix + "0000.params",
                                           ctx=ctx)
    except Exception as e:
        error_path = outdir + 'error.txt'
        with open(error_path, 'w') as error_file:
            error_file.write(traceback.format_exc())
        # str(e) instead of e.args[0]: the handler must not fail itself when
        # the exception carries no args or a non-string first argument.
        print(str(e).split('Stack trace')[0])
        print("More details at:\t" + error_path)
        sys.exit(-1)

    # Number of test experiments (replications along the third axis of x)
    test_experiments = test_dataset['x'].shape[2]

    # Per-replication results: columns are RMSE ITE, ATE, PEHE.
    test_scores = np.zeros((test_experiments, 3))

    # Test model
    for test_experiment in range(test_experiments):
        # Create testing dataset for this replication
        x = test_dataset['x'][:, :, test_experiment]
        t = np.reshape(test_dataset['t'][:, test_experiment], (-1, 1))
        yf = test_dataset['yf'][:, test_experiment]
        ycf = test_dataset['ycf'][:, test_experiment]
        mu0 = test_dataset['mu0'][:, test_experiment]
        mu1 = test_dataset['mu1'][:, test_experiment]

        # Test Evaluator, with labels not normalized
        test_evaluator = Evaluator(t, yf, ycf, mu0, mu1)

        # Normalize yf
        # NOTE(review): the statistics come from the test replication itself
        # and are also used below to rescale the predictions — confirm that
        # training-time statistics are not expected here.
        if FLAGS.normalize_input:
            test_yf_m, test_yf_std = np.mean(yf, axis=0), np.std(yf, axis=0)
            yf = (yf - test_yf_m) / test_yf_std

        # Test dataset
        test_factual_dataset = gluon.data.ArrayDataset(mx.nd.array(x),
                                                       mx.nd.array(t),
                                                       mx.nd.array(yf))

        # Test DataLoader
        test_rmse_ite_loader = gluon.data.DataLoader(test_factual_dataset,
                                                     batch_size=batch_size,
                                                     shuffle=False,
                                                     num_workers=num_workers)

        # Test model with test data
        y_t0, y_t1 = hybrid_predict_treated_and_controlled_with_cfr(
            net, test_rmse_ite_loader, ctx)
        if FLAGS.normalize_input:
            y_t0 = y_t0 * test_yf_std + test_yf_m
            y_t1 = y_t1 * test_yf_std + test_yf_m
        test_score = test_evaluator.get_metrics(y_t1, y_t0)
        test_scores[test_experiment, :] = test_score

        log(
            logfile,
            '[Test Replication {}/{}]:\tRMSE ITE: {:0.3f},\t\t ATE: {:0.3f},\t\t PEHE: {:0.3f}'
            .format(test_experiment + 1, test_experiments, test_score[0],
                    test_score[1], test_score[2]))

    # Aggregate over replications: mean and standard error.
    # test_scores = np.sqrt(test_scores)  # todo
    means, stds = np.mean(test_scores, axis=0), sem(test_scores,
                                                    axis=0,
                                                    ddof=0)
    log(
        logfile,
        'test RMSE ITE: {:.3f} ± {:.3f}, test ATE: {:.3f} ± {:.3f}, test PEHE: {:.3f} ± {:.3f}'
        ''.format(means[0], stds[0], means[1], stds[1], means[2], stds[2]))
def run(args, outdir):
    """
    Run training for NN4 architecture with Variational Bayes.

    For each of ``args.experiments`` replications the data is split into
    train/valid/test, the factual outcome is standardised with the training
    mean/std, and the variational parameters (one ``lambda`` tensor per
    network parameter) are optimised on the sum of the BBB loss and the L2
    loss. Network weights are re-sampled from the lambdas for every batch.

    Side effects: writes ``outdir + 'log.txt'``, saves the per-replication
    outcome means/stds with ``mx.nd.save``, exports the network with
    ``net.export`` and writes a "<architecture>-total-scores-<n>" text file.

    :param args: parsed arguments; reads ``iterations``, ``learning_rate``,
        ``weight_decay``, ``hidden_size``, ``experiments``,
        ``learning_rate_factor``, ``learning_rate_steps``,
        ``epoch_output_iter``, ``num_workers``, ``batch_size_per_unit``,
        ``seed``, ``data_dir``, ``data_train``, ``data_test`` and
        ``architecture``.
    :param outdir: output location. It is concatenated directly with file
        names, so it presumably ends with a path separator (TODO confirm
        against the caller).
    :return: dict with the aggregated *test* scores formatted as
        "mean ± sem" strings under ``"ite"``, ``"ate"`` and ``"pehe"``, plus
        the mean training duration (float, two decimals) under
        ``"mean_duration"``.
    """
    # Hyperparameters
    epochs = int(args.iterations)
    learning_rate = float(args.learning_rate)
    wd = float(args.weight_decay)
    hidden_size = int(args.hidden_size)
    train_experiments = int(args.experiments)
    learning_rate_factor = float(args.learning_rate_factor)
    learning_rate_steps = int(args.learning_rate_steps)  # changes the learning rate for every n updates.
    epoch_output_iter = int(args.epoch_output_iter)

    # Logging: create (or truncate) the log file.
    logfile = outdir + 'log.txt'
    with open(logfile, 'w'):
        pass

    # Prior hyperparameters handed to BBBLoss.
    # TODO may need adjustments (earlier trials: sigma_p1=1.5, sigma_p2=0.25)
    config = {
        "sigma_p1": 1.75,  # og
        "sigma_p2": 0.5,  # og
        "pi": 0.5,
        "lambda_p": 24.5,
    }

    # Set GPUs/CPUs
    num_gpus = mx.context.num_gpus()
    num_workers = int(args.num_workers)  # replace num_workers with the number of cores
    ctx = [mx.gpu(i) for i in range(num_gpus)] if num_gpus > 0 else [mx.cpu()]  # todo change as cfr_net_train
    batch_size_per_unit = int(args.batch_size_per_unit)  # mini-batch size
    batch_size = batch_size_per_unit * max(num_gpus, 1)

    # Set seeds
    for c in ctx:
        mx.random.seed(int(args.seed), c)
    np.random.seed(int(args.seed))

    # Feed Forward Neural Network Model (4 hidden layers)
    net = ff4_relu_architecture(hidden_size)

    # Load datasets
    train_dataset = load_data(args.data_dir + args.data_train)  # Terminal run
    log(logfile, 'Training data: ' + args.data_dir + args.data_train)
    log(logfile, 'Valid data: ' + args.data_dir + args.data_test)
    log(logfile, 'Loaded data with shape [%d,%d]' %
        (train_dataset['n'], train_dataset['dim']))

    # Instantiate net: parameter initialisation
    net.collect_params().initialize(mx.init.Xavier(), ctx=ctx)
    net.hybridize()

    # Forward-propagate a single data set entry once to set up all network
    # parameters (weights and biases) with the desired initializer specified
    # above.
    x = train_dataset['x'][:, :, 0]
    t = np.reshape(train_dataset['t'][:, 0], (-1, 1))
    yf = train_dataset['yf'][:, 0]
    yf_m, yf_std = np.mean(yf, axis=0), np.std(yf, axis=0)
    yf = (yf - yf_m) / yf_std
    factual_features = np.hstack((x, t))
    zero_train_factual_dataset = gluon.data.ArrayDataset(
        mx.nd.array(factual_features), mx.nd.array(yf))
    zero_train_factual_loader = gluon.data.DataLoader(
        zero_train_factual_dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers)
    for batch_f_features, _ in zero_train_factual_loader:
        shards = gluon.utils.split_and_load(batch_f_features,
                                            ctx_list=ctx,
                                            even_split=False)
        for shard in shards:
            net(shard)
        break  # a single batch is enough to materialise the parameter shapes

    # Initialize variational parameters: one lambda tensor per network
    # parameter (exponential variant; the Gaussian mu/rho variant is unused).
    lambda_init = 25
    shapes = [param.shape for param in net.collect_params().values()]
    lambdas = []
    for shape in shapes:
        lmb = gluon.Parameter('lmb',
                              shape=shape,
                              init=mx.init.Constant(lambda_init))
        lmb.initialize(ctx=ctx)
        lambdas.append(lmb)
    variational_params = lambdas
    # Raw NDArrays on the first context; sampling and the loss work on these.
    raw_lambdas = [lmb.data(ctx[0]) for lmb in lambdas]

    # Metric, Loss and Optimizer
    rmse_metric = mx.metric.RMSE()
    l2_loss = gluon.loss.L2Loss()
    bbb_loss = BBBLoss(ctx[0],
                       log_prior="exponential",
                       sigma_p1=config['sigma_p1'],
                       sigma_p2=config['sigma_p2'],
                       pi=config['pi'],
                       lambda_p=config['lambda_p'])
    scheduler = mx.lr_scheduler.FactorScheduler(step=learning_rate_steps,
                                                factor=learning_rate_factor,
                                                base_lr=learning_rate)
    optimizer = mx.optimizer.RMSProp(learning_rate=learning_rate,
                                     lr_scheduler=scheduler,
                                     wd=wd)
    # Only the variational parameters are optimised; the network's own
    # parameters are overwritten with fresh samples on every batch.
    trainer = gluon.Trainer(variational_params, optimizer=optimizer)

    # Initialize train score results
    train_scores = np.zeros((train_experiments, 3))

    # Initialize train experiment durations
    train_durations = np.zeros((train_experiments, 1))

    # Initialize test score results
    test_scores = np.zeros((train_experiments, 3))

    # Train experiments means and stds
    means = np.array([])
    stds = np.array([])

    # Train
    for train_experiment in range(train_experiments):
        # Create training dataset
        x = train_dataset['x'][:, :, train_experiment]
        t = np.reshape(train_dataset['t'][:, train_experiment], (-1, 1))
        yf = train_dataset['yf'][:, train_experiment]
        ycf = train_dataset['ycf'][:, train_experiment]
        mu0 = train_dataset['mu0'][:, train_experiment]
        mu1 = train_dataset['mu1'][:, train_experiment]

        train, valid, test, _ = split_data_in_train_valid_test(
            x, t, yf, ycf, mu0, mu1)

        # With-in sample (train + valid), built before yf is normalised
        train_evaluator = Evaluator(
            np.concatenate([train['t'], valid['t']]),
            np.concatenate([train['yf'], valid['yf']]),
            y_cf=np.concatenate([train['ycf'], valid['ycf']], axis=0),
            mu0=np.concatenate([train['mu0'], valid['mu0']], axis=0),
            mu1=np.concatenate([train['mu1'], valid['mu1']], axis=0))
        test_evaluator = Evaluator(test['t'], test['yf'], test['ycf'],
                                   test['mu0'], test['mu1'])

        # Normalize yf with the training statistics
        # TODO check for normalize input?
        yf_m, yf_std = np.mean(train['yf'], axis=0), np.std(train['yf'], axis=0)
        train['yf'] = (train['yf'] - yf_m) / yf_std
        valid['yf'] = (valid['yf'] - yf_m) / yf_std
        test['yf'] = (test['yf'] - yf_m) / yf_std

        # Save mean and std
        means = np.append(means, yf_m)
        stds = np.append(stds, yf_std)

        # Train dataset
        factual_features = np.hstack((train['x'], train['t']))
        # Width of a feature row (covariates + treatment column); replaces
        # the previously hard-coded 26.
        num_features = factual_features.shape[1]
        train_factual_dataset = gluon.data.ArrayDataset(
            mx.nd.array(factual_features), mx.nd.array(train['yf']))

        # With-in sample
        train_rmse_ite_dataset = gluon.data.ArrayDataset(
            mx.nd.array(np.concatenate([train['x'], valid['x']])))

        # Valid dataset
        valid_factual_features = np.hstack((valid['x'], valid['t']))
        valid_factual_dataset = gluon.data.ArrayDataset(
            mx.nd.array(valid_factual_features), mx.nd.array(valid['yf']))

        # Test dataset
        test_rmse_ite_dataset = gluon.data.ArrayDataset(mx.nd.array(test['x']))

        # Train DataLoader
        train_factual_loader = gluon.data.DataLoader(train_factual_dataset,
                                                     batch_size=batch_size,
                                                     shuffle=True,
                                                     num_workers=num_workers)
        train_rmse_ite_loader = gluon.data.DataLoader(train_rmse_ite_dataset,
                                                      batch_size=batch_size,
                                                      shuffle=False,
                                                      num_workers=num_workers)

        # Valid DataLoader
        valid_factual_loader = gluon.data.DataLoader(valid_factual_dataset,
                                                     batch_size=batch_size,
                                                     shuffle=False,
                                                     num_workers=num_workers)

        # Test DataLoader
        test_rmse_ite_loader = gluon.data.DataLoader(test_rmse_ite_dataset,
                                                     batch_size=batch_size,
                                                     shuffle=False,
                                                     num_workers=num_workers)

        num_batch = len(train_factual_loader)

        train_start = time.time()

        # Train model
        # NOTE: layer_params, l2_loss_value and bbb_loss_value are read after
        # the batch loop, so this needs epochs >= 1 and a non-empty loader.
        for epoch in range(1, epochs + 1):  # start with epoch 1 for easier learning rate calculation

            train_loss = 0
            rmse_metric.reset()

            for batch_f_features, batch_yf in train_factual_loader:
                # Move data and labels to the first context.
                batch_f_features = batch_f_features.as_in_context(
                    ctx[0]).reshape((-1, num_features))
                batch_yf = batch_yf.as_in_context(ctx[0]).reshape(
                    (len(batch_yf), -1))

                # Forward
                with autograd.record():
                    # Generate sample
                    layer_params = generate_weight_sample_exp(
                        shapes, raw_lambdas, ctx[0])

                    # Overwrite network parameters with sampled parameters
                    # (writes the private per-context data slot 0 directly).
                    for sample, param in zip(layer_params,
                                             net.collect_params().values()):
                        param._data[0] = sample

                    # Forward-propagate the batch
                    outputs = net(batch_f_features)

                    # Calculate the loss
                    l2_loss_value = l2_loss(outputs, batch_yf)
                    bbb_loss_value = bbb_loss(outputs, batch_yf, layer_params,
                                              raw_lambdas, [], num_batch)
                    loss = bbb_loss_value + l2_loss_value

                    # Backpropagate for gradient calculation
                    loss.backward()

                # Optimize
                trainer.step(batch_size)

                train_loss += sum(l.mean().asscalar() for l in loss) / len(loss)
                rmse_metric.update(batch_yf, outputs)

            if epoch % epoch_output_iter == 0 or epoch == 1:
                _, train_rmse_factual = rmse_metric.get()
                train_loss /= num_batch
                # Validation uses the weight sample of the last training batch.
                _, valid_rmse_factual = test_net_vb(net, valid_factual_loader,
                                                    layer_params, ctx)

                log(
                    logfile, 'l2-loss: %.3f, bbb-loss: %.3f' %
                    (l2_loss_value[0].asscalar(),
                     bbb_loss_value[0].asscalar()))

                log(
                    logfile,
                    '[Epoch %d/%d] Train-rmse-factual: %.3f, loss: %.3f | Valid-rmse-factual: %.3f | learning-rate: '
                    '%.3E' % (epoch, epochs, train_rmse_factual, train_loss,
                              valid_rmse_factual, trainer.learning_rate))

        train_durations[train_experiment, :] = time.time() - train_start

        # Test model: predictions are mapped back to the original outcome
        # scale before scoring.
        y_t0, y_t1 = predict_treated_and_controlled_vb(net,
                                                       train_rmse_ite_loader,
                                                       layer_params, ctx)
        y_t0, y_t1 = y_t0 * yf_std + yf_m, y_t1 * yf_std + yf_m
        train_score = train_evaluator.get_metrics(y_t1, y_t0)
        train_scores[train_experiment, :] = train_score

        y_t0, y_t1 = predict_treated_and_controlled_vb(net,
                                                       test_rmse_ite_loader,
                                                       layer_params, ctx)
        y_t0, y_t1 = y_t0 * yf_std + yf_m, y_t1 * yf_std + yf_m
        test_score = test_evaluator.get_metrics(y_t1, y_t0)
        test_scores[train_experiment, :] = test_score

        log(logfile,
            '[Train Replication {}/{}]: train RMSE ITE: {:0.3f}, train ATE: {:0.3f}, train PEHE: {:0.3f},' \
            ' test RMSE ITE: {:0.3f}, test ATE: {:0.3f}, test PEHE: {:0.3f}'.format(
                train_experiment + 1, train_experiments,
                train_score[0], train_score[1], train_score[2],
                test_score[0], test_score[1], test_score[2]))

    # Save means and stds NDArray values for inference
    mx.nd.save(
        outdir + args.architecture.lower() + '_means_stds_ihdp_' +
        str(train_experiments) + '_.nd', {
            "means": mx.nd.array(means),
            "stds": mx.nd.array(stds)
        })

    # Export trained model
    net.export(outdir + args.architecture.lower() + "-ihdp-predictions-" +
               str(train_experiments),
               epoch=epochs)

    log(logfile,
        '\n{} architecture total scores:'.format(args.architecture.upper()))

    # Train and test scores (means/stds are rebound to score statistics from
    # here on; the outcome statistics were already saved above).
    means, stds = np.mean(train_scores, axis=0), sem(train_scores, axis=0, ddof=0)
    r_pehe_mean, r_pehe_std = np.mean(np.sqrt(train_scores[:, 2]), axis=0), \
        sem(np.sqrt(train_scores[:, 2]), axis=0, ddof=0)
    train_total_scores_str = 'train RMSE ITE: {:.2f} ± {:.2f}, train ATE: {:.2f} ± {:.2f}, train PEHE: {:.2f} ± {:.2f}, ' \
                             'train root PEHE: {:.2f} ± {:.2f}' \
                             ''.format(means[0], stds[0], means[1], stds[1], means[2], stds[2], r_pehe_mean, r_pehe_std)

    means, stds = np.mean(test_scores, axis=0), sem(test_scores, axis=0, ddof=0)
    r_pehe_mean, r_pehe_std = np.mean(np.sqrt(test_scores[:, 2]), axis=0), \
        sem(np.sqrt(test_scores[:, 2]), axis=0, ddof=0)
    test_total_scores_str = 'test RMSE ITE: {:.2f} ± {:.2f}, test ATE: {:.2f} ± {:.2f}, test PEHE: {:.2f} ± {:.2f}, ' \
                            'test root PEHE: {:.2f} ± {:.2f}' \
                            ''.format(means[0], stds[0], means[1], stds[1], means[2], stds[2], r_pehe_mean, r_pehe_std)

    log(logfile, train_total_scores_str)
    log(logfile, test_total_scores_str)

    mean_duration = float("{0:.2f}".format(
        np.mean(train_durations, axis=0)[0]))

    with open(outdir + args.architecture.lower() + "-total-scores-" +
              str(train_experiments),
              "w",
              encoding="utf8") as text_file:
        print(train_total_scores_str,
              "\n",
              test_total_scores_str,
              file=text_file)

    # means/stds hold the test-score statistics at this point.
    return {
        "ite": "{:.2f} ± {:.2f}".format(means[0], stds[0]),
        "ate": "{:.2f} ± {:.2f}".format(means[1], stds[1]),
        "pehe": "{:.2f} ± {:.2f}".format(means[2], stds[2]),
        "mean_duration": mean_duration
    }