示例#1
0
def run(args, outdir):
    """Train and evaluate the standard NN4 architecture over several replications.

    For every replication the data is split into train/valid/test parts, the
    factual outcome ``yf`` is standardised with the training mean/std, the
    network is fitted on ``[x, t] -> yf`` and then scored within-sample
    (train + valid) and on the held-out test part. Three scores are kept per
    replication (logged as RMSE ITE, ATE and PEHE).

    Files written (all names are ``outdir`` + suffix, so ``outdir`` is
    expected to end with a path separator):
      * ``log.txt`` - training log,
      * ``<architecture>_means_stds_ihdp_<n>_.nd`` - per-replication ``yf``
        mean/std needed to de-normalise predictions at inference time,
      * the exported network (``net.export``),
      * ``<architecture>-total-scores-<n>`` - aggregated score summary.

    NOTE(review): the network, trainer and learning-rate scheduler are created
    once, before the replication loop, so every replication continues from the
    weights of the previous one - confirm this is intended.

    Args:
        args: Namespace-like object. Attributes read: ``iterations``,
            ``learning_rate``, ``weight_decay``, ``hidden_size``,
            ``experiments``, ``learning_rate_factor``,
            ``learning_rate_steps``, ``epoch_output_iter``, ``num_workers``,
            ``batch_size_per_unit``, ``seed``, ``data_dir``, ``data_train``
            and ``architecture``.
        outdir: Output path prefix (see above).

    Returns:
        dict with the keys ``"ite"``, ``"ate"`` and ``"pehe"`` (test scores
        formatted as ``"mean ± standard error"``) and ``"mean_duration"``
        (mean training time per replication, rounded to two decimals).
    """
    # Hyperparameters.
    epochs = int(args.iterations)
    learning_rate = float(args.learning_rate)
    # Not applied at the moment: only the commented-out optimizer below uses
    # weight decay. Still parsed so that a malformed value fails early.
    wd = float(args.weight_decay)
    hidden_size = int(args.hidden_size)
    train_experiments = int(args.experiments)
    learning_rate_factor = float(args.learning_rate_factor)
    # The scheduler changes the learning rate every `learning_rate_steps`
    # updates.
    learning_rate_steps = int(args.learning_rate_steps)
    epoch_output_iter = int(args.epoch_output_iter)

    # Logging: create (or truncate) the log file.
    logfile = outdir + 'log.txt'
    with open(logfile, 'w'):
        pass

    # Devices: all visible GPUs, or the CPU when there is none.
    num_gpus = mx.context.num_gpus()
    num_workers = int(
        args.num_workers)  # replace num_workers with the number of cores
    ctx = [mx.gpu(i) for i in range(num_gpus)] if num_gpus > 0 else [mx.cpu()]
    batch_size_per_unit = int(args.batch_size_per_unit)  # mini-batch size
    batch_size = batch_size_per_unit * max(num_gpus, 1)

    # Seeds.
    for c in ctx:
        mx.random.seed(int(args.seed), c)
    np.random.seed(int(args.seed))

    # Feed-forward neural network model (4 hidden layers).
    net = ff4_relu_architecture(hidden_size)

    # Load dataset.
    # train_dataset = load_data('../' + args.data_dir + args.data_train) # PyCharm run
    train_dataset = load_data(args.data_dir + args.data_train)  # Terminal run

    # Instantiate the network.
    net.initialize(init=init.Xavier(), ctx=ctx)
    net.hybridize()  # hybridize for better performance

    # TODO decide upon plotting the network graph:
    # x_sym = mx.sym.var('data')
    # sym = net(x_sym)
    # mx.viz.plot_network(sym, title=args.architecture.lower() + "_plot").view(
    #     filename=outdir + args.architecture.lower() + "_plot")

    # Metric, loss and optimizer.
    rmse_metric = mx.metric.RMSE()
    l2_loss = gluon.loss.L2Loss()
    scheduler = mx.lr_scheduler.FactorScheduler(step=learning_rate_steps,
                                                factor=learning_rate_factor,
                                                base_lr=learning_rate)
    optimizer = mx.optimizer.Adam(learning_rate=learning_rate,
                                  lr_scheduler=scheduler)
    # optimizer = mx.optimizer.RMSProp(learning_rate=learning_rate, lr_scheduler=scheduler, wd=wd)
    trainer = gluon.Trainer(net.collect_params(), optimizer=optimizer)

    # Per-replication results: three scores each, plus the training time.
    train_scores = np.zeros((train_experiments, 3))
    train_durations = np.zeros((train_experiments, 1))
    test_scores = np.zeros((train_experiments, 3))

    # Per-replication yf normalisation statistics, saved for inference.
    yf_means = np.array([])
    yf_stds = np.array([])

    for train_experiment in range(train_experiments):
        # Slice out the data of this replication.
        x = train_dataset['x'][:, :, train_experiment]
        t = np.reshape(train_dataset['t'][:, train_experiment], (-1, 1))
        yf = train_dataset['yf'][:, train_experiment]
        ycf = train_dataset['ycf'][:, train_experiment]
        mu0 = train_dataset['mu0'][:, train_experiment]
        mu1 = train_dataset['mu1'][:, train_experiment]

        train, valid, test, _ = split_data_in_train_valid_test(
            x, t, yf, ycf, mu0, mu1)

        # Evaluators are built BEFORE yf is normalised, so they hold the
        # outcomes on the original scale. Within-sample = train + valid.
        train_evaluator = Evaluator(
            np.concatenate([train['t'], valid['t']]),
            np.concatenate([train['yf'], valid['yf']]),
            y_cf=np.concatenate([train['ycf'], valid['ycf']], axis=0),
            mu0=np.concatenate([train['mu0'], valid['mu0']], axis=0),
            mu1=np.concatenate([train['mu1'], valid['mu1']], axis=0))
        test_evaluator = Evaluator(test['t'], test['yf'], test['ycf'],
                                   test['mu0'], test['mu1'])

        # Normalise yf with the statistics of the training part only.
        # todo normalize option as others , or always default norm?
        yf_m = np.mean(train['yf'], axis=0)
        yf_std = np.std(train['yf'], axis=0)
        train['yf'] = (train['yf'] - yf_m) / yf_std
        valid['yf'] = (valid['yf'] - yf_m) / yf_std
        test['yf'] = (test['yf'] - yf_m) / yf_std

        yf_means = np.append(yf_means, yf_m)
        yf_stds = np.append(yf_stds, yf_std)

        # Factual training set: features are [x, t], label is yf.
        factual_features = np.hstack((train['x'], train['t']))
        train_factual_dataset = gluon.data.ArrayDataset(
            mx.nd.array(factual_features), mx.nd.array(train['yf']))
        # Within-sample covariates (train + valid) for treatment-effect
        # prediction.
        train_rmse_ite_dataset = gluon.data.ArrayDataset(
            mx.nd.array(np.concatenate([train['x'], valid['x']])))
        # Factual validation set.
        valid_factual_features = np.hstack((valid['x'], valid['t']))
        valid_factual_dataset = gluon.data.ArrayDataset(
            mx.nd.array(valid_factual_features), mx.nd.array(valid['yf']))
        # Test covariates.
        test_rmse_ite_dataset = gluon.data.ArrayDataset(
            mx.nd.array(test['x']))  # todo rename, rmse_ite has nothing to do

        # Only the factual training loader is shuffled.
        train_factual_loader = gluon.data.DataLoader(train_factual_dataset,
                                                     batch_size=batch_size,
                                                     shuffle=True,
                                                     num_workers=num_workers)
        train_rmse_ite_loader = gluon.data.DataLoader(train_rmse_ite_dataset,
                                                      batch_size=batch_size,
                                                      shuffle=False,
                                                      num_workers=num_workers)
        valid_factual_loader = gluon.data.DataLoader(valid_factual_dataset,
                                                     batch_size=batch_size,
                                                     shuffle=False,
                                                     num_workers=num_workers)
        test_rmse_ite_loader = gluon.data.DataLoader(test_rmse_ite_dataset,
                                                     batch_size=batch_size,
                                                     shuffle=False,
                                                     num_workers=num_workers)

        num_batch = len(train_factual_loader)

        train_start = time.time()

        # Epochs are 1-based for easier learning rate calculation.
        for epoch in range(1, epochs + 1):
            train_loss = 0
            rmse_metric.reset()

            for batch_f_features, batch_yf in train_factual_loader:
                # Number of samples really contained in this mini-batch; the
                # last batch of an epoch can be smaller than `batch_size`.
                samples_in_batch = batch_f_features.shape[0]

                # Slice data and labels and copy each slice to a context.
                batch_f_features = gluon.utils.split_and_load(batch_f_features,
                                                              ctx_list=ctx,
                                                              even_split=False)
                batch_yf = gluon.utils.split_and_load(batch_yf,
                                                      ctx_list=ctx,
                                                      even_split=False)

                # Forward.
                with autograd.record():
                    outputs = [net(part) for part in batch_f_features]
                    loss = [
                        l2_loss(yhat, y) for yhat, y in zip(outputs, batch_yf)
                    ]

                # Backward.
                for device_loss in loss:
                    device_loss.backward()

                # Optimize. Gradients are normalised by 1/<step argument>,
                # hence the actual sample count instead of the nominal
                # batch size.
                trainer.step(samples_in_batch)

                train_loss += sum(
                    [device_loss.mean().asscalar()
                     for device_loss in loss]) / len(loss)
                rmse_metric.update(batch_yf, outputs)

            if epoch % epoch_output_iter == 0 or epoch == 1:
                _, train_rmse_factual = rmse_metric.get()
                train_loss /= num_batch
                _, valid_rmse_factual = test_net(net, valid_factual_loader,
                                                 ctx)

                log(
                    logfile,
                    '[Epoch %d/%d] Train-rmse-factual: %.3f, Loss: %.3f | Valid-rmse-factual: %.3f | learning-rate: '
                    '%.3E' % (epoch, epochs, train_rmse_factual, train_loss,
                              valid_rmse_factual, trainer.learning_rate))

        train_durations[train_experiment, :] = time.time() - train_start

        # Within-sample scores; predictions are mapped back to the original
        # yf scale first.
        y_t0, y_t1 = predict_treated_and_controlled(net, train_rmse_ite_loader,
                                                    ctx)
        y_t0, y_t1 = y_t0 * yf_std + yf_m, y_t1 * yf_std + yf_m
        train_score = train_evaluator.get_metrics(y_t1, y_t0)
        train_scores[train_experiment, :] = train_score

        # Held-out test scores.
        y_t0, y_t1 = predict_treated_and_controlled(net, test_rmse_ite_loader,
                                                    ctx)
        y_t0, y_t1 = y_t0 * yf_std + yf_m, y_t1 * yf_std + yf_m
        test_score = test_evaluator.get_metrics(y_t1, y_t0)
        test_scores[train_experiment, :] = test_score

        replication_template = (
            '[Train Replication {}/{}]: train RMSE ITE: {:0.3f}, train ATE: {:0.3f}, train PEHE: {:0.3f},'
            ' test RMSE ITE: {:0.3f}, test ATE: {:0.3f}, test PEHE: {:0.3f}')
        log(
            logfile,
            replication_template.format(train_experiment + 1,
                                        train_experiments, train_score[0],
                                        train_score[1], train_score[2],
                                        test_score[0], test_score[1],
                                        test_score[2]))

    # Save the normalisation means and stds for inference.
    mx.nd.save(
        outdir + args.architecture.lower() + '_means_stds_ihdp_' +
        str(train_experiments) + '_.nd', {
            "means": mx.nd.array(yf_means),
            "stds": mx.nd.array(yf_stds)
        })

    # Export the trained model.
    net.export(outdir + args.architecture.lower() + "-ihdp-predictions-" +
               str(train_experiments),
               epoch=epochs)

    log(logfile,
        '\n{} architecture total scores:'.format(args.architecture.upper()))

    # Aggregate the scores over all replications: mean ± standard error.
    train_means = np.mean(train_scores, axis=0)
    train_sems = sem(train_scores, axis=0, ddof=0)
    train_r_pehe_mean = np.mean(np.sqrt(train_scores[:, 2]), axis=0)
    train_r_pehe_sem = sem(np.sqrt(train_scores[:, 2]), axis=0, ddof=0)
    train_template = (
        'train RMSE ITE: {:.2f} ± {:.2f}, train ATE: {:.2f} ± {:.2f}, train PEHE: {:.2f} ± {:.2f}, '
        'train root PEHE: {:.2f} ± {:.2f}')
    train_total_scores_str = train_template.format(
        train_means[0], train_sems[0], train_means[1], train_sems[1],
        train_means[2], train_sems[2], train_r_pehe_mean, train_r_pehe_sem)

    test_means = np.mean(test_scores, axis=0)
    test_sems = sem(test_scores, axis=0, ddof=0)
    test_r_pehe_mean = np.mean(np.sqrt(test_scores[:, 2]), axis=0)
    test_r_pehe_sem = sem(np.sqrt(test_scores[:, 2]), axis=0, ddof=0)
    test_template = (
        'test RMSE ITE: {:.2f} ± {:.2f}, test ATE: {:.2f} ± {:.2f}, test PEHE: {:.2f} ± {:.2f}, '
        'test root PEHE: {:.2f} ± {:.2f}')
    test_total_scores_str = test_template.format(
        test_means[0], test_sems[0], test_means[1], test_sems[1],
        test_means[2], test_sems[2], test_r_pehe_mean, test_r_pehe_sem)

    log(logfile, train_total_scores_str)
    log(logfile, test_total_scores_str)

    mean_duration = float("{0:.2f}".format(
        np.mean(train_durations, axis=0)[0]))

    with open(outdir + args.architecture.lower() + "-total-scores-" +
              str(train_experiments),
              "w",
              encoding="utf8") as text_file:
        print(train_total_scores_str,
              "\n",
              test_total_scores_str,
              file=text_file)

    # The returned summary reports the TEST scores.
    return {
        "ite": "{:.2f} ± {:.2f}".format(test_means[0], test_sems[0]),
        "ate": "{:.2f} ± {:.2f}".format(test_means[1], test_sems[1]),
        "pehe": "{:.2f} ± {:.2f}".format(test_means[2], test_sems[2]),
        "mean_duration": mean_duration
    }
示例#2
0
def run_test(args):
    """Evaluate an exported NN4 model on the test replications and print the scores.

    The network is imported from ``args.symbol`` / ``args.params``. For each
    replication the predictions are mapped back to the original outcome scale
    with the training mean/std stored in ``args.means_stds`` and scored with
    an ``Evaluator``. Per-replication scores and their mean ± standard error
    are printed to stdout; nothing is returned.

    Args:
        args: Namespace-like object. Attributes read: ``num_workers``,
            ``batch_size_per_unit``, ``data_dir``, ``data_test``,
            ``means_stds``, ``symbol`` and ``params``.
    """
    # Devices: all visible GPUs, or the CPU when there is none.
    num_gpus = mx.context.num_gpus()
    num_workers = int(
        args.num_workers)  # replace num_workers with the number of cores
    ctx = [mx.gpu(i) for i in range(num_gpus)] if num_gpus > 0 else [mx.cpu()]
    batch_size_per_unit = int(args.batch_size_per_unit)  # mini-batch size
    batch_size = batch_size_per_unit * max(num_gpus, 1)

    # Load the test dataset. (The training dataset is not needed here: only
    # its saved normalisation statistics are used.)
    test_dataset = load_data('../' + args.data_dir + args.data_test)

    # Load the per-replication training means and stds of yf.
    train_means_stds = mx.nd.load(args.means_stds)
    train_means = train_means_stds['means']
    train_stds = train_means_stds['stds']

    # Import the exported network; warnings raised during the import are
    # silenced.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        net = gluon.nn.SymbolBlock.imports(args.symbol, ['data'],
                                           args.params,
                                           ctx=ctx)

    # Only replications that have BOTH test data and saved training
    # statistics can be evaluated.
    test_experiments = min(test_dataset['x'].shape[2], len(train_means))

    # Three scores per replication (printed as RMSE ITE, ATE, PEHE).
    test_scores = np.zeros((test_experiments, 3))

    for test_experiment in range(test_experiments):
        # Slice out the data of this replication.
        x = test_dataset['x'][:, :, test_experiment]
        t = np.reshape(test_dataset['t'][:, test_experiment], (-1, 1))
        yf = test_dataset['yf'][:, test_experiment]
        ycf = test_dataset['ycf'][:, test_experiment]
        mu0 = test_dataset['mu0'][:, test_experiment]
        mu1 = test_dataset['mu1'][:, test_experiment]

        test_evaluator = Evaluator(t, yf, ycf, mu0, mu1)

        # Training mean and std belonging to this replication.
        train_yf_m = train_means[test_experiment].asnumpy()
        train_yf_std = train_stds[test_experiment].asnumpy()

        # Covariates only; order is kept so predictions line up with the
        # evaluator's data.
        test_rmse_ite_dataset = gluon.data.ArrayDataset(mx.nd.array(x))
        test_rmse_ite_loader = gluon.data.DataLoader(test_rmse_ite_dataset,
                                                     batch_size=batch_size,
                                                     shuffle=False,
                                                     num_workers=num_workers)

        # Predict both potential outcomes and undo the yf normalisation.
        y_t0, y_t1 = predict_treated_and_controlled(net, test_rmse_ite_loader,
                                                    ctx)
        y_t0 = y_t0 * train_yf_std + train_yf_m
        y_t1 = y_t1 * train_yf_std + train_yf_m
        test_score = test_evaluator.get_metrics(y_t1, y_t0)
        test_scores[test_experiment, :] = test_score

        print(
            '[Test Replication {}/{}]:\tRMSE ITE: {:0.3f},\t\t ATE: {:0.3f},\t\t PEHE: {:0.3f}'
            .format(test_experiment + 1, test_experiments, test_score[0],
                    test_score[1], test_score[2]))

    # Aggregate over replications: mean ± standard error.
    means = np.mean(test_scores, axis=0)
    stds = sem(test_scores, axis=0, ddof=0)
    print(
        'test RMSE ITE: {:.3f} ± {:.3f}, test ATE: {:.3f} ± {:.3f}, test PEHE: {:.3f} ± {:.3f}'
        .format(means[0], stds[0], means[1], stds[1], means[2], stds[2]))
示例#3
0
def run(outdir):
    """Run a set of CFRNet training/validation experiments and store the results.

    For every replication the data is split into train/valid/test parts, the
    factual outcome ``yf`` is optionally standardised
    (``FLAGS.normalize_input``), CFRNet is trained on the factual data with
    an L2 risk plus ``FLAGS.p_alpha`` times a Wasserstein imbalance term
    between treated and control representations, and the model is scored
    within-sample (train + valid) and on the held-out test part. Three scores
    are kept per replication (logged as RMSE ITE, ATE and PEHE).

    Files written (all names are ``outdir`` + suffix, so ``outdir`` is
    expected to end with a path separator): ``log.txt``, ``config.txt``, the
    t-SNE plot of the first replication, the ``yf`` means/stds (only when
    normalising), the exported network and the aggregated score summary.

    NOTE(review): the network, trainer and learning-rate scheduler are created
    once, before the replication loop, so every replication continues from the
    weights of the previous one - confirm this is intended.

    Args:
        outdir: Output path prefix (see above). All other settings are read
            from the module-level ``FLAGS`` object.

    Returns:
        dict with the keys ``"ite"``, ``"ate"`` and ``"pehe"`` (test scores
        formatted as ``"mean ± standard error"``) and ``"mean_duration"``
        (mean training time per replication, rounded to two decimals).
    """
    # Set up paths and start the log: create (or truncate) the log file.
    logfile = outdir + 'log.txt'
    with open(logfile, 'w'):
        pass

    # Hyperparameters.
    epochs = int(FLAGS.iterations)
    learning_rate = float(FLAGS.learning_rate)
    # Not applied at the moment: only the commented-out optimizer below uses
    # weight decay. Still parsed so that a malformed value fails early.
    wd = float(FLAGS.weight_decay)
    train_experiments = int(FLAGS.experiments)
    learning_rate_factor = float(FLAGS.learning_rate_factor)
    # The scheduler changes the learning rate every `learning_rate_steps`
    # updates.
    learning_rate_steps = int(FLAGS.learning_rate_steps)

    data_train = FLAGS.data_dir + FLAGS.data_train
    data_train_valid = FLAGS.data_dir + FLAGS.data_test

    # Device: a single GPU when available, otherwise the CPU. The batch size
    # is still scaled by the number of visible GPUs.
    num_gpus = mx.context.num_gpus()
    num_workers = int(
        FLAGS.num_workers)  # replace num_workers with the number of cores
    ctx = mx.gpu() if num_gpus > 0 else mx.cpu()
    units = num_gpus if num_gpus > 0 else 1
    batch_size_per_unit = int(FLAGS.batch_size_per_unit)  # mini-batch size
    batch_size = batch_size_per_unit * max(units, 1)

    # Random seeds.
    random.seed(FLAGS.seed)
    np.random.seed(FLAGS.seed)
    mx.random.seed(FLAGS.seed)

    # Save parameters.
    save_config(outdir + 'config.txt', FLAGS)

    log(
        logfile, 'Training with hyperparameters: alpha=%.2g, lambda=%.2g' %
        (FLAGS.p_alpha, FLAGS.weight_decay))

    # Load dataset.
    train_dataset = load_data(data_train, normalize=FLAGS.normalize_input)

    log(logfile, 'Training data: ' + data_train)
    log(logfile, 'Valid data:     ' + data_train_valid)
    log(
        logfile, 'Loaded data with shape [%d,%d]' %
        (train_dataset['n'], train_dataset['dim']))

    # CFR neural network architecture for ITE estimation.
    net = CFRNet(FLAGS.dim_rep, FLAGS.dim_hyp, FLAGS.weight_init_scale,
                 train_dataset['dim'], FLAGS.batch_norm)

    # Instantiate the network.
    net.initialize(ctx=ctx)
    net.hybridize()  # hybridize for better performance

    # Metric, losses and optimizer.
    rmse_metric = mx.metric.RMSE()
    l2_loss = gluon.loss.L2Loss()
    wass_loss = WassersteinLoss(
        lam=FLAGS.wass_lambda,
        its=FLAGS.wass_iterations,
        square=True,
        backpropT=FLAGS.wass_bpg)  # Change too at hybrid_test_net_with_cfr
    scheduler = mx.lr_scheduler.FactorScheduler(step=learning_rate_steps,
                                                factor=learning_rate_factor,
                                                base_lr=learning_rate)
    optimizer = mx.optimizer.Adam(learning_rate=learning_rate,
                                  lr_scheduler=scheduler)
    # optimizer = mx.optimizer.Adam(learning_rate=learning_rate, lr_scheduler=scheduler, wd=wd)
    trainer = gluon.Trainer(net.collect_params(), optimizer=optimizer)

    # Per-replication results: three scores each, plus the training time.
    train_scores = np.zeros((train_experiments, 3))
    train_durations = np.zeros((train_experiments, 1))
    test_scores = np.zeros((train_experiments, 3))

    # Per-replication yf normalisation statistics, saved for inference.
    yf_means = np.array([])
    yf_stds = np.array([])

    for train_experiment in range(train_experiments):
        # Slice out the data of this replication.
        x = train_dataset['x'][:, :, train_experiment]
        t = np.reshape(train_dataset['t'][:, train_experiment], (-1, 1))
        yf = train_dataset['yf'][:, train_experiment]
        ycf = train_dataset['ycf'][:, train_experiment]
        mu0 = train_dataset['mu0'][:, train_experiment]
        mu1 = train_dataset['mu1'][:, train_experiment]

        train, valid, test, _ = split_data_in_train_valid_test(
            x, t, yf, ycf, mu0, mu1)

        # Evaluators are built BEFORE yf is normalised, so they hold the
        # outcomes on the original scale. Within-sample = train + valid.
        train_evaluator = Evaluator(
            np.concatenate([train['t'], valid['t']]),
            np.concatenate([train['yf'], valid['yf']]),
            y_cf=np.concatenate([train['ycf'], valid['ycf']], axis=0),
            mu0=np.concatenate([train['mu0'], valid['mu0']], axis=0),
            mu1=np.concatenate([train['mu1'], valid['mu1']], axis=0))
        test_evaluator = Evaluator(test['t'], test['yf'], test['ycf'],
                                   test['mu0'], test['mu1'])

        if train_experiment == 0:
            # Learned representations of the first replication (last epoch),
            # collected for the t-SNE visualisation.
            first_exp_reps = []

        # Normalise yf with the statistics of the training part only.
        if FLAGS.normalize_input:
            yf_m = np.mean(train['yf'], axis=0)
            yf_std = np.std(train['yf'], axis=0)
            train['yf'] = (train['yf'] - yf_m) / yf_std
            valid['yf'] = (valid['yf'] - yf_m) / yf_std
            test['yf'] = (test['yf'] - yf_m) / yf_std

            yf_means = np.append(yf_means, yf_m)
            yf_stds = np.append(yf_stds, yf_std)

        # Factual training set: features are [x, t], label is yf.
        factual_features = np.hstack((train['x'], train['t']))
        train_factual_dataset = gluon.data.ArrayDataset(
            mx.nd.array(factual_features), mx.nd.array(train['yf']))
        # Within-sample covariates (train + valid) for treatment-effect
        # prediction.
        train_rmse_ite_dataset = gluon.data.ArrayDataset(
            mx.nd.array(np.concatenate([train['x'], valid['x']])))
        # Factual validation set.
        valid_factual_features = np.hstack((valid['x'], valid['t']))
        valid_factual_dataset = gluon.data.ArrayDataset(
            mx.nd.array(valid_factual_features), mx.nd.array(valid['yf']))
        # Test covariates.
        test_rmse_ite_dataset = gluon.data.ArrayDataset(
            mx.nd.array(test['x']))  # todo rename, rmse_ite has nothing to do

        # Only the factual training loader is shuffled.
        train_factual_loader = gluon.data.DataLoader(train_factual_dataset,
                                                     batch_size=batch_size,
                                                     shuffle=True,
                                                     num_workers=num_workers)
        train_rmse_ite_loader = gluon.data.DataLoader(train_rmse_ite_dataset,
                                                      batch_size=batch_size,
                                                      shuffle=False,
                                                      num_workers=num_workers)
        valid_factual_loader = gluon.data.DataLoader(valid_factual_dataset,
                                                     batch_size=batch_size,
                                                     shuffle=False,
                                                     num_workers=num_workers)
        test_rmse_ite_loader = gluon.data.DataLoader(test_rmse_ite_dataset,
                                                     batch_size=batch_size,
                                                     shuffle=False,
                                                     num_workers=num_workers)

        number_of_batches = len(train_factual_loader)

        # Share of treated units in the training part.
        treatment_probability = np.mean(train['t'])

        train_start = time.time()

        # Epochs are 1-based for easier learning rate calculation.
        for epoch in range(1, epochs + 1):
            train_loss = 0
            rmse_metric.reset()
            obj_loss = 0
            imb_err = 0

            for i, (batch_f_features,
                    batch_yf) in enumerate(train_factual_loader):
                # Copy data and labels to the training context.
                batch_f_features = batch_f_features.as_in_context(ctx)
                batch_yf = batch_yf.as_in_context(ctx)

                # The treatment indicator is the last feature column.
                batch_x = batch_f_features[:, :-1]
                batch_t = batch_f_features[:, -1]

                # Treatment and control indices. The batch size must be large
                # enough to contain at least one t=1 sample.
                t1_idx = np.where(batch_t == 1)[0]
                t0_idx = np.where(batch_t == 0)[0]

                if t1_idx.shape[0] == 0:
                    log(
                        logfile, 'Encountered no treatment samples at batch ' +
                        str(i) + '.')

                # Sample re-weighting: t / (2p) for treated units and
                # (1 - t) / (2 (1 - p)) for controls, p being the treatment
                # probability. `None` means "no weighting" for the loss.
                if FLAGS.reweight_sample:
                    w_t = batch_t / (2 * treatment_probability)
                    w_c = (1 - batch_t) / (2 * (1 - treatment_probability))
                    sample_weight = w_t + w_c
                else:
                    sample_weight = None

                # Per-sample outputs and losses, re-assembled in batch order.
                outputs = np.zeros(batch_yf.shape)
                loss = np.zeros(batch_yf.shape)

                # Forward (factual).
                with autograd.record():
                    t1_o, t0_o, rep_o = net(batch_x, mx.nd.array(t1_idx),
                                            mx.nd.array(t0_idx))

                    t1_o_loss = l2_loss(
                        t1_o, batch_yf[t1_idx],
                        None if sample_weight is None else
                        sample_weight[t1_idx])
                    np.put(loss, t1_idx, t1_o_loss.asnumpy())
                    np.put(outputs, t1_idx, t1_o.asnumpy())

                    t0_o_loss = l2_loss(
                        t0_o, batch_yf[t0_idx],
                        None if sample_weight is None else
                        sample_weight[t0_idx])
                    np.put(loss, t0_idx, t0_o_loss.asnumpy())
                    np.put(outputs, t0_idx, t0_o.asnumpy())

                    # Factual risk: summed loss of both treatment arms.
                    risk = t1_o_loss.sum() + t0_o_loss.sum()

                    if FLAGS.normalization == 'divide':
                        h_rep_norm = rep_o / mx_safe_sqrt(
                            mx.nd.sum(
                                mx.nd.square(rep_o), axis=1, keepdims=True))
                    else:
                        h_rep_norm = 1.0 * rep_o

                    # Imbalance between treated and control representations.
                    imb_dist = wass_loss(h_rep_norm[t1_idx],
                                         h_rep_norm[t0_idx])

                    imb_error = FLAGS.p_alpha * imb_dist

                    tot_error = risk

                    if FLAGS.p_alpha > 0:
                        tot_error = tot_error + imb_error

                    # Keep the last-epoch representations of the first
                    # replication for the t-SNE visualisation.
                    if train_experiment == 0 and epoch == epochs:
                        first_exp_reps.extend(rep_o)

                # Backward.
                tot_error.backward()

                # Optimize. Gradients are normalised by 1/<step argument>,
                # hence the actual sample count instead of the nominal batch
                # size (the last batch of an epoch can be smaller).
                trainer.step(batch_yf.shape[0])

                train_loss += loss.mean()
                rmse_metric.update(batch_yf, mx.nd.array(outputs))

                obj_loss += tot_error.asscalar()
                imb_err += imb_error.asscalar()

            if epoch % FLAGS.epoch_output_iter == 0 or epoch == 1:
                _, train_rmse_factual = rmse_metric.get()
                train_loss /= number_of_batches
                (_, valid_rmse_factual), _, _ = hybrid_test_net_with_cfr(
                    net, valid_factual_loader, ctx, FLAGS, np.mean(valid['t']))

                log(
                    logfile,
                    '[Epoch %d/%d] Train-rmse-factual: %.3f | Loss: %.3f | learning-rate: '
                    '%.3E | ObjLoss: %.3f | ImbErr: %.3f | Valid-rmse-factual: %.3f'
                    % (epoch, epochs, train_rmse_factual, train_loss,
                       trainer.learning_rate, obj_loss, imb_err,
                       valid_rmse_factual))

        # Stop the clock before plotting so the duration covers training only.
        train_durations[train_experiment, :] = time.time() - train_start

        # Plot the learned representation of the first replication.
        if train_experiment == 0:
            tsne_plot_pca(data=train['x'],
                          label=train['t'],
                          learned_representation=np.asarray(
                              [ind.asnumpy() for ind in first_exp_reps]),
                          outdir=outdir + FLAGS.architecture.lower())

        # Within-sample scores; predictions are mapped back to the original
        # yf scale when yf was normalised.
        y_t0, y_t1 = hybrid_predict_treated_and_controlled_with_cfr(
            net, train_rmse_ite_loader, ctx)
        if FLAGS.normalize_input:
            y_t0, y_t1 = y_t0 * yf_std + yf_m, y_t1 * yf_std + yf_m
        train_score = train_evaluator.get_metrics(y_t1, y_t0)
        train_scores[train_experiment, :] = train_score

        # Held-out test scores.
        y_t0, y_t1 = hybrid_predict_treated_and_controlled_with_cfr(
            net, test_rmse_ite_loader, ctx)
        if FLAGS.normalize_input:
            y_t0, y_t1 = y_t0 * yf_std + yf_m, y_t1 * yf_std + yf_m
        test_score = test_evaluator.get_metrics(y_t1, y_t0)
        test_scores[train_experiment, :] = test_score

        replication_template = (
            '[Train Replication {}/{}]: train RMSE ITE: {:0.3f}, train ATE: {:0.3f}, train PEHE: {:0.3f},'
            ' test RMSE ITE: {:0.3f}, test ATE: {:0.3f}, test PEHE: {:0.3f}')
        log(
            logfile,
            replication_template.format(train_experiment + 1,
                                        train_experiments, train_score[0],
                                        train_score[1], train_score[2],
                                        test_score[0], test_score[1],
                                        test_score[2]))

    # Save the normalisation means and stds for inference.
    if FLAGS.normalize_input:
        mx.nd.save(
            outdir + FLAGS.architecture.lower() + '_means_stds_ihdp_' +
            str(train_experiments) + '_.nd', {
                "means": mx.nd.array(yf_means),
                "stds": mx.nd.array(yf_stds)
            })

    # Export the trained model.
    # See mxnet.apache.org/api/python/docs/tutorials/packages/gluon/blocks/save_load_params.html
    net.export(outdir + FLAGS.architecture.lower() + "-ihdp-predictions-" +
               str(train_experiments))  # hybrid

    log(logfile,
        '\n{} architecture total scores:'.format(FLAGS.architecture.upper()))

    # Aggregate the scores over all replications: mean ± standard error.
    train_means = np.mean(train_scores, axis=0)
    train_sems = sem(train_scores, axis=0, ddof=0)
    train_r_pehe_mean = np.mean(np.sqrt(train_scores[:, 2]), axis=0)
    train_r_pehe_sem = sem(np.sqrt(train_scores[:, 2]), axis=0, ddof=0)
    train_template = (
        'train RMSE ITE: {:.2f} ± {:.2f}, train ATE: {:.2f} ± {:.2f}, train PEHE: {:.2f} ± {:.2f}, '
        'train root PEHE: {:.2f} ± {:.2f}')
    train_total_scores_str = train_template.format(
        train_means[0], train_sems[0], train_means[1], train_sems[1],
        train_means[2], train_sems[2], train_r_pehe_mean, train_r_pehe_sem)

    test_means = np.mean(test_scores, axis=0)
    test_sems = sem(test_scores, axis=0, ddof=0)
    test_r_pehe_mean = np.mean(np.sqrt(test_scores[:, 2]), axis=0)
    test_r_pehe_sem = sem(np.sqrt(test_scores[:, 2]), axis=0, ddof=0)
    test_template = (
        'test RMSE ITE: {:.2f} ± {:.2f}, test ATE: {:.2f} ± {:.2f}, test PEHE: {:.2f} ± {:.2f}, '
        'test root PEHE: {:.2f} ± {:.2f}')
    test_total_scores_str = test_template.format(
        test_means[0], test_sems[0], test_means[1], test_sems[1],
        test_means[2], test_sems[2], test_r_pehe_mean, test_r_pehe_sem)

    log(logfile, train_total_scores_str)
    log(logfile, test_total_scores_str)

    mean_duration = float("{0:.2f}".format(
        np.mean(train_durations, axis=0)[0]))

    with open(outdir + FLAGS.architecture.lower() + "-total-scores-" +
              str(train_experiments),
              "w",
              encoding="utf8") as text_file:
        print(train_total_scores_str,
              "\n",
              test_total_scores_str,
              file=text_file)

    # The returned summary reports the TEST scores.
    return {
        "ite": "{:.2f} ± {:.2f}".format(test_means[0], test_sems[0]),
        "ate": "{:.2f} ± {:.2f}".format(test_means[1], test_sems[1]),
        "pehe": "{:.2f} ± {:.2f}".format(test_means[2], test_sems[2]),
        "mean_duration": mean_duration
    }
示例#4
0
def mx_run_out_of_sample_test(outdir):
    """Evaluate an exported network on every replication of the test set.

    Configuration is read from the module-level ``FLAGS`` object
    (``num_workers``, ``batch_size_per_unit``, ``data_dir``, ``data_test``,
    ``normalize_input``, ``results_dir``, ``architecture``, ``experiments``).
    The network is imported from the symbol/params files found under
    ``FLAGS.results_dir``. For each replication (last axis of the loaded
    arrays) the three values returned by ``Evaluator.get_metrics`` are
    reported through ``log`` as RMSE ITE, ATE and PEHE, followed by their
    mean and standard error over all replications.

    Args:
        outdir: Output location. It is concatenated directly with file names
            (``outdir + 'log.txt'``), so it is expected to end with a path
            separator.

    Raises:
        SystemExit: With exit code -1 when the network cannot be imported;
            the full traceback is written to ``outdir + 'error.txt'``.
    """
    # Logging: start from an empty log file.
    logfile = outdir + 'log.txt'
    with open(logfile, 'w'):
        pass

    # Set GPUs/CPUs. Inference uses a single context, while the batch size is
    # scaled by the number of visible GPUs (counted as one unit on CPU).
    num_gpus = mx.context.num_gpus()
    num_workers = int(
        FLAGS.num_workers)  # replace num_workers with the number of cores
    ctx = mx.gpu() if num_gpus > 0 else mx.cpu()
    batch_size_per_unit = int(FLAGS.batch_size_per_unit)  # mini-batch size
    batch_size = batch_size_per_unit * max(num_gpus, 1)

    # Load test dataset.
    test_dataset = load_data(FLAGS.data_dir + FLAGS.data_test,
                             normalize=FLAGS.normalize_input)

    # Import CFRNet from the exported symbol and parameter files.
    try:
        warnings.simplefilter("ignore")
        net_prefix = (FLAGS.results_dir + "/" + FLAGS.architecture.lower() +
                      "-ihdp-predictions-" + str(FLAGS.experiments) + "-")
        net = gluon.nn.SymbolBlock.imports(net_prefix + "symbol.json",
                                           ['data0', 'data1', 'data2'],
                                           net_prefix + "0000.params",
                                           ctx=ctx)
    except Exception as e:
        error_path = outdir + 'error.txt'
        with open(error_path, 'w') as error_file:
            error_file.write(traceback.format_exc())
        # str(e) works for every exception type; indexing e.args[0] would
        # itself raise when args is empty or its first item is not a string
        # (e.g. OSError(errno, message)), hiding the original failure.
        print(str(e).split('Stack trace')[0])
        print("More details at:\t" + error_path)
        sys.exit(-1)

    # One test experiment per slice along the last axis of 'x'.
    test_experiments = test_dataset['x'].shape[2]

    # One row of (RMSE ITE, ATE, PEHE) per test experiment.
    test_scores = np.zeros((test_experiments, 3))

    # Test model.
    for test_experiment in range(test_experiments):
        # Slice out the data of the current replication.
        x = test_dataset['x'][:, :, test_experiment]
        t = np.reshape(test_dataset['t'][:, test_experiment], (-1, 1))
        yf = test_dataset['yf'][:, test_experiment]
        ycf = test_dataset['ycf'][:, test_experiment]
        mu0 = test_dataset['mu0'][:, test_experiment]
        mu1 = test_dataset['mu1'][:, test_experiment]

        # The evaluator is built before normalisation, i.e. with the
        # original (not normalised) labels.
        test_evaluator = Evaluator(t, yf, ycf, mu0, mu1)

        # Normalise yf with this replication's own mean and std.
        if FLAGS.normalize_input:
            test_yf_m, test_yf_std = np.mean(yf, axis=0), np.std(yf, axis=0)
            yf = (yf - test_yf_m) / test_yf_std

        # Test dataset and loader (order preserved, no shuffling).
        test_factual_dataset = gluon.data.ArrayDataset(mx.nd.array(x),
                                                       mx.nd.array(t),
                                                       mx.nd.array(yf))
        test_rmse_ite_loader = gluon.data.DataLoader(test_factual_dataset,
                                                     batch_size=batch_size,
                                                     shuffle=False,
                                                     num_workers=num_workers)

        # Predict both potential outcomes and map them back to the original
        # label scale before scoring.
        y_t0, y_t1 = hybrid_predict_treated_and_controlled_with_cfr(
            net, test_rmse_ite_loader, ctx)
        if FLAGS.normalize_input:
            y_t0 = y_t0 * test_yf_std + test_yf_m
            y_t1 = y_t1 * test_yf_std + test_yf_m
        test_score = test_evaluator.get_metrics(y_t1, y_t0)
        test_scores[test_experiment, :] = test_score

        log(
            logfile,
            '[Test Replication {}/{}]:\tRMSE ITE: {:0.3f},\t\t ATE: {:0.3f},\t\t PEHE: {:0.3f}'
            .format(test_experiment + 1, test_experiments, test_score[0],
                    test_score[1], test_score[2]))

    # TODO: decide whether scores should be reported as square roots
    # (test_scores = np.sqrt(test_scores)).
    # The "±" part of the summary is the standard error of the mean.
    means, stds = np.mean(test_scores, axis=0), sem(test_scores,
                                                    axis=0,
                                                    ddof=0)
    log(
        logfile,
        'test RMSE ITE: {:.3f} ± {:.3f}, test ATE: {:.3f} ± {:.3f}, test PEHE: {:.3f} ± {:.3f}'
        .format(means[0], stds[0], means[1], stds[1], means[2], stds[2]))
示例#5
0
def run(args, outdir):
    """ Run training for NN4 architecture with Variational Bayes.

    For each of ``args.experiments`` replications the data is split into
    train/valid/test parts, the factual outcome is normalized with the train
    statistics, and the network is trained for ``args.iterations`` epochs
    with weights that are re-sampled on every batch from the variational
    ``lambda`` parameters. Within-sample (train + valid) and test metrics are
    logged per replication and summarized at the end.

    Args:
        args: Configuration object. The attributes read here are
            ``iterations``, ``learning_rate``, ``weight_decay``,
            ``hidden_size``, ``experiments``, ``learning_rate_factor``,
            ``learning_rate_steps``, ``epoch_output_iter``, ``num_workers``,
            ``batch_size_per_unit``, ``seed``, ``data_dir``, ``data_train``,
            ``data_test`` (only logged) and ``architecture``.
        outdir: Output location; it is concatenated directly with file names
            (e.g. ``outdir + 'log.txt'``), so it is expected to end with a
            path separator.

    Returns:
        dict with keys ``"ite"``, ``"ate"`` and ``"pehe"`` (strings formatted
        as ``mean ± standard error`` of the *test* scores) and
        ``"mean_duration"`` (mean training time per replication in seconds,
        rounded to two decimals).
    """
    ''' Hyperparameters '''
    epochs = int(args.iterations)
    learning_rate = float(args.learning_rate)
    wd = float(args.weight_decay)
    hidden_size = int(args.hidden_size)
    train_experiments = int(args.experiments)
    learning_rate_factor = float(args.learning_rate_factor)
    learning_rate_steps = int(
        args.learning_rate_steps
    )  # changes the learning rate for every n updates.
    epoch_output_iter = int(args.epoch_output_iter)
    ''' Logging '''
    logfile = outdir + 'log.txt'
    # Open in 'w' mode and close immediately: this only truncates/creates
    # the log file.
    f = open(logfile, 'w')
    f.close()

    # Prior settings that are forwarded to BBBLoss below.
    config = {  # TODO may need adjustments
        # "sigma_p1": 1.5,
        "sigma_p1": 1.75,  # og
        # "sigma_p2": 0.25,
        # "sigma_p2": 0.5, # og
        "sigma_p2": 0.5,
        "pi": 0.5,
        "lambda_p": 24.5
    }
    ''' Set GPUs/CPUs '''
    num_gpus = mx.context.num_gpus()
    num_workers = int(
        args.num_workers)  # replace num_workers with the number of cores
    ctx = [mx.gpu(i) for i in range(num_gpus)
           ] if num_gpus > 0 else [mx.cpu()]  # todo change as cfr_net_train
    batch_size_per_unit = int(args.batch_size_per_unit)  # mini-batch size
    batch_size = batch_size_per_unit * max(num_gpus, 1)
    ''' Set seeds '''
    for c in ctx:
        mx.random.seed(int(args.seed), c)
    np.random.seed(int(args.seed))
    ''' Feed Forward Neural Network Model (4 hidden layers) '''
    net = ff4_relu_architecture(hidden_size)
    ''' Load datasets '''
    # train_dataset = load_data('../' + args.data_dir + args.data_train) # PyCharm run
    train_dataset = load_data(args.data_dir + args.data_train)  # Terminal run

    log(logfile, 'Training data: ' + args.data_dir + args.data_train)
    # NOTE: the path logged as "Valid data" is args.data_test; that file is
    # not loaded anywhere in this function.
    log(logfile, 'Valid data:     ' + args.data_dir + args.data_test)
    log(
        logfile, 'Loaded data with shape [%d,%d]' %
        (train_dataset['n'], train_dataset['dim']))

    # ''' Feature correlation '''
    # import pandas as pd
    # df = pd.DataFrame.from_records(train_dataset['x'][:, :, 20])
    # df.insert(25, "t", train_dataset['t'][:, 20])
    # corr = df.corr()
    # import seaborn as sns
    # sns.heatmap(corr, xticklabels=corr.columns, yticklabels=corr.columns, annot=True, fmt='.1f')
    ''' Instantiate net '''
    ''' Param. init. '''
    net.collect_params().initialize(mx.init.Xavier(), ctx=ctx)
    net.hybridize()
    ''' Forward-propagate a single data set entry once to set up all network 
    parameters (weights and biases) with the desired initializer specified above. '''
    # Uses replication 0 only; the parameter shapes are read from the net
    # right after this warm-up pass.
    x = train_dataset['x'][:, :, 0]
    t = np.reshape(train_dataset['t'][:, 0], (-1, 1))
    yf = train_dataset['yf'][:, 0]
    yf_m, yf_std = np.mean(yf, axis=0), np.std(yf, axis=0)
    yf = (yf - yf_m) / yf_std
    # Network input is the covariates with the treatment appended as the
    # last column.
    factual_features = np.hstack((x, t))
    zero_train_factual_dataset = gluon.data.ArrayDataset(
        mx.nd.array(factual_features), mx.nd.array(yf))
    zero_train_factual_loader = gluon.data.DataLoader(
        zero_train_factual_dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers)
    # Only the first batch is pushed through the net (note the `break`).
    for i, (batch_f_features,
            batch_yf) in enumerate(zero_train_factual_loader):
        batch_f_features = gluon.utils.split_and_load(batch_f_features,
                                                      ctx_list=ctx,
                                                      even_split=False)
        [net(x) for x in batch_f_features]
        break

    # weight_scale and rho_offset are only referenced by the commented-out
    # mu/rho setup below; lambda_init is the constant initial value of every
    # lambda parameter.
    weight_scale = .1
    rho_offset = -3
    lambda_init = 25
    ''' Initialize variational parameters; mean and variance for each weight '''
    # mus and rhos stay empty: only the lambda parameters are created.
    mus = []
    rhos = []
    lambdas = []

    # One shape per network parameter, in collect_params() order.
    shapes = list(map(lambda x: x.shape, net.collect_params().values()))

    for shape in shapes:
        # mu = gluon.Parameter('mu', shape=shape, init=mx.init.Normal(weight_scale))
        # rho = gluon.Parameter('rho', shape=shape, init=mx.init.Constant(rho_offset))
        lmb = gluon.Parameter('lmb',
                              shape=shape,
                              init=mx.init.Constant(lambda_init))
        # mu.initialize(ctx=ctx)
        # rho.initialize(ctx=ctx)
        lmb.initialize(ctx=ctx)
        # mus.append(mu)
        # rhos.append(rho)
        lambdas.append(lmb)
    # variational_params = mus + rhos
    variational_params = lambdas

    # raw_mus = list(map(lambda x: x.data(ctx[0]), mus))
    # raw_rhos = list(map(lambda x: x.data(ctx[0]), rhos))
    # Only the copies living on the first context are used for sampling.
    raw_lambdas = list(map(lambda x: x.data(ctx[0]), lambdas))
    ''' Metric, Loss and Optimizer '''
    rmse_metric = mx.metric.RMSE()
    l2_loss = gluon.loss.L2Loss()
    bbb_loss = BBBLoss(ctx[0],
                       log_prior="exponential",
                       sigma_p1=config['sigma_p1'],
                       sigma_p2=config['sigma_p2'],
                       pi=config['pi'],
                       lambda_p=config['lambda_p'])
    # bbb_loss = BBBLoss(ctx[0], log_prior="scale_mixture", sigma_p1=config['sigma_p1'], sigma_p2=config['sigma_p2'],
    #                    pi=config['pi'])
    scheduler = mx.lr_scheduler.FactorScheduler(step=learning_rate_steps,
                                                factor=learning_rate_factor,
                                                base_lr=learning_rate)
    # optimizer = mx.optimizer.Adam(learning_rate=learning_rate, lr_scheduler=scheduler)
    optimizer = mx.optimizer.RMSProp(learning_rate=learning_rate,
                                     lr_scheduler=scheduler,
                                     wd=wd)
    # optimizer = mx.optimizer.Adam(learning_rate=learning_rate)
    # The trainer updates the variational (lambda) parameters, not the
    # parameters collected from `net`.
    trainer = gluon.Trainer(variational_params, optimizer=optimizer)
    ''' Initialize train score results '''
    train_scores = np.zeros((train_experiments, 3))
    ''' Initialize train experiment durations '''
    train_durations = np.zeros((train_experiments, 1))
    ''' Initialize test score results '''
    test_scores = np.zeros((train_experiments, 3))
    ''' Train experiments means and stds '''
    # Here `means`/`stds` collect the per-replication yf normalization
    # statistics; both names are re-bound to score statistics after they
    # have been saved.
    means = np.array([])
    stds = np.array([])
    ''' Train '''
    # NOTE: net, trainer and the lambda parameters are created once above
    # and are not re-initialized between replications.
    for train_experiment in range(train_experiments):
        ''' Create training dataset '''
        x = train_dataset['x'][:, :, train_experiment]
        t = np.reshape(train_dataset['t'][:, train_experiment], (-1, 1))
        yf = train_dataset['yf'][:, train_experiment]
        ycf = train_dataset['ycf'][:, train_experiment]
        mu0 = train_dataset['mu0'][:, train_experiment]
        mu1 = train_dataset['mu1'][:, train_experiment]

        train, valid, test, _ = split_data_in_train_valid_test(
            x, t, yf, ycf, mu0, mu1)
        ''' With-in sample '''
        # Both evaluators are built before yf is normalized below. The
        # normalization re-binds the dict entries to new arrays, so the
        # evaluators receive the original label values.
        train_evaluator = Evaluator(
            np.concatenate([train['t'], valid['t']]),
            np.concatenate([train['yf'], valid['yf']]),
            y_cf=np.concatenate([train['ycf'], valid['ycf']], axis=0),
            mu0=np.concatenate([train['mu0'], valid['mu0']], axis=0),
            mu1=np.concatenate([train['mu1'], valid['mu1']], axis=0))
        test_evaluator = Evaluator(test['t'], test['yf'], test['ycf'],
                                   test['mu0'], test['mu1'])
        ''' Normalize yf '''  # TODO check for normalize input?
        # Statistics come from the train split only and are applied to all
        # three splits.
        yf_m, yf_std = np.mean(train['yf'], axis=0), np.std(train['yf'],
                                                            axis=0)
        train['yf'] = (train['yf'] - yf_m) / yf_std
        valid['yf'] = (valid['yf'] - yf_m) / yf_std
        test['yf'] = (test['yf'] - yf_m) / yf_std
        ''' Save mean and std '''
        means = np.append(means, yf_m)
        stds = np.append(stds, yf_std)
        ''' Train dataset '''
        factual_features = np.hstack((train['x'], train['t']))
        train_factual_dataset = gluon.data.ArrayDataset(
            mx.nd.array(factual_features), mx.nd.array(train['yf']))
        ''' With-in sample '''
        # Covariates only (no treatment column) for the ITE predictions.
        train_rmse_ite_dataset = gluon.data.ArrayDataset(
            mx.nd.array(np.concatenate([train['x'], valid['x']])))
        ''' Valid dataset '''
        valid_factual_features = np.hstack((valid['x'], valid['t']))
        valid_factual_dataset = gluon.data.ArrayDataset(
            mx.nd.array(valid_factual_features), mx.nd.array(valid['yf']))
        ''' Test dataset '''
        test_rmse_ite_dataset = gluon.data.ArrayDataset(mx.nd.array(test['x']))
        ''' Train DataLoader '''
        train_factual_loader = gluon.data.DataLoader(train_factual_dataset,
                                                     batch_size=batch_size,
                                                     shuffle=True,
                                                     num_workers=num_workers)
        train_rmse_ite_loader = gluon.data.DataLoader(train_rmse_ite_dataset,
                                                      batch_size=batch_size,
                                                      shuffle=False,
                                                      num_workers=num_workers)
        ''' Valid DataLoader '''
        valid_factual_loader = gluon.data.DataLoader(valid_factual_dataset,
                                                     batch_size=batch_size,
                                                     shuffle=False,
                                                     num_workers=num_workers)
        ''' Test DataLoader '''
        test_rmse_ite_loader = gluon.data.DataLoader(test_rmse_ite_dataset,
                                                     batch_size=batch_size,
                                                     shuffle=False,
                                                     num_workers=num_workers)

        # Number of mini-batches per epoch.
        num_batch = len(train_factual_loader)

        train_start = time.time()

        # train_acc / test_acc are only referenced by commented-out code.
        train_acc = []
        test_acc = []
        ''' Train model '''
        for epoch in range(
                1, epochs +
                1):  # start with epoch 1 for easier learning rate calculation

            train_loss = 0
            rmse_metric.reset()

            for i, (batch_f_features,
                    batch_yf) in enumerate(train_factual_loader):
                ''' Get data and labels into slices and copy each slice into a context.'''
                # Everything is moved to ctx[0] only. The feature width is
                # hard-coded to 26 -- presumably 25 covariates plus the
                # treatment column; TODO confirm against the dataset.
                batch_f_features = batch_f_features.as_in_context(
                    ctx[0]).reshape((-1, 26))
                batch_yf = batch_yf.as_in_context(ctx[0]).reshape(
                    (len(batch_yf), -1))
                ''' Forward '''
                with autograd.record():
                    ''' Generate sample '''
                    # layer_params, sigmas = generate_weight_sample(shapes, raw_mus, raw_rhos, ctx[0])
                    layer_params = generate_weight_sample_exp(
                        shapes, raw_lambdas, ctx[0])
                    ''' Overwrite network parameters with sampled parameters '''
                    # Writes through the private `_data` list of each
                    # Parameter (entry 0 -- presumably the ctx[0] copy;
                    # verify against the Gluon Parameter implementation).
                    for sample, param in zip(layer_params,
                                             net.collect_params().values()):
                        param._data[0] = sample
                    ''' Forward-propagate the batch '''
                    outputs = net(batch_f_features)

                    # if epoch == epochs:
                    #     ''' Factual outcomes and batch_yf histograms '''
                    #     import pandas as pd
                    #     df = pd.DataFrame({'layer_params': layer_params[6][0].asnumpy().flatten()}, columns=['layer_params'])
                    #     df = pd.DataFrame(
                    #         {'outputs': outputs.asnumpy().flatten(), 'batch_yf': batch_yf.asnumpy().flatten()},
                    #         columns=['outputs', 'batch_yf'])
                    #     df.plot(kind='hist', alpha=0.5)
                    #     df.plot.kde()
                    ''' Calculate the loss '''
                    l2_loss_value = l2_loss(outputs, batch_yf)
                    # bbb_loss_value = bbb_loss(outputs, batch_yf, layer_params, raw_mus, sigmas, num_batch)
                    bbb_loss_value = bbb_loss(outputs, batch_yf, layer_params,
                                              raw_lambdas, [], num_batch)
                    # The optimized objective is the sum of both losses.
                    loss = bbb_loss_value + l2_loss_value
                    # loss = bbb_loss_value
                    # loss = l2_loss_value
                    ''' Backpropagate for gradient calculation '''
                    # Note: backward() is invoked while still inside the
                    # autograd.record() scope.
                    loss.backward()
                ''' Optimize '''
                trainer.step(batch_size)

                # Accumulate the mean loss of this batch; it is divided by
                # num_batch when the epoch is reported.
                train_loss += sum([l.mean().asscalar()
                                   for l in loss]) / len(loss)

                rmse_metric.update(batch_yf, outputs)

            # Report on the first epoch and on every epoch_output_iter-th one.
            if epoch % epoch_output_iter == 0 or epoch == 1:
                _, train_rmse_factual = rmse_metric.get()
                train_loss /= num_batch
                # Validation uses the weight sample drawn for the last
                # training batch of this epoch.
                _, valid_rmse_factual = test_net_vb(net, valid_factual_loader,
                                                    layer_params, ctx)

                # _, train_RMSE = evaluate_RMSE(train_factual_loader, net, raw_mus, ctx)
                # _, test_RMSE = evaluate_RMSE(valid_factual_loader, net, raw_mus, ctx)
                # train_acc.append(np.asscalar(train_RMSE))
                # test_acc.append(np.asscalar(test_RMSE))
                # print("Epoch %s. Train-RMSE %s, Test-RMSE %s" %
                #       (epoch, train_RMSE, test_RMSE))

                # These two values are the first elements of the loss arrays
                # of the last batch only, not epoch averages.
                log(
                    logfile, 'l2-loss: %.3f, bbb-loss: %.3f' %
                    (l2_loss_value[0].asscalar(),
                     bbb_loss_value[0].asscalar()))

                log(
                    logfile,
                    '[Epoch %d/%d] Train-rmse-factual: %.3f, loss: %.3f | Valid-rmse-factual: %.3f | learning-rate: '
                    '%.3E' % (epoch, epochs, train_rmse_factual, train_loss,
                              valid_rmse_factual, trainer.learning_rate))

        train_durations[train_experiment, :] = time.time() - train_start
        ''' Test model '''
        # NOTE(review): `layer_params` is the sample from the very last
        # training batch; it is undefined here (NameError) if no batch was
        # ever processed, e.g. when epochs < 1.
        # y_t0, y_t1 = predict_treated_and_controlled_vb(net, train_rmse_ite_loader, raw_mus, ctx)
        y_t0, y_t1 = predict_treated_and_controlled_vb(net,
                                                       train_rmse_ite_loader,
                                                       layer_params, ctx)
        # Map predictions back to the original label scale before scoring.
        y_t0, y_t1 = y_t0 * yf_std + yf_m, y_t1 * yf_std + yf_m
        train_score = train_evaluator.get_metrics(y_t1, y_t0)
        train_scores[train_experiment, :] = train_score

        # y_t0, y_t1 = predict_treated_and_controlled_vb(net, test_rmse_ite_loader, raw_mus, ctx)
        y_t0, y_t1 = predict_treated_and_controlled_vb(net,
                                                       test_rmse_ite_loader,
                                                       layer_params, ctx)
        y_t0, y_t1 = y_t0 * yf_std + yf_m, y_t1 * yf_std + yf_m
        test_score = test_evaluator.get_metrics(y_t1, y_t0)
        test_scores[train_experiment, :] = test_score

        log(logfile, '[Train Replication {}/{}]: train RMSE ITE: {:0.3f}, train ATE: {:0.3f}, train PEHE: {:0.3f},' \
                     ' test RMSE ITE: {:0.3f}, test ATE: {:0.3f}, test PEHE: {:0.3f}'.format(train_experiment + 1,
                                                                                             train_experiments,
                                                                                             train_score[0],
                                                                                             train_score[1],
                                                                                             train_score[2],
                                                                                             test_score[0],
                                                                                             test_score[1],
                                                                                             test_score[2]))
        # plt.plot(train_acc)
        # plt.plot(test_acc)
    ''' Save means and stds NDArray values for inference '''
    # At this point `means`/`stds` still hold the yf normalization statistics
    # of every replication.
    mx.nd.save(
        outdir + args.architecture.lower() + '_means_stds_ihdp_' +
        str(train_experiments) + '_.nd', {
            "means": mx.nd.array(means),
            "stds": mx.nd.array(stds)
        })
    ''' Export trained model '''
    # The epoch number passed to export is the configured number of epochs.
    net.export(outdir + args.architecture.lower() + "-ihdp-predictions-" +
               str(train_experiments),
               epoch=epochs)

    log(logfile,
        '\n{} architecture total scores:'.format(args.architecture.upper()))
    ''' Train and test scores '''
    # From here on `means`/`stds` are re-bound to score statistics; `stds`
    # holds the result of sem(...), presumably scipy.stats.sem (standard
    # error of the mean) -- confirm against the file's imports.
    means, stds = np.mean(train_scores, axis=0), sem(train_scores,
                                                     axis=0,
                                                     ddof=0)
    # "Root PEHE": statistics of the square root of the third score column.
    r_pehe_mean, r_pehe_std = np.mean(np.sqrt(train_scores[:, 2]),
                                      axis=0), sem(np.sqrt(train_scores[:, 2]),
                                                   axis=0,
                                                   ddof=0)
    train_total_scores_str = 'train RMSE ITE: {:.2f} ± {:.2f}, train ATE: {:.2f} ± {:.2f}, train PEHE: {:.2f} ± {:.2f}, ' \
                             'train root PEHE: {:.2f} ± {:.2f}' \
                             ''.format(means[0], stds[0], means[1], stds[1], means[2], stds[2], r_pehe_mean, r_pehe_std)

    # Re-bound once more, now to the test-score statistics; these are the
    # values that end up in the returned dict.
    means, stds = np.mean(test_scores, axis=0), sem(test_scores,
                                                    axis=0,
                                                    ddof=0)
    r_pehe_mean, r_pehe_std = np.mean(np.sqrt(test_scores[:, 2]),
                                      axis=0), sem(np.sqrt(test_scores[:, 2]),
                                                   axis=0,
                                                   ddof=0)
    test_total_scores_str = 'test RMSE ITE: {:.2f} ± {:.2f}, test ATE: {:.2f} ± {:.2f}, test PEHE: {:.2f} ± {:.2f}, ' \
                            'test root PEHE: {:.2f} ± {:.2f}' \
                            ''.format(means[0], stds[0], means[1], stds[1], means[2], stds[2], r_pehe_mean, r_pehe_std)

    log(logfile, train_total_scores_str)
    log(logfile, test_total_scores_str)

    # Mean wall-clock training time per replication, rounded to 2 decimals.
    mean_duration = float("{0:.2f}".format(
        np.mean(train_durations, axis=0)[0]))

    # Persist both summary lines; utf8 is needed for the "±" character.
    with open(outdir + args.architecture.lower() + "-total-scores-" +
              str(train_experiments),
              "w",
              encoding="utf8") as text_file:
        print(train_total_scores_str,
              "\n",
              test_total_scores_str,
              file=text_file)

    # Test-set summary (see the re-binding of means/stds above).
    return {
        "ite": "{:.2f} ± {:.2f}".format(means[0], stds[0]),
        "ate": "{:.2f} ± {:.2f}".format(means[1], stds[1]),
        "pehe": "{:.2f} ± {:.2f}".format(means[2], stds[2]),
        "mean_duration": mean_duration
    }