示例#1
0
def execute(dataset,
            n_hidden_u,
            num_epochs=500,
            learning_rate=.001,
            learning_rate_annealing=1.0,
            lmd=.0001,
            embedding_input='raw',
            which_fold=0,
            save_path='/Tmp/$USER/feature_selection/newmodel/',
            save_copy='/Tmp/$USER/feature_selection/newmodel/',
            dataset_path='/Tmp/$USER/feature_selection/newmodel/'):
    """Pretrain an unsupervised autoencoder and save its feature embedding.

    The data is loaded through ``mlh.load_data`` with ``transpose=True``; an
    encoder (tanh dense layers + dropout) and a mirrored linear decoder are
    built, trained with Adam on a reconstruction loss plus an L2 penalty, and
    early-stopped on the validation ``'loss'`` value.

    Args:
        dataset: Dataset identifier forwarded to ``mlh.load_data``; also used
            as a sub-directory name under ``save_path`` / ``save_copy``.
        n_hidden_u: Non-empty sequence with the number of units of each
            encoder layer. The decoder mirrors all but the last entry.
        num_epochs: Maximum number of training epochs.
        learning_rate: Initial learning rate.
        learning_rate_annealing: Factor the learning rate is multiplied by
            after every epoch.
        lmd: Weight of the L2 penalty on the trainable parameters.
        embedding_input: One of ``'raw'``, ``'w2v'``, ``'bin'`` or a name
            containing ``'histo'``; selects the output non-linearity.
        which_fold: Fold index forwarded to ``mlh.load_data``.
        save_path: Root directory for checkpoints and the embedding.
        save_copy: Root directory the results are copied to afterwards.
        dataset_path: Directory forwarded to ``mlh.load_data``.

    Raises:
        ValueError: If ``embedding_input`` is not supported, or if the
            early-stopping criterion is not among the monitored values.

    NOTE(review): the ``$USER`` token in the default paths is not expanded
    anywhere in this function, so the defaults are used literally.
    """

    # Load the dataset
    print("Loading data")
    x_unsup = mlh.load_data(dataset,
                            dataset_path,
                            None,
                            which_fold=which_fold,
                            keep_labels=1.0,
                            missing_labels_val=-1.0,
                            embedding_input=embedding_input,
                            transpose=True)

    x_train = x_unsup[0][0]
    x_valid = x_unsup[1][0]

    # Extract required information from data
    n_row, n_col = x_train.shape
    print('Data size ' + str(n_row) + 'x' + str(n_col))

    # Set some variables
    batch_size = 256
    early_stop_criterion = 'loss'

    # Define experiment name
    exp_name = 'pretrain_' + mlh.define_exp_name(
        1., 0, 0, 0, lmd, n_hidden_u, [], [], [], which_fold, embedding_input,
        learning_rate, 0, 0, 'reconst_loss', learning_rate_annealing)
    print('Experiment: ' + exp_name)

    # Preparing folder to save stuff
    save_path = os.path.join(save_path, dataset, exp_name)
    save_copy = os.path.join(save_copy, dataset, exp_name)
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    # Prepare Theano variables for inputs and targets
    input_var = T.matrix('input_unsup')
    lr = theano.shared(np.float32(learning_rate), 'learning_rate')

    # Build model
    print("Building model")

    # Some checkings
    assert len(n_hidden_u) > 0

    # Build unsupervised network
    encoder_net = InputLayer((None, n_col), input_var)

    for out in n_hidden_u:
        encoder_net = DenseLayer(encoder_net, num_units=out, nonlinearity=tanh)
        encoder_net = DropoutLayer(encoder_net)

    # The decoder mirrors the encoder, skipping the bottleneck size itself
    decoder_net = encoder_net
    for i in range(len(n_hidden_u) - 2, -1, -1):
        decoder_net = DenseLayer(decoder_net,
                                 num_units=n_hidden_u[i],
                                 nonlinearity=linear)
        decoder_net = DropoutLayer(decoder_net)

    decoder_net = DenseLayer(decoder_net, num_units=n_col, nonlinearity=linear)

    if embedding_input == 'raw' or embedding_input == 'w2v':
        final_nonlin = linear
    elif embedding_input == 'bin':
        final_nonlin = sigmoid
    elif 'histo' in embedding_input:
        final_nonlin = softmax
    else:
        # Without this branch final_nonlin would be unbound further down
        raise ValueError("Unsupported embedding_input: %s" % embedding_input)

    if embedding_input == 'histo3x26':
        # Apply the softmax over groups of 3 values, then restore the shape
        laySize = lasagne.layers.get_output(decoder_net).shape
        decoder_net = ReshapeLayer(decoder_net, (laySize[0] * 26, 3))

    decoder_net = NonlinearityLayer(decoder_net, nonlinearity=final_nonlin)

    if embedding_input == 'histo3x26':
        decoder_net = ReshapeLayer(decoder_net, (laySize[0], laySize[1]))

    print("Building and compiling training functions")
    # Build and compile training functions
    predictions, predictions_det = mh.define_predictions(
        [encoder_net, decoder_net], start=0)

    # Define losses
    # reconstruction losses
    loss, loss_det = mh.define_loss(predictions[1], predictions_det[1],
                                    input_var, embedding_input)

    # Define parameters
    params = lasagne.layers.get_all_params(decoder_net, trainable=True)

    l2_penalty = apply_penalty(params, l2)
    loss = loss + lmd * l2_penalty
    loss_det = loss_det + lmd * l2_penalty

    # Compute network updates
    updates = lasagne.updates.adam(loss, params, learning_rate=lr)

    # Apply norm constraints on the weights (2D parameters only)
    for k in updates.keys():
        if updates[k].ndim == 2:
            updates[k] = lasagne.updates.norm_constraint(updates[k], 1.0)

    # Compile training function
    train_fn = theano.function([input_var],
                               loss,
                               updates=updates,
                               on_unused_input='ignore')

    # Expressions required for test
    monitor_labels = ['loss']
    val_outputs = [loss_det]

    # Add some monitoring on the learned feature embedding
    val_outputs += [
        predictions[0].min(), predictions[0].mean(), predictions[0].max(),
        predictions[0].var()
    ]
    monitor_labels += [
        "feat. emb. min", "feat. emb. mean", "feat. emb. max", "feat. emb. var"
    ]

    # Compile validation function
    val_fn = theano.function([input_var], val_outputs)

    pred_feat_emb = theano.function([input_var], predictions_det[0])

    # Finally, launch the training loop.
    print("Starting training...")

    # Some variables
    max_patience = 100
    patience = 0
    best_valid = None

    train_monitored = []
    valid_monitored = []
    train_loss = []

    best_enc_path = os.path.join(save_path, 'model_enc_unsupervised_best.npz')

    def save_checkpoint(tag):
        """Save encoder, full autoencoder and monitored errors under *tag*."""
        np.savez(
            os.path.join(save_path, 'model_enc_unsupervised_%s.npz' % tag),
            *lasagne.layers.get_all_param_values(encoder_net))
        # decoder_net is stacked on encoder_net, so this is the whole AE
        np.savez(
            os.path.join(save_path, 'model_ae_unsupervised_%s.npz' % tag),
            *lasagne.layers.get_all_param_values(decoder_net))
        # list() so that an array, not a zip iterator, is stored on Python 3
        np.savez(
            os.path.join(save_path, 'errors_unsupervised_%s.npz' % tag),
            list(zip(*train_monitored)), list(zip(*valid_monitored)))

    # NOTE(review): this is a true division on Python 3 and assumes n_row is
    # at least batch_size; it is only used to average the training loss.
    nb_minibatches = n_row / batch_size
    print("Nb of minibatches: " + str(nb_minibatches))
    start_training = time.time()
    for epoch in range(num_epochs):
        start_time = time.time()
        print("Epoch {} of {}".format(epoch + 1, num_epochs))

        loss_epoch = 0

        # Train pass
        for batch in mlh.iterate_minibatches_unsup(x_train,
                                                   batch_size,
                                                   shuffle=True):
            loss_epoch += train_fn(batch)

        loss_epoch /= nb_minibatches
        train_loss += [loss_epoch]

        train_minibatches = mlh.iterate_minibatches_unsup(x_train,
                                                          batch_size,
                                                          shuffle=True)
        train_err = mlh.monitoring(train_minibatches,
                                   "train",
                                   val_fn,
                                   monitor_labels,
                                   start=0)
        train_monitored += [train_err]

        # Validation pass
        valid_minibatches = mlh.iterate_minibatches_unsup(x_valid,
                                                          batch_size,
                                                          shuffle=True)

        valid_err = mlh.monitoring(valid_minibatches,
                                   "valid",
                                   val_fn,
                                   monitor_labels,
                                   start=0)

        valid_monitored += [valid_err]

        try:
            early_stop_val = valid_err[
                monitor_labels.index(early_stop_criterion)]
        except (ValueError, IndexError):
            raise ValueError("There is no monitored value by the name of %s" %
                             early_stop_criterion)

        # Early stopping: the first epoch always counts as the current best,
        # so a best checkpoint exists even if validation never improves.
        if epoch == 0 or early_stop_val < best_valid:
            best_valid = early_stop_val
            patience = 0
            save_checkpoint('best')
        else:
            patience += 1
            save_checkpoint('last')

        # End training
        if patience == max_patience or epoch == num_epochs - 1:
            print("   Ending training")
            # Load unsupervised best model
            if not os.path.exists(best_enc_path):
                print("No saved model to be tested and/or generate"
                      " the embedding !")
            else:
                with np.load(best_enc_path) as f:
                    param_values = [
                        f['arr_%d' % i] for i in range(len(f.files))
                    ]
                    lasagne.layers.set_all_param_values(
                        encoder_net, param_values)

                # Save embedding (train rows first, then validation rows)
                preds = []
                for batch in mlh.iterate_minibatches_unsup(x_train,
                                                           1,
                                                           shuffle=False):
                    preds.append(pred_feat_emb(batch))
                for batch in mlh.iterate_minibatches_unsup(x_valid,
                                                           1,
                                                           shuffle=False):
                    preds.append(pred_feat_emb(batch))
                preds = np.vstack(preds)
                np.savez(os.path.join(save_path, 'feature_embedding.npz'),
                         preds)

            # Stop
            print(" epoch time:\t\t\t{:.3f}s".format(time.time() - start_time))
            break

        print("  epoch time:\t\t\t{:.3f}s".format(time.time() - start_time))
        # Anneal the learning rate
        lr.set_value(float(lr.get_value() * learning_rate_annealing))

    # Print the total training time
    print("Training time:\t\t\t{:.3f}s".format(time.time() - start_training))

    # Copy files to loadpath
    if save_path != save_copy:
        print('Copying model and other training files to {}'.format(save_copy))
        copy_tree(save_path, save_copy)
示例#2
0
def execute(dataset,
            n_hidden_t_enc,
            n_hidden_s,
            num_epochs=500,
            learning_rate=.001,
            learning_rate_annealing=1.0,
            gamma=1,
            lmd=0.,
            disc_nonlinearity="sigmoid",
            keep_labels=1.0,
            prec_recall_cutoff=True,
            missing_labels_val=-1.0,
            which_fold=1,
            early_stop_criterion='loss',
            embedding_input='raw',
            save_path='/Tmp/romerosa/feature_selection/',
            save_copy='/Tmp/romerosa/feature_selection/',
            dataset_path='/Tmp/carriepl/datasets/',
            resume=False,
            exp_name=None):
    """Evaluate a saved supervised model and store its confusion matrices.

    The network (one rectified dense layer, an optional linear reconstruction
    head when ``gamma > 0``, and dropout/dense supervised layers) is rebuilt,
    the parameters in ``<save_copy>/<dataset>/<exp_name>/
    model_feat_sel_best.npz`` are loaded into it, the test set is run through
    it, and a 26x26 and a 5x5 confusion matrix are written to
    ``cm<which_fold>.npz`` in that same directory. If the parameter file does
    not exist a message is printed and nothing is evaluated.

    Args:
        dataset: Dataset identifier forwarded to ``mlh.load_data``.
        n_hidden_t_enc: Sequence whose last entry is the size of the first
            hidden layer.
        n_hidden_s: Sizes of the supervised hidden layers.
        learning_rate: Learning rate of the (compiled but unused here)
            training function.
        gamma: Weight of the reconstruction loss; the reconstruction head is
            only built when it is positive.
        lmd: Weight of the L2 penalty.
        disc_nonlinearity: ``"sigmoid"``, ``"linear"``, ``"rectify"`` or
            ``"softmax"``; output non-linearity of the classifier.
        keep_labels, missing_labels_val, which_fold, embedding_input:
            Forwarded to ``mlh.load_data`` / the loss helpers.
        prec_recall_cutoff: Forwarded to ``mlh.monitoring``.
        save_path, save_copy: Root directories; the model is read from and
            the confusion matrices are written under ``save_copy``.
        dataset_path: Directory forwarded to ``mlh.load_data``.
        exp_name: Experiment sub-directory name; ``None`` is treated as the
            empty string.
        num_epochs, learning_rate_annealing, early_stop_criterion, resume:
            Not used by this function.
    """

    # A None experiment name would break the string/path building below
    if exp_name is None:
        exp_name = ''

    # Load the dataset
    print("Loading data")
    x_train, y_train, x_valid, y_valid, x_test, y_test, \
        x_unsup, training_labels = mlh.load_data(
            dataset, dataset_path, None,
            which_fold=which_fold, keep_labels=keep_labels,
            missing_labels_val=missing_labels_val,
            embedding_input=embedding_input)

    # Extract required information from data
    n_samples, n_feats = x_train.shape
    print("Number of features : ", n_feats)
    print("Glorot init : ", 2.0 / (n_feats + n_hidden_t_enc[-1]))
    n_targets = y_train.shape[1]

    # Set some variables
    batch_size = 1

    # Preparing folder to save stuff
    print("Experiment: " + exp_name)
    save_path = os.path.join(save_path, dataset, exp_name)
    save_copy = os.path.join(save_copy, dataset, exp_name)
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    # Prepare Theano variables for inputs and targets
    input_var_sup = T.matrix('input_sup')
    target_var_sup = T.matrix('target_sup')
    lr = theano.shared(np.float32(learning_rate), 'learning_rate')

    # Build model
    print("Building model")
    discrim_net = InputLayer((None, n_feats), input_var_sup)
    discrim_net = DenseLayer(discrim_net,
                             num_units=n_hidden_t_enc[-1],
                             nonlinearity=rectify)

    # Reconstruct the input from the first hidden layer
    if gamma > 0:
        reconst_net = DenseLayer(discrim_net,
                                 num_units=n_feats,
                                 nonlinearity=linear)
        nets = [reconst_net]
    else:
        nets = [None]

    # Layers that actually exist (None marks a disabled reconstruction head)
    valid_nets = [net for net in nets if net is not None]

    # Add supervised hidden layers
    for hid in n_hidden_s:
        discrim_net = DropoutLayer(discrim_net)
        discrim_net = DenseLayer(discrim_net, num_units=hid)

    # NOTE(review): eval() is only safe because of this whitelist; the
    # assert is stripped under "python -O", so do not pass untrusted input.
    assert disc_nonlinearity in ["sigmoid", "linear", "rectify", "softmax"]
    discrim_net = DropoutLayer(discrim_net)
    discrim_net = DenseLayer(discrim_net,
                             num_units=n_targets,
                             nonlinearity=eval(disc_nonlinearity))

    print("Building and compiling training functions")

    # Build and compile training functions
    predictions, predictions_det = mh.define_predictions(nets, start=0)
    prediction_sup, prediction_sup_det = mh.define_predictions([discrim_net])
    prediction_sup = prediction_sup[0]
    prediction_sup_det = prediction_sup_det[0]

    # Define losses
    # reconstruction losses
    reconst_losses, reconst_losses_det = mh.define_reconst_losses(
        predictions, predictions_det, [input_var_sup])
    # supervised loss
    sup_loss, sup_loss_det = mh.define_sup_loss(disc_nonlinearity,
                                                prediction_sup,
                                                prediction_sup_det,
                                                keep_labels, target_var_sup,
                                                missing_labels_val)

    inputs = [input_var_sup, target_var_sup]
    # Skip the None placeholder: it is not a layer and has no parameters
    params = lasagne.layers.get_all_params([discrim_net] + valid_nets,
                                           trainable=True)

    print('Number of params: ' + str(len(params)))

    # Combine losses
    loss = sup_loss + gamma * reconst_losses[0]
    loss_det = sup_loss_det + gamma * reconst_losses_det[0]

    l2_penalty = apply_penalty(params, l2)
    loss = loss + lmd * l2_penalty
    loss_det = loss_det + lmd * l2_penalty

    # Compute network updates
    updates = lasagne.updates.rmsprop(loss, params, learning_rate=lr)

    # Apply norm constraints on the weights (2D parameters only)
    for k in updates.keys():
        if updates[k].ndim == 2:
            updates[k] = lasagne.updates.norm_constraint(updates[k], 1.0)

    # Compile training function
    train_fn = theano.function(inputs,
                               loss,
                               updates=updates,
                               on_unused_input='ignore')

    # Monitoring Labels (reconstruction entries are dropped when disabled)
    monitor_labels = ["reconst. loss"]
    monitor_labels = [
        i for i, j in zip(monitor_labels, reconst_losses) if j != 0
    ]
    monitor_labels += ["loss. sup.", "total loss"]

    # Build and compile test function
    val_outputs = reconst_losses_det
    val_outputs = [i for i, j in zip(val_outputs, reconst_losses) if j != 0]
    val_outputs += [sup_loss_det, loss_det]

    # Compute accuracy and add it to monitoring list
    test_acc, test_pred = mh.define_test_functions(disc_nonlinearity,
                                                   prediction_sup,
                                                   prediction_sup_det,
                                                   target_var_sup)
    monitor_labels.append("accuracy")
    val_outputs.append(test_acc)

    # Compile prediction function
    predict = theano.function([input_var_sup], test_pred)

    # Compile validation function
    val_fn = theano.function(inputs, [prediction_sup_det] + val_outputs,
                             on_unused_input='ignore')

    # Finally, launch the testing pass.
    print("Starting testing...")

    model_file = save_copy + '/model_feat_sel_best.npz'
    if not os.path.exists(model_file):
        print("No saved model to be tested and/or generate" " the embedding !")
    else:
        with np.load(model_file) as f:
            param_values = [f['arr_%d' % i] for i in range(len(f.files))]
            # Same layer order as before: reconstruction head(s), classifier
            lasagne.layers.set_all_param_values(
                valid_nets + [discrim_net], param_values)

            test_minibatches = mlh.iterate_minibatches(x_test,
                                                       y_test,
                                                       batch_size,
                                                       shuffle=False)

            test_err, pred, targets = mlh.monitoring(test_minibatches,
                                                     "test",
                                                     val_fn,
                                                     monitor_labels,
                                                     prec_recall_cutoff,
                                                     return_pred=True)

        lab = targets.argmax(1)
        pred_argmax = pred.argmax(1)

        # Each entry of continent_cat is iterated as a group of class ids
        continent_cat = mh.create_1000_genomes_continent_labels()

        lab_cont = np.zeros(lab.shape)
        pred_cont = np.zeros(pred_argmax.shape)

        for i, c in enumerate(continent_cat):
            for el in c:
                lab_cont[lab == el] = i
                pred_cont[pred_argmax == el] = i

        # Confusion matrices: rows are predictions, columns are labels
        cm_e = np.zeros((26, 26))
        cm_c = np.zeros((5, 5))

        for i in range(26):
            for j in range(26):
                cm_e[i, j] = ((pred_argmax == i) * (lab == j)).sum()

        for i in range(5):
            for j in range(5):
                cm_c[i, j] = ((pred_cont == i) * (lab_cont == j)).sum()

        np.savez(os.path.join(save_copy, 'cm' + str(which_fold) + '.npz'),
                 cm_e=cm_e,
                 cm_c=cm_c)

        print(os.path.join(save_copy, 'cm' + str(which_fold) + '.npz'))
示例#3
0
def execute(dataset, n_hidden_u, n_hidden_t_enc, n_hidden_t_dec, n_hidden_s,
            embedding_source=None,
            num_epochs=500, learning_rate=.001, learning_rate_annealing=1.0,
            alpha=1, beta=1, gamma=1, lmd=.0001, disc_nonlinearity="sigmoid",
            encoder_net_init=0.2, decoder_net_init=0.2, keep_labels=1.0,
            prec_recall_cutoff=True, missing_labels_val=-1.0, which_fold=0,
            early_stop_criterion='loss_sup_det', embedding_input='raw',
            model_path='/Tmp/' + os.environ["USER"] + '/feature_selection/newmodel/',
            save_path='/Tmp/' + os.environ["USER"] + '/feature_selection/',
            dataset_path='/Tmp/' + os.environ["USER"] + '/datasets/',
            resume=False, exp_name=''):
    """Evaluate a saved feature-embedding model and store confusion matrices.

    The feature embedding, reconstruction and supervised networks are rebuilt
    with the ``mh`` helpers, the parameters stored in
    ``<model_path>/<dataset>/<exp_name>/model_feat_sel_best.npz`` are loaded
    into them, the test set is run through the network, and a 26x26 and a 5x5
    confusion matrix are written to
    ``<save_path>/<dataset>/<exp_name>/cm<which_fold>.npz``.

    Args:
        dataset: Dataset identifier forwarded to ``mlh.load_data``.
        n_hidden_u, n_hidden_t_enc, n_hidden_t_dec, n_hidden_s: Layer sizes
            forwarded to the ``mh`` network builders. ``n_hidden_t_enc`` and
            ``n_hidden_t_dec`` must be non-empty and end with the same size.
        embedding_source: Forwarded to ``mlh.load_data`` and
            ``mh.build_feat_emb_nets``.
        learning_rate: Stored in a shared variable that is not used further.
        alpha, beta, gamma: Weights of the three reconstruction losses;
            ``beta`` is forced to 0 when ``gamma`` is 0.
        lmd: Weight of the L2 penalty.
        disc_nonlinearity: Output non-linearity name for the classifier.
        encoder_net_init, decoder_net_init: Forwarded to the ``mh`` builders.
        keep_labels, missing_labels_val, which_fold, embedding_input:
            Forwarded to ``mlh.load_data`` / the loss helpers.
        prec_recall_cutoff: Forwarded to ``mlh.monitoring``.
        model_path: Root directory the trained parameters are read from.
        save_path: Root directory the confusion matrices are written to.
        dataset_path: Directory forwarded to ``mlh.load_data``.
        exp_name: Experiment sub-directory, used verbatim; it must match the
            directory the model was saved under.
        num_epochs, learning_rate_annealing, early_stop_criterion, resume:
            Not used by this function.
    """

    # Load the dataset
    print("Loading data")
    x_train, y_train, x_valid, y_valid, x_test, y_test, \
        x_unsup, training_labels = mlh.load_data(
            dataset, dataset_path, embedding_source,
            which_fold=which_fold, keep_labels=keep_labels,
            missing_labels_val=missing_labels_val,
            embedding_input=embedding_input)

    if x_unsup is not None:
        n_samples_unsup = x_unsup.shape[1]
    else:
        n_samples_unsup = 0

    # Extract required information from data
    n_samples, n_feats = x_train.shape
    print("Number of features : ", n_feats)
    print("Glorot init : ", 2.0 / (n_feats + n_hidden_t_enc[-1]))
    n_targets = y_train.shape[1]

    # Set some variables
    batch_size = 1
    beta = gamma if (gamma == 0) else beta

    # Preparing folders to load from / save to
    print("Experiment: " + exp_name)
    model_path = os.path.join(model_path, dataset, exp_name)
    print(model_path)
    save_path = os.path.join(save_path, dataset, exp_name)
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    # Prepare Theano variables for inputs and targets
    input_var_sup = T.matrix('input_sup')
    input_var_unsup = theano.shared(x_unsup, 'input_unsup')  # x_unsup TBD
    target_var_sup = T.matrix('target_sup')
    lr = theano.shared(np.float32(learning_rate), 'learning_rate')

    # Build model
    print("Building model")

    # Some checkings
    # assert len(n_hidden_u) > 0
    assert len(n_hidden_t_enc) > 0
    assert len(n_hidden_t_dec) > 0
    assert n_hidden_t_dec[-1] == n_hidden_t_enc[-1]

    # Build feature embedding networks (encoding and decoding if gamma > 0)
    nets, embeddings, pred_feat_emb = mh.build_feat_emb_nets(
        embedding_source, n_feats, n_samples_unsup,
        input_var_unsup, n_hidden_u, n_hidden_t_enc,
        n_hidden_t_dec, gamma, encoder_net_init,
        decoder_net_init, save_path)

    # The decoder embedding may be absent; guard every access the same way
    dec_emb = embeddings[1] if len(embeddings) > 1 else None

    # Build feature embedding reconstruction networks (if alpha > 0, beta > 0)
    nets += mh.build_feat_emb_reconst_nets(
            [alpha, beta], n_samples_unsup, n_hidden_u,
            [n_hidden_t_enc, n_hidden_t_dec],
            nets, [encoder_net_init, decoder_net_init])

    # Supervised network
    discrim_net, hidden_rep = mh.build_discrim_net(
        batch_size, n_feats, input_var_sup, n_hidden_t_enc,
        n_hidden_s, embeddings[0], disc_nonlinearity, n_targets)

    # Reconstruct network
    nets += [mh.build_reconst_net(hidden_rep, dec_emb, n_feats, gamma)]

    # Falsy entries of nets mark disabled sub-networks; list() keeps the
    # concatenations below valid on Python 3, where filter() is lazy.
    valid_nets = list(filter(None, nets))

    # Load best model
    with np.load(os.path.join(model_path, 'model_feat_sel_best.npz')) as f:
        param_values = [f['arr_%d' % i]
                        for i in range(len(f.files))]
    lasagne.layers.set_all_param_values(valid_nets + [discrim_net],
                                        param_values)

    print("Building and compiling training functions")

    # Build and compile training functions
    predictions, predictions_det = mh.define_predictions(nets, start=2)
    prediction_sup, prediction_sup_det = mh.define_predictions([discrim_net])
    prediction_sup = prediction_sup[0]
    prediction_sup_det = prediction_sup_det[0]

    # Define losses
    # reconstruction losses
    _, reconst_losses_det = mh.define_reconst_losses(
        predictions, predictions_det, [input_var_unsup, input_var_unsup,
                                       input_var_sup])
    # supervised loss
    _, sup_loss_det = mh.define_sup_loss(
        disc_nonlinearity, prediction_sup, prediction_sup_det, keep_labels,
        target_var_sup, missing_labels_val)

    # Define inputs
    inputs = [input_var_sup, target_var_sup]

    # Combine losses
    loss_det = sup_loss_det + alpha*reconst_losses_det[0] + \
        beta*reconst_losses_det[1] + gamma*reconst_losses_det[2]

    # Define parameters
    params = lasagne.layers.get_all_params(
        [discrim_net] + valid_nets, trainable=True)

    l2_penalty = apply_penalty(params, l2)
    loss_det = loss_det + lmd*l2_penalty

    # Monitoring Labels (entries whose loss compares equal to 0 are dropped)
    monitor_labels = ["reconst. feat. W_enc",
                      "reconst. feat. W_dec",
                      "reconst. loss"]
    monitor_labels = [i for i, j in zip(monitor_labels, reconst_losses_det)
                      if j != 0]
    monitor_labels += ["feat. W_enc. mean", "feat. W_enc var"]
    if dec_emb is not None:
        monitor_labels += ["feat. W_dec. mean", "feat. W_dec var"]
    monitor_labels += ["loss. sup.", "total loss"]

    # Build and compile test function
    val_outputs = [i for i in reconst_losses_det if i != 0]
    val_outputs += [embeddings[0].mean(), embeddings[0].var()]
    if dec_emb is not None:
        val_outputs += [dec_emb.mean(), dec_emb.var()]
    val_outputs += [sup_loss_det, loss_det]

    # Compute accuracy and add it to monitoring list
    test_acc, test_pred = mh.define_test_functions(
        disc_nonlinearity, prediction_sup, prediction_sup_det, target_var_sup)
    monitor_labels.append("accuracy")
    val_outputs.append(test_acc)

    # Compile prediction function
    predict = theano.function([input_var_sup], test_pred)

    # Compile validation function
    val_fn = theano.function(inputs,
                             [prediction_sup_det] + val_outputs,
                             on_unused_input='ignore')

    # Finally, launch the testing loop.
    print("Starting testing...")
    test_minibatches = mlh.iterate_minibatches(x_test, y_test, batch_size,
                                               shuffle=False)
    test_err, pred, targets = mlh.monitoring(test_minibatches, "test", val_fn,
                                             monitor_labels, prec_recall_cutoff,
                                             return_pred=True)

    lab = targets.argmax(1)
    pred_argmax = pred.argmax(1)

    # Each entry of continent_cat is iterated as a group of class ids
    continent_cat = mh.create_1000_genomes_continent_labels()

    lab_cont = np.zeros(lab.shape)
    pred_cont = np.zeros(pred_argmax.shape)

    for i, c in enumerate(continent_cat):
        for el in c:
            lab_cont[lab == el] = i
            pred_cont[pred_argmax == el] = i

    # Confusion matrices: rows are predictions, columns are labels
    cm_e = np.zeros((26, 26))
    cm_c = np.zeros((5, 5))

    for i in range(26):
        for j in range(26):
            cm_e[i, j] = ((pred_argmax == i) * (lab == j)).sum()

    for i in range(5):
        for j in range(5):
            cm_c[i, j] = ((pred_cont == i) * (lab_cont == j)).sum()

    np.savez(os.path.join(save_path, 'cm'+str(which_fold)+'.npz'),
             cm_e=cm_e, cm_c=cm_c)

    print(os.path.join(save_path, 'cm' + str(which_fold) + '.npz'))
示例#4
0
def execute(
        dataset,
        n_hidden_u,
        n_hidden_t_enc,
        n_hidden_t_dec,
        n_hidden_s,
        embedding_source=None,
        num_epochs=500,
        learning_rate=.001,
        learning_rate_annealing=1.0,
        alpha=1,
        beta=1,
        gamma=1,
        lmd=.0001,
        disc_nonlinearity="sigmoid",
        encoder_net_init=0.2,
        decoder_net_init=0.2,
        keep_labels=1.0,
        prec_recall_cutoff=True,
        missing_labels_val=-1.0,
        which_fold=0,
        early_stop_criterion='loss_sup_det',
        embedding_input='raw',
        save_path='/Tmp/' + os.environ["USER"] +
        '/savepath/',  # a default value was needed?
        save_copy='/Tmp/' + os.environ["USER"] + '/savecopy/',
        dataset_path='/Tmp/' + os.environ["USER"] + '/datasets/',
        resume=False,
        exp_name='',
        random_proj=0):
    """Train the feature-embedding model and monitor it with early stopping.

    The function loads the data through ``mlh.load_data``, builds the feature
    embedding, embedding-reconstruction, supervised and input-reconstruction
    networks through the ``mh`` helpers, trains them jointly with RMSProp
    (2-D parameters are norm-constrained to 1.0 after each update), and
    writes checkpoints and monitored errors under
    ``save_path/dataset/<experiment name>``. At the end the directory is
    copied to ``save_copy/dataset/<experiment name>`` if the two differ.

    Args:
        dataset: Dataset identifier forwarded to ``mlh.load_data``; also used
            as a sub-directory name for the outputs.
        n_hidden_u, n_hidden_t_enc, n_hidden_t_dec, n_hidden_s: Layer-size
            sequences forwarded to the ``mh`` network builders.
            ``n_hidden_t_enc`` and ``n_hidden_t_dec`` must be non-empty and
            end with the same value (asserted).
        embedding_source: Forwarded to ``mlh.load_data`` and
            ``mh.build_feat_emb_nets``. When it is None, the output of
            ``pred_feat_emb()`` is saved as ``feature_embedding.npz`` at the
            end of training.
        num_epochs: Maximum number of training epochs.
        learning_rate: Initial learning rate.
        learning_rate_annealing: Factor the learning rate is multiplied by
            after every epoch.
        alpha, beta, gamma: Weights of the three reconstruction losses in the
            total loss. ``beta`` is forced to 0 when ``gamma == 0``.
        lmd: Weight of the L2 penalty on the updated parameters.
        disc_nonlinearity: Forwarded to ``mh.build_discrim_net``,
            ``mh.define_sup_loss`` and ``mh.define_test_functions``.
        encoder_net_init, decoder_net_init: Forwarded to the ``mh`` builders.
        keep_labels, missing_labels_val: Forwarded to ``mlh.load_data`` and
            ``mh.define_sup_loss``.
        prec_recall_cutoff: Forwarded to ``mlh.monitoring``.
        which_fold: Forwarded to ``mlh.load_data``.
        early_stop_criterion: Name of the monitored label used for early
            stopping. Only ``'accuracy'`` (higher is better) and
            ``'loss. sup.'`` (lower is better) can ever register an
            improvement.
            NOTE(review): the default ``'loss_sup_det'`` is not one of the
            monitored labels built below, so relying on the default raises
            ``ValueError`` after the first epoch.
        embedding_input: Forwarded to ``mlh.load_data``.
        save_path, save_copy, dataset_path: Base directories for outputs, the
            output copy, and the dataset.
        resume: When true, parameters are first restored from
            ``model_feat_sel_last.npz`` in the experiment directory.
        exp_name: Prefix of the experiment name.
        random_proj: Forwarded to ``mh.build_feat_emb_nets``.

    Raises:
        ValueError: If ``early_stop_criterion`` is not a monitored label.
    """
    # Load the dataset
    print("Loading data")
    x_train, y_train, x_valid, y_valid, x_test, y_test, \
        x_unsup, training_labels = mlh.load_data(
            dataset, dataset_path, embedding_source,
            which_fold=which_fold, keep_labels=keep_labels,
            missing_labels_val=missing_labels_val,
            embedding_input=embedding_input)

    n_samples_unsup = x_unsup.shape[1] if x_unsup is not None else 0

    # Extract required information from data
    n_samples, n_feats = x_train.shape
    print("Number of features : ", n_feats)
    print("Glorot init : ", 2.0 / (n_feats + n_hidden_t_enc[-1]))
    n_targets = y_train.shape[1]

    # Set some variables
    batch_size = 128
    test_batch_size = 138  # the test set uses its own minibatch size
    beta = gamma if (gamma == 0) else beta

    # Preparing folder to save stuff
    if embedding_source is None:
        embedding_name = embedding_input
    else:
        embedding_name = embedding_source.replace("_", "").split(".")[0]
        exp_name += embedding_name.rsplit('/', 1)[-1] + '_'

    exp_name += 'final_'

    exp_name += mlh.define_exp_name(keep_labels, alpha, beta, gamma, lmd,
                                    n_hidden_u, n_hidden_t_enc, n_hidden_t_dec,
                                    n_hidden_s, which_fold, embedding_input,
                                    learning_rate, decoder_net_init,
                                    encoder_net_init, early_stop_criterion,
                                    learning_rate_annealing)

    print("Experiment: " + exp_name)
    save_path = os.path.join(save_path, dataset, exp_name)
    save_copy = os.path.join(save_copy, dataset, exp_name)
    if not os.path.exists(save_path):
        os.makedirs(save_path)
    if not os.path.exists(save_copy):
        os.makedirs(save_copy)

    # Prepare Theano variables for inputs and targets
    input_var_sup = T.matrix('input_sup')
    input_var_unsup = theano.shared(x_unsup, 'input_unsup')  # x_unsup TBD
    target_var_sup = T.matrix('target_sup')
    lr = theano.shared(np.float32(learning_rate), 'learning_rate')

    # Build model
    print("Building model")

    # Some checkings
    # assert len(n_hidden_u) > 0
    assert len(n_hidden_t_enc) > 0
    assert len(n_hidden_t_dec) > 0
    assert n_hidden_t_dec[-1] == n_hidden_t_enc[-1]

    # Build feature embedding networks (encoding and decoding if gamma > 0)
    nets, embeddings, pred_feat_emb = mh.build_feat_emb_nets(
        embedding_source, n_feats, n_samples_unsup, input_var_unsup,
        n_hidden_u, n_hidden_t_enc, n_hidden_t_dec, gamma, encoder_net_init,
        decoder_net_init, save_path, random_proj)

    # Build feature embedding reconstruction networks (if alpha > 0, beta > 0)
    nets += mh.build_feat_emb_reconst_nets(
        [alpha, beta], n_samples_unsup, n_hidden_u,
        [n_hidden_t_enc, n_hidden_t_dec], nets,
        [encoder_net_init, decoder_net_init])

    # Supervised network
    discrim_net, hidden_rep = mh.build_discrim_net(
        batch_size, n_feats, input_var_sup, n_hidden_t_enc, n_hidden_s,
        embeddings[0], disc_nonlinearity, n_targets)

    # The decoder embedding may be absent from the list or present as None.
    has_dec_emb = len(embeddings) > 1 and embeddings[1] is not None

    # Reconstruct network
    nets += [
        mh.build_reconst_net(hidden_rep,
                             embeddings[1] if len(embeddings) > 1 else None,
                             n_feats, gamma)
    ]

    # Entries of `nets` can be None; keep only the built ones. A list
    # comprehension (same truthiness test as filter(None, nets)) is used
    # because filter() returns an iterator on Python 3 and cannot be
    # concatenated with a list.
    active_nets = [net for net in nets if net]
    saved_layers = active_nets + [discrim_net]

    def _load_params(filename):
        """Restore the parameters of `saved_layers` from save_path/filename."""
        with np.load(os.path.join(save_path, filename)) as f:
            param_values = [f['arr_%d' % i] for i in range(len(f.files))]
        nparams = len(lasagne.layers.get_all_params(saved_layers))
        lasagne.layers.set_all_param_values(saved_layers,
                                            param_values[:nparams])

    # Load weights if we are resuming job
    if resume:
        _load_params('model_feat_sel_last.npz')

    print("Building and compiling training functions")

    # Build and compile training functions
    predictions, predictions_det = mh.define_predictions(nets, start=2)
    prediction_sup, prediction_sup_det = mh.define_predictions([discrim_net])
    prediction_sup = prediction_sup[0]
    prediction_sup_det = prediction_sup_det[0]

    # Define losses
    # reconstruction losses
    reconst_losses, reconst_losses_det = mh.define_reconst_losses(
        predictions, predictions_det,
        [input_var_unsup, input_var_unsup, input_var_sup])
    # supervised loss
    sup_loss, sup_loss_det = mh.define_sup_loss(disc_nonlinearity,
                                                prediction_sup,
                                                prediction_sup_det,
                                                keep_labels, target_var_sup,
                                                missing_labels_val)

    # Define inputs
    inputs = [input_var_sup, target_var_sup]

    # Define parameters
    params = lasagne.layers.get_all_params([discrim_net] + active_nets,
                                           trainable=True)
    params_to_freeze = lasagne.layers.get_all_params(active_nets,
                                                     trainable=False)

    print('Number of params discrim: ' + str(len(params)))
    print('Number of params to freeze: ' + str(len(params_to_freeze)))

    # Drop every frozen parameter from the list of parameters to update.
    for p in params_to_freeze:
        params = [el for el in params if el != p]

    print('Number of params to update: ' + str(len(params)))

    # Combine losses
    loss = sup_loss + alpha*reconst_losses[0] + beta*reconst_losses[1] + \
        gamma*reconst_losses[2]
    loss_det = sup_loss_det + alpha*reconst_losses_det[0] + \
        beta*reconst_losses_det[1] + gamma*reconst_losses_det[2]

    l2_penalty = apply_penalty(params, l2)
    loss = loss + lmd * l2_penalty
    loss_det = loss_det + lmd * l2_penalty

    # Compute network updates
    updates = lasagne.updates.rmsprop(loss, params, learning_rate=lr)

    # Apply norm constraints on the weights
    for k in updates.keys():
        if updates[k].ndim == 2:
            updates[k] = lasagne.updates.norm_constraint(updates[k], 1.0)

    # Compile training function
    train_fn = theano.function(inputs,
                               loss,
                               updates=updates,
                               on_unused_input='ignore')

    # Monitoring Labels: one per reconstruction loss that is actually used
    monitor_labels = [
        "reconst. feat. W_enc", "reconst. feat. W_dec", "reconst. loss"
    ]
    monitor_labels = [
        i for i, j in zip(monitor_labels, reconst_losses) if j != 0
    ]
    monitor_labels += ["feat. W_enc. mean", "feat. W_enc var"]
    if has_dec_emb:
        monitor_labels += ["feat. W_dec. mean", "feat. W_dec var"]
    monitor_labels += ["loss. sup.", "total loss"]

    # Build and compile test function (same order as monitor_labels)
    val_outputs = [
        i for i, j in zip(reconst_losses_det, reconst_losses) if j != 0
    ]
    val_outputs += [embeddings[0].mean(), embeddings[0].var()]
    if has_dec_emb:
        val_outputs += [embeddings[1].mean(), embeddings[1].var()]
    val_outputs += [sup_loss_det, loss_det]

    # Compute accuracy and add it to monitoring list
    test_acc, test_pred = mh.define_test_functions(disc_nonlinearity,
                                                   prediction_sup,
                                                   prediction_sup_det,
                                                   target_var_sup)
    monitor_labels.append("accuracy")
    val_outputs.append(test_acc)

    # Compile prediction function
    predict = theano.function([input_var_sup], test_pred)

    # Compile validation function
    val_fn = theano.function(inputs, [prediction_sup_det] + val_outputs,
                             on_unused_input='ignore')

    # Finally, launch the training loop.
    print("Starting training...")

    # Some variables
    max_patience = 100
    patience = 0

    train_monitored = []
    valid_monitored = []
    train_loss = []

    # Bound up front so the final report cannot hit an unbound name when the
    # test set has no labels (or no epoch is run).
    test_err = None

    def _monitor(x, y, split, size=batch_size):
        """Run val_fn over (x, y) in order and return the monitored values."""
        minibatches = mlh.iterate_minibatches(x, y, size, shuffle=False)
        return mlh.monitoring(minibatches, split, val_fn, monitor_labels,
                              prec_recall_cutoff)

    def _save_checkpoint(tag):
        """Save current parameters and the monitored errors as `tag`."""
        np.savez(os.path.join(save_path, 'model_feat_sel_%s.npz' % tag),
                 *lasagne.layers.get_all_param_values(saved_layers))
        # list(...) keeps the Python 2 result and avoids handing a lazy zip
        # object to numpy on Python 3.
        np.savez(save_path + "/errors_supervised_%s.npz" % tag,
                 list(zip(*train_monitored)), list(zip(*valid_monitored)))

    # Pre-training monitoring
    print("Epoch 0 of {}".format(num_epochs))
    train_err = _monitor(x_train, y_train, "train")
    valid_err = _monitor(x_valid, y_valid, "valid")

    # Training loop
    start_training = time.time()
    for epoch in range(num_epochs):
        start_time = time.time()
        print("Epoch {} of {}".format(epoch + 1, num_epochs))
        nb_minibatches = 0
        loss_epoch = 0

        # Train pass
        for batch in mlh.iterate_minibatches(x_train,
                                             training_labels,
                                             batch_size,
                                             shuffle=True):
            loss_epoch += train_fn(*batch)
            nb_minibatches += 1

        loss_epoch /= nb_minibatches
        train_loss += [loss_epoch]

        # Monitoring on the training set
        train_err = _monitor(x_train, y_train, "train")
        train_monitored += [train_err]

        # Monitoring on the validation set
        valid_err = _monitor(x_valid, y_valid, "valid")
        valid_monitored += [valid_err]

        # Only the label lookup is guarded, and only for the error it is
        # expected to raise, so interrupts and unrelated failures propagate.
        try:
            early_stop_idx = monitor_labels.index(early_stop_criterion)
        except ValueError:
            raise ValueError("There is no monitored value by the name of %s" %
                             early_stop_criterion)
        early_stop_val = valid_err[early_stop_idx]

        # Early stopping
        if epoch == 0:
            best_valid = early_stop_val
        elif (early_stop_val > best_valid and early_stop_criterion == 'accuracy') or \
             (early_stop_val < best_valid and early_stop_criterion == 'loss. sup.'):
            best_valid = early_stop_val
            patience = 0

            # Save stuff
            _save_checkpoint('best')

            # Monitor on the test set now because sometimes the saving doesn't
            # go well and there isn't a model to load at the end of training
            if y_test is not None:
                test_err = _monitor(x_test, y_test, "test", test_batch_size)
        else:
            patience += 1
            # Save stuff
            _save_checkpoint('last')

        # End training
        if patience == max_patience or epoch == num_epochs - 1:
            print("Ending training")
            # Load best model. It only exists if validation improved at least
            # once after the first epoch; otherwise keep the current weights.
            if os.path.exists(os.path.join(save_path,
                                           'model_feat_sel_best.npz')):
                _load_params('model_feat_sel_best.npz')
            else:
                print("No saved model to be tested and/or generate"
                      " the embedding !")

            if embedding_source is None:
                # Save embedding
                pred = pred_feat_emb()
                np.savez(os.path.join(save_path, 'feature_embedding.npz'),
                         pred)

            # Training set results
            train_err = _monitor(x_train, y_train, "train")

            # Validation set results
            valid_err = _monitor(x_valid, y_valid, "valid")

            # Test set results
            if y_test is not None:
                test_err = _monitor(x_test, y_test, "test", test_batch_size)
                np.savez(os.path.join(save_path, 'final_errors.npz'), test_err)
            else:
                # Accumulate the predictions of every minibatch; the list
                # must be created once, outside the loop.
                test_predictions = []
                for minibatch in mlh.iterate_testbatches(x_test,
                                                         test_batch_size,
                                                         shuffle=False):
                    test_predictions += [predict(minibatch)]
                np.savez(os.path.join(save_path, 'test_predictions.npz'),
                         test_predictions)

            # Stop
            print("  epoch time:\t\t\t{:.3f}s \n".format(time.time() -
                                                         start_time))
            break

        print("  epoch time:\t\t\t{:.3f}s \n".format(time.time() - start_time))

        # Anneal the learning rate
        lr.set_value(float(lr.get_value() * learning_rate_annealing))

    # Print and save all final errors for train, validation and test
    print("Training time:\t\t\t{:.3f}s".format(time.time() - start_training))
    print("test_err:", test_err)

    # Copy files to loadpath
    if save_path != save_copy:
        print('Copying model and other training files to {}'.format(save_copy))
        copy_tree(save_path, save_copy)
def execute(dataset,
            n_hidden_t_enc,
            n_hidden_s,
            num_epochs=500,
            learning_rate=.001,
            learning_rate_annealing=1.0,
            gamma=1,
            lmd=0.,
            disc_nonlinearity="sigmoid",
            keep_labels=1.0,
            prec_recall_cutoff=True,
            missing_labels_val=-1.0,
            which_fold=1,
            early_stop_criterion='loss',
            save_path='/Tmp/romerosa/DietNetworks/',
            save_copy='/Tmp/romerosa/DietNetworks/',
            dataset_path='/Tmp/carriepl/datasets/',
            resume=False):
    """Train a plain dense classifier, optionally with input reconstruction.

    The model is a rectified dense layer of ``n_hidden_t_enc[-1]`` units on
    the raw input, followed by dropout + dense layers of sizes ``n_hidden_s``
    and a dropout + dense output layer with ``disc_nonlinearity``. When
    ``gamma > 0`` a linear dense layer reconstructing the input is attached
    to the first hidden layer and its loss is added with weight ``gamma``.
    Training uses RMSProp (2-D parameters are norm-constrained to 1.0 after
    each update) with early stopping; checkpoints and monitored errors are
    written under ``save_path/dataset/<experiment name>`` and copied to
    ``save_copy/dataset/<experiment name>`` if the two differ.

    Args:
        dataset: Dataset identifier forwarded to ``mlh.load_data``; also used
            as a sub-directory name for the outputs.
        n_hidden_t_enc: Sequence whose last element is the size of the first
            hidden layer.
        n_hidden_s: Sizes of the additional supervised hidden layers.
        num_epochs: Maximum number of training epochs.
        learning_rate: Initial learning rate.
        learning_rate_annealing: Factor the learning rate is multiplied by
            after every epoch.
        gamma: Weight of the reconstruction loss; the reconstruction layer is
            only built when it is positive.
        lmd: Weight of the L2 penalty on the trainable parameters.
        disc_nonlinearity: One of "sigmoid", "linear", "rectify", "softmax".
        keep_labels, missing_labels_val: Forwarded to ``mlh.load_data`` and
            ``mh.define_sup_loss``.
        prec_recall_cutoff: Forwarded to ``mlh.monitoring``.
        which_fold: Forwarded to ``mlh.load_data``.
        early_stop_criterion: Name of the monitored label used for early
            stopping. Only ``'accuracy'`` (higher is better) and
            ``'loss. sup.'`` (lower is better) can ever register an
            improvement.
            NOTE(review): the default ``'loss'`` is not one of the monitored
            labels built below, so relying on the default raises
            ``ValueError`` after the first epoch.
        save_path, save_copy, dataset_path: Base directories for outputs, the
            output copy, and the dataset.
        resume: Accepted but not used by this function.

    Raises:
        ValueError: If ``early_stop_criterion`` is not a monitored label.
    """
    # Load the dataset
    print("Loading data")
    x_train, y_train, x_valid, y_valid, x_test, y_test, \
        x_unsup, training_labels = mlh.load_data(
            dataset, dataset_path, None,
            which_fold=which_fold, keep_labels=keep_labels,
            missing_labels_val=missing_labels_val,
            embedding_input='raw')

    # Extract required information from data
    n_samples, n_feats = x_train.shape
    print("Number of features : ", n_feats)
    print("Glorot init : ", 2.0 / (n_feats + n_hidden_t_enc[-1]))
    n_targets = y_train.shape[1]

    # Set some variables
    batch_size = 128

    # Preparing folder to save stuff
    # NOTE(review): the other visible callers of mlh.define_exp_name pass an
    # `embedding_input` argument between `which_fold` and `learning_rate`;
    # confirm this 15-argument call against the helper's signature.
    exp_name = 'basic_' + mlh.define_exp_name(
        keep_labels, 0, 0, gamma, lmd, [], n_hidden_t_enc, [], n_hidden_s,
        which_fold, learning_rate, 0, 0, early_stop_criterion,
        learning_rate_annealing)
    print("Experiment: " + exp_name)
    save_path = os.path.join(save_path, dataset, exp_name)
    save_copy = os.path.join(save_copy, dataset, exp_name)
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    # Prepare Theano variables for inputs and targets
    input_var_sup = T.matrix('input_sup')
    target_var_sup = T.matrix('target_sup')
    lr = theano.shared(np.float32(learning_rate), 'learning_rate')

    # Build model
    print("Building model")
    discrim_net = InputLayer((None, n_feats), input_var_sup)
    discrim_net = DenseLayer(discrim_net,
                             num_units=n_hidden_t_enc[-1],
                             nonlinearity=rectify)

    # Reconstruct the input from the first hidden layer
    if gamma > 0:
        reconst_net = DenseLayer(discrim_net,
                                 num_units=n_feats,
                                 nonlinearity=linear)
        nets = [reconst_net]
    else:
        nets = [None]

    # Add supervised hidden layers
    for hid in n_hidden_s:
        discrim_net = DropoutLayer(discrim_net)
        discrim_net = DenseLayer(discrim_net, num_units=hid)

    # SECURITY NOTE: eval() on a caller-supplied string. It is only safe
    # because of the whitelist assert just above it, and asserts are removed
    # under `python -O`; do not pass untrusted values here.
    assert disc_nonlinearity in ["sigmoid", "linear", "rectify", "softmax"]
    discrim_net = DropoutLayer(discrim_net)
    discrim_net = DenseLayer(discrim_net,
                             num_units=n_targets,
                             nonlinearity=eval(disc_nonlinearity))

    # `nets` holds None when no reconstruction layer was built; keep only the
    # built ones. A list comprehension (same truthiness test as
    # filter(None, nets)) is used because filter() returns an iterator on
    # Python 3 and cannot be concatenated with a list.
    active_nets = [net for net in nets if net]
    saved_layers = active_nets + [discrim_net]

    print("Building and compiling training functions")

    # Build and compile training functions
    predictions, predictions_det = mh.define_predictions(nets, start=0)
    prediction_sup, prediction_sup_det = mh.define_predictions([discrim_net])
    prediction_sup = prediction_sup[0]
    prediction_sup_det = prediction_sup_det[0]

    # Define losses
    # reconstruction losses
    reconst_losses, reconst_losses_det = mh.define_reconst_losses(
        predictions, predictions_det, [input_var_sup])
    # supervised loss
    sup_loss, sup_loss_det = mh.define_sup_loss(disc_nonlinearity,
                                                prediction_sup,
                                                prediction_sup_det,
                                                keep_labels, target_var_sup,
                                                missing_labels_val)

    inputs = [input_var_sup, target_var_sup]
    # Use the filtered list so a None placeholder is never handed to lasagne,
    # matching what the save/load code below does.
    params = lasagne.layers.get_all_params([discrim_net] + active_nets,
                                           trainable=True)

    print('Number of params: ' + str(len(params)))

    # Combine losses
    loss = sup_loss + gamma * reconst_losses[0]
    loss_det = sup_loss_det + gamma * reconst_losses_det[0]

    l2_penalty = apply_penalty(params, l2)
    loss = loss + lmd * l2_penalty
    loss_det = loss_det + lmd * l2_penalty

    # Compute network updates
    updates = lasagne.updates.rmsprop(loss, params, learning_rate=lr)

    # Apply norm constraints on the weights
    for k in updates.keys():
        if updates[k].ndim == 2:
            updates[k] = lasagne.updates.norm_constraint(updates[k], 1.0)

    # Compile training function
    train_fn = theano.function(inputs,
                               loss,
                               updates=updates,
                               on_unused_input='ignore')

    # Monitoring Labels: the reconstruction label is kept only if that loss
    # is actually used
    monitor_labels = ["reconst. loss"]
    monitor_labels = [
        i for i, j in zip(monitor_labels, reconst_losses) if j != 0
    ]
    monitor_labels += ["loss. sup.", "total loss"]

    # Build and compile test function (same order as monitor_labels)
    val_outputs = [
        i for i, j in zip(reconst_losses_det, reconst_losses) if j != 0
    ]
    val_outputs += [sup_loss_det, loss_det]

    # Compute accuracy and add it to monitoring list
    test_acc, test_pred = mh.define_test_functions(disc_nonlinearity,
                                                   prediction_sup,
                                                   prediction_sup_det,
                                                   target_var_sup)
    monitor_labels.append("accuracy")
    val_outputs.append(test_acc)

    # Compile prediction function
    predict = theano.function([input_var_sup], test_pred)

    # Compile validation function
    val_fn = theano.function(inputs, [prediction_sup_det] + val_outputs,
                             on_unused_input='ignore')

    # Finally, launch the training loop.
    print("Starting training...")

    # Some variables
    max_patience = 100
    patience = 0

    train_monitored = []
    valid_monitored = []
    train_loss = []

    def _monitor(x, y, split):
        """Run val_fn over (x, y) in order and return the monitored values."""
        minibatches = mlh.iterate_minibatches(x, y, batch_size, shuffle=False)
        return mlh.monitoring(minibatches, split, val_fn, monitor_labels,
                              prec_recall_cutoff)

    def _save_checkpoint(tag):
        """Save current parameters and the monitored errors as `tag`."""
        np.savez(os.path.join(save_path, 'model_%s.npz' % tag),
                 *lasagne.layers.get_all_param_values(saved_layers))
        # list(...) keeps the Python 2 result and avoids handing a lazy zip
        # object to numpy on Python 3.
        np.savez(save_path + "/errors_supervised_%s.npz" % tag,
                 list(zip(*train_monitored)), list(zip(*valid_monitored)))

    # Pre-training monitoring
    print("Epoch 0 of {}".format(num_epochs))
    train_err = _monitor(x_train, y_train, "train")
    valid_err = _monitor(x_valid, y_valid, "valid")

    # Training loop
    start_training = time.time()
    for epoch in range(num_epochs):
        start_time = time.time()
        print("Epoch {} of {}".format(epoch + 1, num_epochs))
        nb_minibatches = 0
        loss_epoch = 0

        # Train pass
        for batch in mlh.iterate_minibatches(x_train,
                                             training_labels,
                                             batch_size,
                                             shuffle=True):
            loss_epoch += train_fn(*batch)
            nb_minibatches += 1

        loss_epoch /= nb_minibatches
        train_loss += [loss_epoch]

        # Monitoring on the training set
        train_err = _monitor(x_train, y_train, "train")
        train_monitored += [train_err]

        # Monitoring on the validation set
        valid_err = _monitor(x_valid, y_valid, "valid")
        valid_monitored += [valid_err]

        # Only the label lookup is guarded, and only for the error it is
        # expected to raise, so interrupts and unrelated failures propagate.
        try:
            early_stop_idx = monitor_labels.index(early_stop_criterion)
        except ValueError:
            raise ValueError("There is no monitored value by the name of %s" %
                             early_stop_criterion)
        early_stop_val = valid_err[early_stop_idx]

        # Early stopping
        if epoch == 0:
            best_valid = early_stop_val
        elif (early_stop_val > best_valid and early_stop_criterion == 'accuracy') or \
             (early_stop_val < best_valid and early_stop_criterion ==
              'loss. sup.'):
            best_valid = early_stop_val
            patience = 0

            # Save stuff
            _save_checkpoint('best')
        else:
            patience += 1
            _save_checkpoint('last')

        # End training
        if patience == max_patience or epoch == num_epochs - 1:
            print("Ending training")
            # Load best model
            if not os.path.exists(save_path + '/model_best.npz'):
                print("No saved model to be tested and/or generate"
                      " the embedding !")
            else:
                with np.load(save_path + '/model_best.npz', ) as f:
                    param_values = [
                        f['arr_%d' % i] for i in range(len(f.files))
                    ]
                    lasagne.layers.set_all_param_values(
                        saved_layers, param_values)

            # Training set results
            train_err = _monitor(x_train, y_train, "train")

            # Validation set results
            valid_err = _monitor(x_valid, y_valid, "valid")

            # Test set results
            if y_test is not None:
                test_err = _monitor(x_test, y_test, "test")
            else:
                # Accumulate the predictions of every minibatch; the list
                # must be created once, outside the loop.
                test_predictions = []
                for minibatch in mlh.iterate_testbatches(x_test,
                                                         batch_size,
                                                         shuffle=False):
                    test_predictions += [predict(minibatch)]
                np.savez(os.path.join(save_path, 'test_predictions.npz'),
                         test_predictions)

            # Stop
            print("  epoch time:\t\t\t{:.3f}s \n".format(time.time() -
                                                         start_time))
            break

        print("  epoch time:\t\t\t{:.3f}s \n".format(time.time() - start_time))

        # Anneal the learning rate
        lr.set_value(float(lr.get_value() * learning_rate_annealing))

    # Print all final errors for train, validation and test
    print("Training time:\t\t\t{:.3f}s".format(time.time() - start_training))

    # Copy files to loadpath
    if save_path != save_copy:
        print('Copying model and other training files to {}'.format(save_copy))
        copy_tree(save_path, save_copy)