def run_adgmssl_mnist():
    """
    Train an auxiliary deep generative model on the MNIST dataset with 100 evenly distributed labels.
    """
    n_labeled = 100  # The total number of labeled data points.
    n_samples = 100  # The number of sampled labeled data points for each batch.
    n_batches = 600  # The number of batches.
    mnist_data = mnist.load_semi_supervised(n_batches=n_batches, n_labeled=n_labeled, n_samples=n_samples,
                                            filter_std=0.0, seed=123456, train_valid_combine=True)

    n, n_x = mnist_data[0][0].shape  # Number of data points and number of input features.
    bs = n / n_batches  # The batch size (Python 2 integer division).

    # Initialize the auxiliary deep generative model.
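    # n_a and n_z are the dimensionalities of the auxiliary and latent variables, n_y the
    # number of classes, and the *_hidden lists give the layer sizes of the corresponding
    # MLPs. rectify (ReLU) is the hidden activation, and the Bernoulli x-distribution
    # matches the (near-)binary MNIST pixel values.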
    model = ADGMSSL(n_x=n_x, n_a=100, n_z=100, n_y=10, a_hidden=[500, 500],
                    z_hidden=[500, 500], xhat_hidden=[500, 500], y_hidden=[500, 500],
                    trans_func=rectify, x_dist='bernoulli')

    # Get the training functions.
    f_train, f_test, f_validate, train_args, test_args, validate_args = model.build_model(*mnist_data)
    # Update the default function arguments.
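    # 'beta' presumably weights the supervised (classification) term of the objective (it is
    # scaled with the dataset size); 'learningrate', 'beta1' and 'beta2' are presumably
    # Adam-style optimizer settings; 'samples' is the number of Monte Carlo samples used to
    # estimate the variational bound.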
    train_args['inputs']['batchsize'] = bs
    train_args['inputs']['batchsize_labeled'] = n_samples
    train_args['inputs']['beta'] = 0.01 * n
    train_args['inputs']['learningrate'] = 3e-4
    train_args['inputs']['beta1'] = 0.9
    train_args['inputs']['beta2'] = 0.999
    train_args['inputs']['samples'] = 10  # If running on a CPU, set the number of samples to 1.
    test_args['inputs']['samples'] = 1
    validate_args['inputs']['samples'] = 1

    # Evaluate the approximate classification error with 100 MC samples for a good estimate.
    def error_evaluation(*args):
        mean_evals = model.get_output(mnist_data[1][0], 100)
        t_class = np.argmax(mnist_data[1][1], axis=1)
        y_class = np.argmax(mean_evals, axis=1)
        misclass = (np.sum(y_class != t_class, dtype='float32') / len(y_class)) * 100.
        train.write_to_logger("test 100-samples: %0.2f%%." % misclass)

    # Define the training loop. Output training evaluations every epoch and the more accurate
    # estimate of the classification error every 10 epochs.
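    # anneal_lr/anneal_lr_freq presumably multiply the learning rate by 0.75 every 200 epochs,
    # and pickle_f_custom_freq controls how often f_custom_eval (error_evaluation above) is
    # run and the model is pickled.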
    train = TrainModel(model=model, anneal_lr=.75, anneal_lr_freq=200, output_freq=1,
                       pickle_f_custom_freq=10, f_custom_eval=error_evaluation)
    train.add_initial_training_notes("Training the auxiliary deep generative model with %i labels." % n_labeled)
    train.train_model(f_train, train_args,
                      f_test, test_args,
                      f_validate, validate_args,
                      n_train_batches=n_batches,
                      n_epochs=3000)


def run_adgmssl_mnist():
    """
    Evaluate an auxiliary deep generative model on the MNIST dataset with 100 evenly distributed labels.
    """

    # Load the mnist supervised dataset for evaluation.
    (train_x, train_t), (test_x, test_t), (valid_x, valid_t) = mnist.load_supervised(filter_std=0.0,
                                                                                     train_valid_combine=True)

    # Initialize the auxiliary deep generative model.
    model = ADGMSSL(n_x=train_x.shape[-1], n_a=100, n_z=100, n_y=10, a_hidden=[500, 500],
                    z_hidden=[500, 500], xhat_hidden=[500, 500], y_hidden=[500, 500],
                    trans_func=rectify, x_dist='bernoulli')

    model_id = 20151209002003  # Insert the trained model id here.
    model.load_model(model_id)  # Load trained model. See configurations in the log file.

    # Evaluate the test error of the ADGM.
    mean_evals = model.get_output(test_x, 100)  # 100 MC samples to get a good estimate over the auxiliary unit.
    t_class = np.argmax(test_t, axis=1)
    y_class = np.argmax(mean_evals, axis=1)
    class_err = np.sum(y_class != t_class, dtype='float32') / len(y_class) * 100.  # Error rate in percent.
    print "test set 100-samples: %0.2f%%." % class_err

    # Evaluate the active units in the auxiliary and latent distributions.
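    # For a diagonal Gaussian posterior, E_q[log p(.)] - E_q[log q(.|x)] equals minus the
    # per-unit KL divergence KL(q(.|x) || p(.)), so the quantities below measure the average
    # KL per unit; units with a KL near zero have collapsed to the prior and are inactive.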
    f_a_mu_logvar = theano.function([model.sym_x_l], get_output([model.l_a_mu, model.l_a_logvar], model.sym_x_l))
    q_a_mu, q_a_logvar = f_a_mu_logvar(test_x)
    log_pa = -0.5 * (np.log(2 * np.pi) + (q_a_mu ** 2 + np.exp(q_a_logvar)))
    log_qa_x = -0.5 * (np.log(2 * np.pi) + 1 + q_a_logvar)
    diff_pa_qa_x = (log_pa - log_qa_x).mean(axis=(1, 2))
    mean_diff_pa_qa_x = np.abs(np.mean(diff_pa_qa_x, axis=0))

    inputs = {model.l_x_in: model.sym_x_l, model.l_y_in: model.sym_t_l}
    f_z_mu_logvar = theano.function([model.sym_x_l, model.sym_t_l],
                                    get_output([model.l_z_mu, model.l_z_logvar], inputs))
    q_z_mu, q_z_logvar = f_z_mu_logvar(test_x, test_t)
    log_pz = -0.5 * (np.log(2 * np.pi) + (q_z_mu ** 2 + np.exp(q_z_logvar)))
    log_qz_x = -0.5 * (np.log(2 * np.pi) + 1 + q_z_logvar)
    diff_pz_qz_x = (log_pz - log_qz_x).mean(axis=(1, 2))
    mean_diff_pz_qz_x = np.abs(np.mean(diff_pz_qz_x, axis=0))

    plt.figure()
    plt.subplot(111, axisbg='white')
    plt.plot(sorted(mean_diff_pa_qa_x)[::-1], color="#c0392b", label=r"$\log \frac{p(a_i)}{q(a_i|x)}$")
    plt.plot(sorted(mean_diff_pz_qz_x)[::-1], color="#9b59b6", label=r"$\log \frac{p(z_i)}{q(z_i|x)}$")
    plt.grid(color='0.9', linestyle='dashed', axis="y")
    plt.xlabel("stochastic units")
    plt.ylabel(r"$\log \frac{p(\cdot)}{q(\cdot)}$")
    plt.ylim((0, 2.7))
    plt.legend()
    plt.savefig("output/diff.png", format="png")

    # Draw 100 normally distributed samples with a fixed class y in the latent space and generate xhat.
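    # z holds table_size**2 = 100 draws from the latent prior and y cycles through the ten
    # one-hot classes (10 consecutive samples per class), so each row of the resulting 10x10
    # image grid shows generations conditioned on a single fixed class label.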
    table_size = 10
    samples = 1
    z = np.random.standard_normal((table_size ** 2, 100))  # z ~ N(0, I), matching the latent prior.
    y = np.eye(10, k=0).reshape(10, 1, 10).repeat(10, axis=1).reshape((-1, 10))
    xhat = model.f_xhat(z, y, samples)

    plt.figure(figsize=(20, 20), dpi=300)
    i = 0
    img_out = np.zeros((28 * table_size, 28 * table_size))
    for row in range(table_size):
        for col in range(table_size):
            ra, rb = row * 28, (row + 1) * 28
            ca, cb = col * 28, (col + 1) * 28
            im = np.reshape(xhat[i], (28, 28))
            img_out[ra:rb, ca:cb] = im
            i += 1
    plt.matshow(img_out, cmap=plt.cm.binary)
    plt.xticks(np.array([]))
    plt.yticks(np.array([]))
    plt.savefig("output/mnist.png", format="png")