Example #1
def demo_compare_dtp_optimizers(
    hidden_sizes=[240],
    n_epochs=10,
    minibatch_size=20,
    n_tests=20,
    hidden_activation='tanh',
):

    dataset = get_mnist_dataset(flat=True).to_onehot()

    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 1
        n_tests = 2

    def make_dtp_net(optimizer_constructor, output_fcn):
        return DifferenceTargetMLP.from_initializer(
            input_size=dataset.input_size,
            output_size=dataset.target_size,
            hidden_sizes=hidden_sizes,
            optimizer_constructor=optimizer_constructor,
            input_activation='sigm',
            hidden_activation=hidden_activation,
            output_activation=output_fcn,
            w_init_mag=0.01,
            noise=1,
        ).compile()

    learning_curves = compare_predictors(
        dataset=dataset,
        online_predictors={
            'SGD-0.001-softmax':
            make_dtp_net(lambda: SimpleGradientDescent(0.001),
                         output_fcn='softmax'),
            'AdaMax-0.001-softmax':
            make_dtp_net(lambda: AdaMax(0.001), output_fcn='softmax'),
            'RMSProp-0.001-softmax':
            make_dtp_net(lambda: RMSProp(0.001), output_fcn='softmax'),
            'SGD-0.001-sigm':
            make_dtp_net(lambda: SimpleGradientDescent(0.001),
                         output_fcn='sigm'),
            'AdaMax-0.001-sigm':
            make_dtp_net(lambda: AdaMax(0.001), output_fcn='sigm'),
            'RMSProp-0.001-sigm':
            make_dtp_net(lambda: RMSProp(0.001), output_fcn='sigm'),
        },
        minibatch_size=minibatch_size,
        test_epochs=sqrtspace(0, n_epochs, n_tests),
        evaluation_function=percent_argmax_correct,
    )

    plot_learning_curves(learning_curves)
Example #2
def test_variational_autoencoder():
    """
    Just test that after training, samples are closer to the test data than they are before training.
    """

    dataset = get_synthetic_clusters_dataset()
    rng = np.random.RandomState(1234)
    model = VariationalAutoencoder(pq_pair=EncoderDecoderNetworks(
        x_dim=dataset.input_shape[0],
        z_dim=2,
        encoder_hidden_sizes=[],
        decoder_hidden_sizes=[],
        w_init=lambda n_in, n_out: 0.01 * rng.randn(n_in, n_out),
    ),
                                   optimizer=AdaMax(alpha=0.1),
                                   rng=rng)
    train_fcn = model.train.compile()
    gen_fcn = model.sample.compile()
    initial_mcm = mean_closest_match(gen_fcn(100), dataset.test_set.input,
                                     'L1')
    for minibatch in minibatch_iterate(dataset.training_set.input,
                                       minibatch_size=10,
                                       n_epochs=1):
        train_fcn(minibatch)
    final_mcm = mean_closest_match(gen_fcn(100), dataset.test_set.input, 'L1')
    assert final_mcm < initial_mcm / 2
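The mean_closest_match helper used above is not shown on this page. As a rough illustration only (an assumption based on the function name and the 'L1' argument, not the library's actual implementation), the metric can be read as: for each generated sample, take the L1 distance to its nearest neighbour in the reference data, then average those distances.

import numpy as np

def mean_closest_match_l1(samples, data):
    # Hypothetical sketch: average, over generated samples, of the L1 distance
    # to the closest point in the reference data.
    dists = np.abs(samples[:, None, :] - data[None, :, :]).sum(axis=-1)  # (n_samples, n_data)
    return dists.min(axis=1).mean()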
Example #3
def mnist_adamax_showdown(hidden_size=300, n_epochs=10, n_tests=20):

    dataset = get_mnist_dataset()

    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 0.1
        n_tests = 3

    make_mlp = lambda optimizer: GradientBasedPredictor(
        function=MultiLayerPerceptron.from_init(layer_sizes=[
            dataset.input_size, hidden_size, dataset.n_categories
        ],
                                                hidden_activation='sig',
                                                output_activation='lin',
                                                w_init=0.01,
                                                rng=5),
        cost_function=softmax_negative_log_likelihood,
        optimizer=optimizer,
    ).compile()

    return compare_predictors(dataset=dataset,
                              online_predictors={
                                  'sgd':
                                  make_mlp(SimpleGradientDescent(eta=0.1)),
                                  'adamax': make_mlp(AdaMax(alpha=1e-3)),
                              },
                              minibatch_size=20,
                              test_epochs=sqrtspace(0, n_epochs, n_tests),
                              evaluation_function=percent_argmax_correct)
Example #4
def demo_rbm_mnist(
        vis_activation = 'bernoulli',
        hid_activation = 'bernoulli',
        n_hidden = 500,
        plot = True,
        eta = 0.01,
        optimizer = 'sgd',
        w_init_mag = 0.001,
        minibatch_size = 9,
        persistent = False,
        n_epochs = 100,
        plot_interval = 100,
        ):
    """
    In this demo we train an RBM on the MNIST input data (labels are ignored).  We plot the state of a Markov chain
    that is being simultaneously sampled from the RBM, and the parameters of the RBM.

    What you see:
    A plot will appear with 6 subplots.  The subplots are as follows:
    hidden-neg-chain: The activity of the hidden layer for each of the persistent CD chains for drawing negative samples.
    visible-neg-chain: The probabilities of the visible activations corresponding to the state of hidden-neg-chain.
    w: A subset of the weight vectors, reshaped to the shape of the input.
    b: The bias of the hidden units.
    b_rev: The bias of the visible units.
    visible-sample: The probabilities of the visible samples drawn from an independent free-sampling chain (outside the
        training function).

    As learning progresses, visible-neg-chain and visible-sample should increasingly resemble the data.
    """
    with EnableOmniscence():
        # EnableOmniscence allows us to plot internal variables (by referencing the .locals() attribute of a symbolic function... see plot_fcn below)

        if is_test_mode():
            n_epochs = 0.01

        data = get_mnist_dataset(flat = True).training_set.input

        rbm = simple_rbm(
            visible_layer = StochasticNonlinearity(vis_activation),
            bridge=FullyConnectedBridge(w = w_init_mag*np.random.randn(28*28, n_hidden).astype(theano.config.floatX), b=0, b_rev = 0),
            hidden_layer = StochasticNonlinearity(hid_activation)
            )

        optimizer = \
            SimpleGradientDescent(eta = eta) if optimizer == 'sgd' else \
            AdaMax(alpha=eta) if optimizer == 'adamax' else \
            bad_value(optimizer)

        train_function = rbm.get_training_fcn(n_gibbs = 1, persistent = persistent, optimizer = optimizer).compile()

        def plot_fcn():
            lv = train_function.locals()
            dbplot(lv['wake_visible'].reshape((-1, 28, 28)), 'visible-pos-chain')
            dbplot(lv['sleep_visible'].reshape((-1, 28, 28)), 'visible-neg-chain')

        for i, visible_data in enumerate(minibatch_iterate(data, minibatch_size=minibatch_size, n_epochs=n_epochs)):
            train_function(visible_data)
            if plot and i % plot_interval == 0:
                plot_fcn()
Example #5
def demo_lstm_novelist(
        book = 'bible',
        n_hidden = 400,
        verse_duration = 20,
        generation_duration = 200,
        generate_every = 200,
        max_len = None,
        hidden_layer_type = 'tanh',
        n_epochs = 1,
        seed = None,
        ):
    """
    An LSTM-Autoencoder learns the Bible, and can spontaneously produce biblical-ish verses.

    :param n_hidden: Number of hidden/memory units in LSTM
    :param verse_duration: Number of Backprop-Through-Time steps to do.
    :param generation_duration: Number of characters to generate with each sample.
    :param generate_every: Generate every N training iterations
    :param max_len: Truncate the text to this length.
    :param n_epochs: Number of passes through the bible to make.
    :param seed: Random Seed
    :return:
    """

    if is_test_mode():
        n_hidden=10
        verse_duration=7
        generation_duration=5
        max_len = 40

    rng = np.random.RandomState(seed)
    text = read_book(book, max_characters=max_len)

    onehot_text, decode_key = text_to_onehot(text)
    n_char = onehot_text.shape[1]

    the_prophet = AutoencodingLSTM(n_input=n_char, n_hidden=n_hidden,
        initializer_fcn=lambda shape: 0.01*rng.randn(*shape), hidden_layer_type = hidden_layer_type)

    training_fcn = the_prophet.get_training_function(optimizer=AdaMax(alpha = 0.01), update_states=True).compile(add_test_values = True)
    generating_fcn = the_prophet.get_generation_function(stochastic=True).compile(add_test_values = True)

    printer = TextWrappingPrinter(newline_every=100)

    def prime_and_generate(n_steps, primer = ''):
        onehot_primer, _ = text_to_onehot(primer, decode_key)
        onehot_gen, = generating_fcn(onehot_primer, n_steps)
        gen = onehot_to_text(onehot_gen, decode_key)
        return '%s%s' % (primer, gen)

    prime_and_generate(generation_duration, 'In the beginning, ')

    for i, verse in enumerate(minibatch_iterate(onehot_text, minibatch_size=verse_duration, n_epochs=n_epochs)):
        if i % generate_every == 0:
            printer.write('[iter %s]%s' % (i, prime_and_generate(n_steps = generation_duration), ))
        training_fcn(verse)

    printer.write('[iter %s]%s' % (i, prime_and_generate(n_steps = generation_duration), ))
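The text_to_onehot / onehot_to_text helpers are assumed by this demo but not shown. A minimal sketch of the encoding they presumably perform (hypothetical, for illustration only) maps each character to an index in a fixed alphabet and then to a one-hot row, so that onehot_text.shape[1] equals the number of distinct characters.

import numpy as np

def text_to_onehot_sketch(text, alphabet=None):
    # Hypothetical encoder: one row per character, one column per alphabet symbol.
    if alphabet is None:
        alphabet = sorted(set(text))
    index = {c: i for i, c in enumerate(alphabet)}
    onehot = np.zeros((len(text), len(alphabet)))
    onehot[np.arange(len(text)), [index[c] for c in text]] = 1
    return onehot, alphabet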
Example #6
def backprop_vs_difference_target_prop(hidden_sizes=[240],
                                       n_epochs=10,
                                       minibatch_size=20,
                                       n_tests=20):

    dataset = get_mnist_dataset(flat=True)
    dataset = dataset.process_with(
        targets_processor=lambda (x, ): (OneHotEncoding(10)(x).astype(int), ))

    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 0.1
        n_tests = 3

    set_default_figure_size(12, 9)

    return compare_predictors(
        dataset=dataset,
        online_predictors={
            'backprop-mlp':
            GradientBasedPredictor(
                function=MultiLayerPerceptron.from_init(
                    layer_sizes=[dataset.input_size] + hidden_sizes +
                    [dataset.n_categories],
                    hidden_activation='tanh',
                    output_activation='sig',
                    w_init=0.01,
                    rng=5),
                cost_function=mean_squared_error,
                optimizer=AdaMax(0.01),
            ).compile(),
            'difference-target-prop-mlp':
            DifferenceTargetMLP.from_initializer(
                input_size=dataset.input_size,
                output_size=dataset.target_size,
                hidden_sizes=hidden_sizes,
                optimizer_constructor=lambda: AdaMax(0.01),
                w_init=0.01,
                noise=1,
            ).compile()
        },
        minibatch_size=minibatch_size,
        test_epochs=sqrtspace(0, n_epochs, n_tests),
        evaluation_function=percent_argmax_correct,
    )
Example #7
    def __init__(self, pq_pair, optimizer=AdaMax(alpha=0.01), rng=None):
        """
        :param pq_pair: An IVariationalPair object
        :param optimizer: An IGradientOptimizer object
        :param rng: A random number generator, or seed.
        """
        self.rng = get_theano_rng(rng)
        self.pq_pair = pq_pair
        self.optimizer = optimizer
Example #8
def test_convnet_serialization():

    cifar10 = get_cifar_10_dataset(normalize_inputs=True,
                                   n_training_samples=50,
                                   n_test_samples=50)
    test_epochs = [0, 1, 2]
    assert cifar10.input_shape == (3, 32, 32)

    net = ConvNet.from_init(
        input_shape=cifar10.input_shape,
        w_init=0.01,
        specifiers=[
            ConvInitSpec(n_maps=24, filter_size=(3, 3), mode='same'),
            NonlinearitySpec('relu'),
            PoolerSpec(region=2, stride=2, mode='max'),  # (16x16)
            ConvInitSpec(n_maps=48, filter_size=(3, 3), mode='same'),
            NonlinearitySpec('relu'),
            PoolerSpec(region=2, stride=2, mode='max'),  # (8x8)
            ConvInitSpec(n_maps=96, filter_size=(3, 3), mode='same'),
            NonlinearitySpec('relu'),
            PoolerSpec(region=2, stride=2, mode='max'),  # (4x4),
            ConvInitSpec(n_maps=192, filter_size=(4, 4),
                         mode='valid'),  # (1x1)
            NonlinearitySpec('relu'),
            ConvInitSpec(n_maps=10, filter_size=(1, 1), mode='valid'),
            NonlinearitySpec('softmax'),
        ],
    )

    predictor = GradientBasedPredictor(
        function=net,
        cost_function=negative_log_likelihood_dangerous,
        optimizer=AdaMax())

    assess_online_symbolic_predictor(
        predictor=predictor,
        dataset=cifar10,
        evaluation_function=percent_argmax_correct,
        test_epochs=test_epochs,
        minibatch_size=20,
        add_test_values=False)

    results_1 = net.compile()(cifar10.test_set.input)

    savable = net.to_spec()
    serialized = pickle.dumps(savable)
    deserialized = pickle.loads(serialized)

    net_2 = ConvNet.from_init(deserialized,
                              input_shape=cifar10.input_shape,
                              rng=None)
    results_2 = net_2.compile()(cifar10.test_set.input)
    assert np.array_equal(results_1, results_2)
Example #9
def demo_gan_mnist(n_epochs=20,
                   minibatch_size=20,
                   n_discriminator_steps=1,
                   noise_dim=10,
                   plot_period=100,
                   rng=1234):
    """
    Train a Generative Adversarial network on MNIST data, showing generated samples as training progresses.

    :param n_epochs: Number of epochs to train
    :param minibatch_size: Size of minibatch to feed in each training iteration
    :param n_discriminator_steps: Number of steps training discriminator for every step of training generator
    :param noise_dim: Dimensionality of latent space (from which random samples are pulled)
    :param plot_period: Plot every N training iterations
    :param rng: Random number generator or seed
    """

    net = GenerativeAdversarialNetwork(
        discriminator=MultiLayerPerceptron.from_init(w_init=0.01,
                                                     layer_sizes=[784, 100, 1],
                                                     hidden_activation='relu',
                                                     output_activation='sig',
                                                     rng=rng),
        generator=MultiLayerPerceptron.from_init(
            w_init=0.1,
            layer_sizes=[noise_dim, 200, 784],
            hidden_activation='relu',
            output_activation='sig',
            rng=rng),
        noise_dim=noise_dim,
        optimizer=AdaMax(0.001),
        rng=rng)

    data = get_mnist_dataset(flat=True).training_set.input

    f_train_discriminator = net.train_discriminator.compile()
    f_train_generator = net.train_generator.compile()
    f_generate = net.generate.compile()

    for i, minibatch in enumerate(
            minibatch_iterate(data,
                              n_epochs=n_epochs,
                              minibatch_size=minibatch_size)):
        f_train_discriminator(minibatch)
        print 'Trained Discriminator'
        if i % n_discriminator_steps == n_discriminator_steps - 1:
            f_train_generator(n_samples=minibatch_size)
            print 'Trained Generator'
        if i % plot_period == 0:
            samples = f_generate(n_samples=minibatch_size)
            dbplot(minibatch.reshape(-1, 28, 28), "Real")
            dbplot(samples.reshape(-1, 28, 28), "Counterfeit")
            print 'Disp'
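The demo alternates n_discriminator_steps of discriminator training with one generator step. As a point of reference, the quantities such a pair of training functions typically minimizes are the standard GAN losses (the textbook objective, stated here as an assumption about what train_discriminator and train_generator optimize, not an excerpt from the library).

import numpy as np

def gan_losses(d_real, d_fake, eps=1e-7):
    # d_real / d_fake: discriminator outputs on real data and on generated samples.
    d_loss = -np.mean(np.log(d_real + eps) + np.log(1 - d_fake + eps))  # push real->1, fake->0
    g_loss = -np.mean(np.log(d_fake + eps))  # non-saturating generator loss: push fake->1
    return d_loss, g_loss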
Example #10
def demo_variational_autoencoder(minibatch_size=100,
                                 n_epochs=2000,
                                 plot_interval=100,
                                 seed=None):
    """
    Train a Variational Autoencoder on MNIST and look at the samples it generates.
    :param minibatch_size: Number of elements in the minibatch
    :param n_epochs: Number of passes through dataset
    :param plot_interval: Plot every x iterations
    """

    data = get_mnist_dataset(flat=True).training_set.input

    if is_test_mode():
        n_epochs = 1
        minibatch_size = 10
        data = data[:100]

    rng = get_rng(seed)

    model = VariationalAutoencoder(pq_pair=EncoderDecoderNetworks(
        x_dim=data.shape[1],
        z_dim=20,
        encoder_hidden_sizes=[200],
        decoder_hidden_sizes=[200],
        w_init=lambda n_in, n_out: 0.01 * np.random.randn(n_in, n_out),
        x_distribution='bernoulli',
        z_distribution='gaussian',
        hidden_activation='softplus'),
                                   optimizer=AdaMax(alpha=0.003),
                                   rng=rng)

    training_fcn = model.train.compile()

    sampling_fcn = model.sample.compile()

    for i, minibatch in enumerate(
            minibatch_iterate(data,
                              minibatch_size=minibatch_size,
                              n_epochs=n_epochs)):

        training_fcn(minibatch)

        if i % plot_interval == 0:
            print 'Epoch %s' % (i * minibatch_size / float(len(data)), )
            samples = sampling_fcn(25).reshape(5, 5, 28, 28)
            dbplot(samples, 'Samples from Model')
            dbplot(
                model.pq_pair.p_net.parameters[-2].get_value()[:25].reshape(
                    -1, 28, 28), 'dec')
            dbplot(
                model.pq_pair.q_net.parameters[0].get_value().T[:25].reshape(
                    -1, 28, 28), 'enc')
Example #11
def test_autoencoding_lstm(width=8, seed=1234):

    data = get_bounce_data(width=width)
    encoder = OneHotEncoding(n_classes=width, dtype=theano.config.floatX)
    onehot_data = encoder(data)
    rng = np.random.RandomState(seed)
    aelstm = AutoencodingLSTM(
        n_input=8,
        n_hidden=50,
        initializer_fcn=lambda shape: 0.01 * rng.randn(*shape))

    gen_fcn = aelstm.get_generation_function(
        maintain_state=True, rng=rng).compile(add_test_values=True)
    train_fcn = aelstm.get_training_function(
        update_states=True,
        optimizer=AdaMax(alpha=0.1)).compile(add_test_values=True)

    def prime_and_gen(primer, n_steps):
        onehot_primer = encoder(np.array(primer))
        onehot_generated, = gen_fcn(onehot_primer, n_steps)
        generated = encoder.inverse(onehot_generated)
        return generated

    initial_seq = prime_and_gen([0, 1, 2, 3, 4], 11)
    print initial_seq

    # Test empty, one-length primers
    prime_and_gen([], 2)
    prime_and_gen([0], 2)

    print 'Training....'
    for d in minibatch_iterate(onehot_data, minibatch_size=3, n_epochs=400):
        train_fcn(d)
    print 'Done.'

    final_seq = prime_and_gen([0, 1, 2, 3, 4], 11)
    assert np.array_equal(
        final_seq,
        [5, 6, 7, 6, 5, 4, 3, 2, 1, 0, 1]), 'Bzzzz! It was %s' % (final_seq, )

    # Assert state is maintained
    seq = prime_and_gen([], 3)
    assert np.array_equal(seq, [2, 3, 4]), 'Bzzzz! It was %s' % (seq, )
    seq = prime_and_gen([5], 3)
    assert np.array_equal(seq, [6, 7, 6]), 'Bzzzz! It was %s' % (seq, )

    # Assert training does not interrupt generation state.
    train_fcn(d)
    seq = prime_and_gen([], 3)
    assert np.array_equal(seq, [5, 4, 3]), 'Bzzzz! It was %s' % (seq, )
Example #12
    def get_training_function(self,
                              cost_func=mean_xe,
                              optimizer=AdaMax(alpha=1e-3),
                              update_states=True):
        """
        Get the symbolic function that will be used to train the AutoencodingLSTM.
        :param cost_func: Function that takes actual outputs, target outputs and returns a cost.
        :param optimizer: Optimizer: takes cost, parameters, returns updates.
        :param update_states: If true, the hidden state is maintained between calls to the training
            function.  This makes sense if your data is coming in sequentially.
        :return:
        """
        @symbolic_updater
        def training_fcn(inputs):
            hidden_reps = self.lstm.multi_step(inputs,
                                               update_states=update_states)
            outputs = self.output_activation(
                hidden_reps.dot(self.w_hz) + self.b_z)
            cost = cost_func(actual=outputs[:-1], target=inputs[1:])
            optimizer(cost=cost, parameters=self.parameters)

        return training_fcn
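The docstring above states the optimizer contract these examples rely on: the optimizer is called with a cost and a list of shared parameters and yields the parameter updates. A minimal sketch of that contract in plain Theano (an illustration only; plato's IGradientOptimizer objects register their updates through the library's own symbolic machinery rather than returning them).

import theano.tensor as T

class SketchSGD(object):  # hypothetical class, not part of plato
    def __init__(self, eta=0.01):
        self.eta = eta

    def __call__(self, cost, parameters):
        # One gradient-descent step per shared parameter, as (param, new_value) pairs.
        grads = T.grad(cost, wrt=parameters)
        return [(p, p - self.eta * g) for p, g in zip(parameters, grads)]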
Example #13
        input_activation='relu',
        hidden_activation='relu',
        output_activation='relu',
        optimizer_constructor=lambda: RMSProp(learning_rate=0.001),
    ),
    description=
    "DTP with an entirely RELU network, using RMSprop as an optimizer",
    conclusion="RMSProp and RELU do not mix at all!")

register_experiment(
    name='all-relu-dtp-adamax',
    function=lambda: demo_run_dtp_on_mnist(
        input_activation='relu',
        hidden_activation='relu',
        output_activation='relu',
        optimizer_constructor=lambda: AdaMax(alpha=0.001),
    ),
    description=
    "DTP with an entirely RELU network, using AdaMax as an optimizer",
    conclusion=
    "AdaMax and RELU do not mix well either!  (not as horrible as RMSProp though)"
)

register_experiment(
    name='all-relu-LinDTP',
    function=lambda: demo_run_dtp_on_mnist(input_activation='relu',
                                           hidden_activation='relu',
                                           output_activation='relu',
                                           optimizer_constructor=lambda:
                                           GradientDescent(eta=0.01),
                                           n_epochs=30,
Example #14
def demo_simple_vae_on_mnist(minibatch_size=100,
                             n_epochs=2000,
                             plot_interval=100,
                             calculation_interval=500,
                             z_dim=2,
                             hidden_sizes=[400, 200],
                             learning_rate=0.003,
                             hidden_activation='softplus',
                             binary_x=True,
                             w_init_mag=0.01,
                             gaussian_min_var=None,
                             manifold_grid_size=11,
                             manifold_grid_span=2,
                             seed=None):
    """
    Train a Variational Autoencoder on MNIST and look at the samples it generates.
    """

    dataset = get_mnist_dataset(flat=True)
    training_data = dataset.training_set.input
    test_data = dataset.test_set.input

    if is_test_mode():
        n_epochs = 1
        minibatch_size = 10
        training_data = training_data[:100]
        test_data = test_data[:100]

    model = GaussianVariationalAutoencoder(
        x_dim=training_data.shape[1],
        z_dim=z_dim,
        encoder_hidden_sizes=hidden_sizes,
        decoder_hidden_sizes=hidden_sizes[::-1],
        w_init_mag=w_init_mag,
        binary_data=binary_x,
        hidden_activation=hidden_activation,
        optimizer=AdaMax(alpha=learning_rate),
        gaussian_min_var=gaussian_min_var,
        rng=seed)

    training_fcn = model.train.compile()

    # For display, make functions to sample and represent the manifold.
    sampling_fcn = model.sample.compile()
    z_manifold_grid = np.array([
        x.flatten() for x in np.meshgrid(
            np.linspace(-manifold_grid_span, manifold_grid_span,
                        manifold_grid_size),
            np.linspace(-manifold_grid_span, manifold_grid_span,
                        manifold_grid_size))
    ] + [np.zeros(manifold_grid_size**2)] * (z_dim - 2)).T
    decoder_mean_fcn = model.decode.compile(fixed_args=dict(z=z_manifold_grid))
    lower_bound_fcn = model.compute_lower_bound.compile()

    for i, minibatch in enumerate(
            minibatch_iterate(training_data,
                              minibatch_size=minibatch_size,
                              n_epochs=n_epochs)):

        training_fcn(minibatch)

        if i % plot_interval == 0:
            samples = sampling_fcn(25).reshape(5, 5, 28, 28)
            dbplot(samples, 'Samples from Model')
            if binary_x:
                manifold_means = decoder_mean_fcn()
            else:
                manifold_means, _ = decoder_mean_fcn()
            dbplot(
                manifold_means.reshape(manifold_grid_size, manifold_grid_size,
                                       28, 28),
                'First 2-dimensions of manifold.')
        if i % calculation_interval == 0:
            training_lower_bound = lower_bound_fcn(training_data)
            test_lower_bound = lower_bound_fcn(test_data)
            print 'Epoch: %s, Training Lower Bound: %s, Test Lower bound: %s' % \
                (i*minibatch_size/float(len(training_data)), training_lower_bound, test_lower_bound)
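The lower bound printed above is the standard variational lower bound of Kingma & Welling's auto-encoding variational Bayes setup. For reference, a NumPy sketch of its two terms in the binary-data case (an illustration under that assumption, not the code behind model.compute_lower_bound).

import numpy as np

def elbo_binary_terms(x, x_mean, z_mean, z_log_var, eps=1e-7):
    # Bernoulli reconstruction log-likelihood per example...
    recon = np.sum(x * np.log(x_mean + eps) + (1 - x) * np.log(1 - x_mean + eps), axis=1)
    # ...minus the KL divergence from the diagonal-Gaussian posterior to the N(0, I) prior.
    kl = -0.5 * np.sum(1.0 + z_log_var - z_mean ** 2 - np.exp(z_log_var), axis=1)
    return np.mean(recon - kl)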
Example #15
    def __init__(self,
                 x_dim,
                 z_dim,
                 encoder_hidden_sizes=[100],
                 decoder_hidden_sizes=[100],
                 hidden_activation='tanh',
                 w_init_mag=0.01,
                 binary_data=False,
                 optimizer=AdaMax(alpha=0.01),
                 rng=None,
                 gaussian_min_var=None):
        """
        :param x_dim: Dimensionality of the data
        :param z_dim: Dimensionality of the latent space
        :param encoder_hidden_sizes: A list of sizes of each hidden layer in the encoder (from X to Z)
        :param decoder_hidden_sizes: A list of sizes of each hidden layer in the decoder (from Z to X)
        :param hidden_activation: Activation function for all hidden layers
        :param w_init_mag: Magnitude of initial weights
        :param binary_data: Set to True if the data is binary.  You can also use this if the data is bounded in [0, 1] - then
            we can think of it as the expected value.
        :param optimizer: An IGradientOptimizer object for doing parameter updates
            ... see plato.tools.optimization.optimizers
        :param rng: A random number generator or random seed.
        """
        np_rng = get_rng(rng)

        encoder_layer_sizes = [x_dim] + encoder_hidden_sizes
        self.encoder_hidden_layers = [
            Layer(w_init_mag * np_rng.randn(n_in, n_out),
                  nonlinearity=hidden_activation) for n_in, n_out in zip(
                      encoder_layer_sizes[:-1], encoder_layer_sizes[1:])
        ]
        self.encoder_mean_layer = Layer(
            w_init_mag * np_rng.randn(encoder_layer_sizes[-1], z_dim),
            nonlinearity='linear')
        self.encoder_log_var_layer = Layer(
            w_init_mag * np_rng.randn(encoder_layer_sizes[-1], z_dim),
            nonlinearity='linear')

        decoder_layer_sizes = [z_dim] + decoder_hidden_sizes
        self.decoder_hidden_layers = [
            Layer(w_init_mag * np_rng.randn(n_in, n_out),
                  nonlinearity=hidden_activation) for n_in, n_out in zip(
                      decoder_layer_sizes[:-1], decoder_layer_sizes[1:])
        ]
        if binary_data:
            self.decoder_mean_layer = Layer(
                w_init_mag * np_rng.randn(decoder_layer_sizes[-1], x_dim),
                nonlinearity='sigm')
        else:
            self.decoder_mean_layer = Layer(
                w_init_mag * np_rng.randn(decoder_layer_sizes[-1], x_dim),
                nonlinearity='linear')
            self.decoder_log_var_layer = Layer(
                w_init_mag * np_rng.randn(decoder_layer_sizes[-1], x_dim),
                nonlinearity='linear')

        self.rng = get_theano_rng(np_rng)
        self.binary_data = binary_data
        self.x_size = x_dim
        self.z_size = z_dim
        self.optimizer = optimizer
        self.gaussian_min_var = gaussian_min_var
Example #16
def test_adamax_optimizer():
    _test_optimizer_on_simple_classification_problem(AdaMax(alpha=0.01))
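Every example on this page constructs AdaMax(alpha=...) as its optimizer. For context, the update rule such an optimizer implements is the infinity-norm variant of Adam from Kingma & Ba (2015); below is a plain NumPy sketch of one step (a reference rendering of the published algorithm, not plato's implementation).

import numpy as np

def adamax_step(param, grad, m, u, t, alpha=0.002, beta1=0.9, beta2=0.999, eps=1e-8):
    # m: decaying first-moment estimate; u: exponentially weighted infinity norm; t: step count starting at 1.
    m = beta1 * m + (1 - beta1) * grad
    u = np.maximum(beta2 * u, np.abs(grad))
    param = param - (alpha / (1 - beta1 ** t)) * m / (u + eps)
    return param, m, u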