Example #1
def demo_compare_dtp_methods(predictor_constructors,
                             n_epochs=10,
                             minibatch_size=20,
                             n_tests=20,
                             onehot=True,
                             accumulator=None):
    dataset = get_mnist_dataset(flat=True, binarize=False)
    n_categories = dataset.n_categories
    if onehot:
        dataset = dataset.to_onehot()

    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 1
        n_tests = 2

    learning_curves = compare_predictors(
        dataset=dataset,
        online_predictors={
            name: p(dataset.input_size, n_categories)
            for name, p in predictor_constructors.iteritems()
            if name in predictor_constructors
        },
        minibatch_size=minibatch_size,
        test_epochs=sqrtspace(0, n_epochs, n_tests),
        evaluation_function=percent_argmax_correct,
        # online_test_callbacks={'perceptron': lambda p: dbplot(p.symbolic_predictor.layers[0].w.get_value().T.reshape(-1, 28, 28))},
        accumulators=accumulator)

    plot_learning_curves(learning_curves)
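
A hedged usage sketch of the demo above. The names and factory helpers below are hypothetical placeholders (not functions from this framework); the demo only requires that each value in predictor_constructors be a callable taking (input_size, n_categories) and returning an online predictor.

# Hypothetical invocation -- make_mlp_predictor / make_dtp_predictor are placeholder
# factories standing in for real predictor constructors.
demo_compare_dtp_methods(
    predictor_constructors={
        'MLP': lambda n_in, n_out: make_mlp_predictor(n_in, n_out),
        'DTP': lambda n_in, n_out: make_dtp_predictor(n_in, n_out),
        },
    n_epochs=10,
    minibatch_size=20,
    )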
Example #2
def demo_compare_dtp_methods(
        predictor_constructors,
        n_epochs = 10,
        minibatch_size = 20,
        n_tests = 20,
        onehot = True,
        accumulator = None
        ):
    dataset = get_mnist_dataset(flat = True, binarize = False)
    n_categories = dataset.n_categories
    if onehot:
        dataset = dataset.to_onehot()

    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 1
        n_tests = 2

    learning_curves = compare_predictors(
        dataset=dataset,
        online_predictors = {name: p(dataset.input_size, n_categories) for name, p in predictor_constructors.iteritems() if name in predictor_constructors},
        minibatch_size = minibatch_size,
        test_epochs = sqrtspace(0, n_epochs, n_tests),
        evaluation_function = percent_argmax_correct,
        # online_test_callbacks={'perceptron': lambda p: dbplot(p.symbolic_predictor.layers[0].w.get_value().T.reshape(-1, 28, 28))},
        accumulators=accumulator
        )

    plot_learning_curves(learning_curves)
Example #3
def demo_dtp_varieties(hidden_sizes=[240],
                       n_epochs=10,
                       minibatch_size=20,
                       n_tests=20,
                       hidden_activation='tanh',
                       output_activation='sigm',
                       optimizer='adamax',
                       learning_rate=0.01,
                       noise=1,
                       predictors=['MLP', 'DTP', 'PreAct-DTP', 'Linear-DTP'],
                       rng=1234,
                       use_bias=True,
                       live_plot=False,
                       plot=False):
    """
    Train and compare an MLP and several Difference Target Propagation (DTP) variants on MNIST.

    :param hidden_sizes:
    :param n_epochs:
    :param minibatch_size:
    :param n_tests:
    :return:
    """
    if isinstance(predictors, str):
        predictors = [predictors]

    dataset = get_mnist_dataset(flat=True)
    dataset = dataset.process_with(
        targets_processor=lambda (x, ): (OneHotEncoding(10)(x).astype(int), ))
    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 0.1
        n_tests = 3

    set_default_figure_size(12, 9)

    predictors = OrderedDict(
        (name,
         get_predictor(name,
                       input_size=dataset.input_size,
                       target_size=dataset.target_size,
                       hidden_sizes=hidden_sizes,
                       hidden_activation=hidden_activation,
                       output_activation=output_activation,
                       optimizer=optimizer,
                       learning_rate=learning_rate,
                       noise=noise,
                       use_bias=use_bias,
                       rng=rng)) for name in predictors)

    learning_curves = compare_predictors(
        dataset=dataset,
        online_predictors=predictors,
        minibatch_size=minibatch_size,
        test_epochs=sqrtspace(0, n_epochs, n_tests),
        evaluation_function=percent_argmax_correct,
    )

    if plot:
        plot_learning_curves(learning_curves)
Example #4
def demo_perceptron_dtp(
    hidden_sizes=[240],
    n_epochs=20,
    n_tests=20,
    minibatch_size=100,
    lin_dtp=True,
):

    dataset = get_mnist_dataset(flat=True).to_onehot()

    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 1
        n_tests = 2

    predictor = DifferenceTargetMLP(layers=[
        PerceptronLayer.from_initializer(n_in,
                                         n_out,
                                         initial_mag=2,
                                         lin_dtp=lin_dtp)
        for n_in, n_out in zip([dataset.input_size] +
                               hidden_sizes, hidden_sizes +
                               [dataset.target_size])
    ],
                                    output_cost_function=None).compile()

    result = assess_online_predictor(
        predictor=predictor,
        dataset=dataset,
        minibatch_size=minibatch_size,
        evaluation_function='percent_argmax_correct',
        test_epochs=sqrtspace(0, n_epochs, n_tests),
    )

    plot_learning_curves(result)
Example #5
def demo_rbm_mnist(
        vis_activation = 'bernoulli',
        hid_activation = 'bernoulli',
        n_hidden = 500,
        plot = True,
        eta = 0.01,
        optimizer = 'sgd',
        w_init_mag = 0.001,
        minibatch_size = 9,
        persistent = False,
        n_epochs = 100,
        plot_interval = 100,
        ):
    """
    In this demo we train an RBM on the MNIST input data (labels are ignored).  We plot the state of a Markov chain
    that is being simultaneously sampled from the RBM, and the parameters of the RBM.

    What you see:
    A plot will appear with 6 subplots.  The subplots are as follows:
    hidden-neg-chain: The activity of the hidden layer for each of the persistent CD chains for drawing negative samples.
    visible-neg-chain: The probabilities of the visible activations corresponding to the state of hidden-neg-chain.
    w: A subset of the weight vectors, reshaped to the shape of the input.
    b: The bias of the hidden units.
    b_rev: The bias of the visible units.
    visible-sample: The probabilities of the visible samples drawn from an independent free-sampling chain (outside the
        training function).

    As learning progresses, visible-neg-chain and visible-sample should increasingly resemble the data.
    """
    with EnableOmniscence():
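        # EnableOmniscence allows us to plot internal variables (by referencing the .locals() attribute of a symbolic function - see plot_fcn below)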

        if is_test_mode():
            n_epochs = 0.01

        data = get_mnist_dataset(flat = True).training_set.input

        rbm = simple_rbm(
            visible_layer = StochasticNonlinearity(vis_activation),
            bridge=FullyConnectedBridge(w = w_init_mag*np.random.randn(28*28, n_hidden).astype(theano.config.floatX), b=0, b_rev = 0),
            hidden_layer = StochasticNonlinearity(hid_activation)
            )

        optimizer = \
            SimpleGradientDescent(eta = eta) if optimizer == 'sgd' else \
            AdaMax(alpha=eta) if optimizer == 'adamax' else \
            bad_value(optimizer)

        train_function = rbm.get_training_fcn(n_gibbs = 1, persistent = persistent, optimizer = optimizer).compile()

        def plot_fcn():
            lv = train_function.locals()
            dbplot({
                'visible-pos-chain': lv['wake_visible'].reshape((-1, 28, 28)),
                'visible-neg-chain': lv['sleep_visible'].reshape((-1, 28, 28)),
                })

        for i, visible_data in enumerate(minibatch_iterate(data, minibatch_size=minibatch_size, n_epochs=n_epochs)):
            train_function(visible_data)
            if plot and i % plot_interval == 0:
                plot_fcn()
Example #6
def mnist_adamax_showdown(hidden_size = 300, n_epochs = 10, n_tests = 20):

    dataset = get_mnist_dataset()

    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 0.1
        n_tests = 3

    make_mlp = lambda optimizer: GradientBasedPredictor(
            function = MultiLayerPerceptron(
                layer_sizes=[hidden_size, dataset.n_categories],
                input_size = dataset.input_size,
                hidden_activation='sig',
                output_activation='lin',
                w_init = normal_w_init(mag = 0.01, seed = 5)
                ),
            cost_function = softmax_negative_log_likelihood,
            optimizer = optimizer,
            ).compile()

    return compare_predictors(
        dataset=dataset,
        online_predictors = {
            'sgd': make_mlp(SimpleGradientDescent(eta = 0.1)),
            'adamax': make_mlp(AdaMax(alpha = 1e-3)),
            },
        minibatch_size = 20,
        test_epochs = sqrtspace(0, n_epochs, n_tests),
        evaluation_function = percent_argmax_correct
        )
Example #7
def mnist_adamax_showdown(hidden_size = 300, n_epochs = 10, n_tests = 20):

    dataset = get_mnist_dataset()

    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 0.1
        n_tests = 3

    make_mlp = lambda optimizer: GradientBasedPredictor(
            function = MultiLayerPerceptron.from_init(
                layer_sizes=[dataset.input_size, hidden_size, dataset.n_categories],
                hidden_activation='sig',
                output_activation='lin',
                w_init = 0.01,
                rng = 5
                ),
            cost_function = softmax_negative_log_likelihood,
            optimizer = optimizer,
            ).compile()

    return compare_predictors(
        dataset=dataset,
        online_predictors = {
            'sgd': make_mlp(SimpleGradientDescent(eta = 0.1)),
            'adamax': make_mlp(AdaMax(alpha = 1e-3)),
            },
        minibatch_size = 20,
        test_epochs = sqrtspace(0, n_epochs, n_tests),
        evaluation_function = percent_argmax_correct
        )
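
Since mnist_adamax_showdown returns the output of compare_predictors, its result can be plotted exactly as in Example #1; a minimal sketch:

# Sketch: run the SGD-vs-AdaMax comparison and plot the resulting learning curves.
plot_learning_curves(mnist_adamax_showdown(hidden_size=300, n_epochs=10, n_tests=20))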
Example #8
def demo_perceptron_dtp(
        hidden_sizes = [240],
        n_epochs = 20,
        n_tests = 20,
        minibatch_size=100,
        lin_dtp = True,
        ):

    dataset = get_mnist_dataset(flat = True).to_onehot()

    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 1
        n_tests = 2

    predictor = DifferenceTargetMLP(
        layers=[PerceptronLayer.from_initializer(n_in, n_out, initial_mag=2, lin_dtp = lin_dtp)
                for n_in, n_out in zip([dataset.input_size]+hidden_sizes, hidden_sizes+[dataset.target_size])],
        output_cost_function = None
        ).compile()

    result = assess_online_predictor(
        predictor = predictor,
        dataset = dataset,
        minibatch_size=minibatch_size,
        evaluation_function='percent_argmax_correct',
        test_epochs = sqrtspace(0, n_epochs, n_tests),
        )

    plot_learning_curves(result)
Example #9
def demo_rbm_mnist(
        vis_activation = 'bernoulli',
        hid_activation = 'bernoulli',
        n_hidden = 500,
        plot = True,
        eta = 0.01,
        optimizer = 'sgd',
        w_init_mag = 0.001,
        minibatch_size = 9,
        persistent = False,
        n_epochs = 100,
        plot_interval = 100,
        ):
    """
    In this demo we train an RBM on the MNIST input data (labels are ignored).  We plot the state of a Markov chain
    that is being simultaneously sampled from the RBM, and the parameters of the RBM.

    What you see:
    A plot will appear with 6 subplots.  The subplots are as follows:
    hidden-neg-chain: The activity of the hidden layer for each of the persistent CD chains for drawing negative samples.
    visible-neg-chain: The probabilities of the visible activations corresponding to the state of hidden-neg-chain.
    w: A subset of the weight vectors, reshaped to the shape of the input.
    b: The bias of the hidden units.
    b_rev: The bias of the visible units.
    visible-sample: The probabilities of the visible samples drawn from an independent free-sampling chain (outside the
        training function).

    As learning progresses, visible-neg-chain and visible-sample should increasingly resemble the data.
    """
    with EnableOmniscence():
        # EnableOmniscence allows us to plot internal variables (by referencing the .locals() attribute of a symbolic function.. see plot_fcn below)

        if is_test_mode():
            n_epochs = 0.01

        data = get_mnist_dataset(flat = True).training_set.input

        rbm = simple_rbm(
            visible_layer = StochasticNonlinearity(vis_activation),
            bridge=FullyConnectedBridge(w = w_init_mag*np.random.randn(28*28, n_hidden).astype(theano.config.floatX), b=0, b_rev = 0),
            hidden_layer = StochasticNonlinearity(hid_activation)
            )

        optimizer = \
            SimpleGradientDescent(eta = eta) if optimizer == 'sgd' else \
            AdaMax(alpha=eta) if optimizer == 'adamax' else \
            bad_value(optimizer)

        train_function = rbm.get_training_fcn(n_gibbs = 1, persistent = persistent, optimizer = optimizer).compile()

        def plot_fcn():
            lv = train_function.locals()
            dbplot(lv['wake_visible'].reshape((-1, 28, 28)), 'visible-pos-chain')
            dbplot(lv['sleep_visible'].reshape((-1, 28, 28)), 'visible-neg-chain')

        for i, visible_data in enumerate(minibatch_iterate(data, minibatch_size=minibatch_size, n_epochs=n_epochs)):
            train_function(visible_data)
            if plot and i % plot_interval == 0:
                plot_fcn()
Example #10
def demo_mnist_online_regression(
        minibatch_size = 10,
        learning_rate = 0.1,
        optimizer = 'sgd',
        regressor_type = 'multinomial',
        n_epochs = 20,
        n_test_points = 30,
        max_training_samples = None,
        include_biases = True,
        ):
    """
    Train an online regressor on MNIST and print the test scores as training progresses.
    """

    if is_test_mode():
        n_test_points = 3
        minibatch_size = 5
        n_epochs = 0.01
        dataset = get_mnist_dataset(n_training_samples=30, n_test_samples=30, flat = True)
    else:
        dataset = get_mnist_dataset(n_training_samples=max_training_samples, flat = True)

    assert regressor_type in ('multinomial', 'logistic', 'linear')

    n_outputs = dataset.n_categories
    if regressor_type in ('logistic', 'linear'):
        dataset = dataset.to_onehot()

    predictor = OnlineRegressor(
        input_size = dataset.input_size,
        output_size = n_outputs,
        regressor_type = regressor_type,
        optimizer=get_named_optimizer(name = optimizer, learning_rate=learning_rate),
        include_biases = include_biases
        ).compile()

    # Train and periodically report the test score.
    results = assess_online_predictor(
        dataset=dataset,
        predictor=predictor,
        evaluation_function='percent_argmax_correct',
        test_epochs=sqrtspace(0, n_epochs, n_test_points),
        minibatch_size=minibatch_size
    )

    plot_learning_curves(results)
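
A hedged usage sketch: 'multinomial', 'logistic' and 'linear' are the regressor types accepted by the assert above, and the latter two trigger the one-hot conversion of the targets.

# Sketch: train a per-class logistic regressor with the named 'sgd' optimizer.
demo_mnist_online_regression(regressor_type='logistic', optimizer='sgd', learning_rate=0.1, n_epochs=5)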
Example #11
def demo_mnist_online_regression(
        minibatch_size = 10,
        learning_rate = 0.1,
        optimizer = 'sgd',
        regressor_type = 'multinomial',
        n_epochs = 20,
        n_test_points = 30,
        max_training_samples = None,
        include_biases = True,
        ):
    """
    Train an online regressor on MNIST and print the test scores as training progresses.
    """

    if is_test_mode():
        n_test_points = 3
        minibatch_size = 5
        n_epochs = 0.01
        dataset = get_mnist_dataset(n_training_samples=30, n_test_samples=30, flat = True)
    else:
        dataset = get_mnist_dataset(n_training_samples=max_training_samples, flat = True)

    assert regressor_type in ('multinomial', 'logistic', 'linear')

    n_outputs = dataset.n_categories
    if regressor_type in ('logistic', 'linear'):
        dataset = dataset.to_onehot()

    predictor = OnlineRegressor(
        input_size = dataset.input_size,
        output_size = n_outputs,
        regressor_type = regressor_type,
        optimizer=get_named_optimizer(name = optimizer, learning_rate=learning_rate),
        include_biases = include_biases
        ).compile()

    # Train and periodically report the test score.
    results = assess_online_predictor(
        dataset=dataset,
        predictor=predictor,
        evaluation_function='percent_argmax_correct',
        test_epochs=sqrtspace(0, n_epochs, n_test_points),
        minibatch_size=minibatch_size
    )

    plot_learning_curves(results)
Example #12
def compare_example_predictors(
    n_epochs=5,
    n_tests=20,
    minibatch_size=10,
):
    """
    This demo shows how we can compare different online predictors.  The demo trains each predictor on the dataset,
    returning an object that contains the results.

    :param test_mode: Set this to True to just run the demo quickly (but not to completion) to see that it doesn't break.
    """

    dataset = get_mnist_dataset(flat=True)
    # "Flatten" the 28x28 inputs to a 784-d vector

    if is_test_mode():
        # Shorten the dataset so we run through it quickly in test mode.
        dataset = dataset.shorten(200)
        n_epochs = 1
        n_tests = 3

    # Here we compare three predictors on MNIST - an MLP, a Perceptron, and a Random Forest.
    # - The MLP is defined using Plato's interfaces - we create a Symbolic Predictor (GradientBasedPredictor) and
    #   then compile it into an IPredictor object
    # - The Perceptron directly implements the IPredictor interface.
    # - The Random Forest implements SciKit learn's predictor interface - that is, it has a fit(x, y) and a predict(x) method.
    learning_curve_data = compare_predictors(
        dataset=dataset,
        online_predictors={
            'Perceptron':
            Perceptron(w=np.zeros((dataset.input_size, dataset.n_categories)),
                       alpha=0.001).
            to_categorical(
                n_categories=dataset.n_categories
            ),  # .to_categorical allows the perceptron to be trained on integer labels.
            'MLP':
            GradientBasedPredictor(
                function=MultiLayerPerceptron.from_init(
                    layer_sizes=[
                        dataset.input_size, 500, dataset.n_categories
                    ],
                    hidden_activation='sig',  # Sigmoidal hidden units
                    output_activation=
                    'softmax',  # Softmax output unit, since we're doing multinomial classification
                    w_init=0.01,
                    rng=5),
                cost_function=
                negative_log_likelihood_dangerous,  # "Dangerous" because it doesn't check to see that output is normalized, but we know it is because it comes from softmax.
                optimizer=SimpleGradientDescent(eta=0.1),
            ).compile(),  # .compile() returns an IPredictor
        },
        offline_predictors={'RF': RandomForestClassifier(n_estimators=40)},
        minibatch_size=minibatch_size,
        test_epochs=sqrtspace(0, n_epochs, n_tests),
        evaluation_function=percent_argmax_correct  # Compares one-hot
    )
    # The result is a LearningCurveData object
    return learning_curve_data
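
The returned LearningCurveData object can be plotted with plot_learning_curves, as the other demos in this listing do; a minimal sketch:

# Sketch: compare the Perceptron, MLP and Random Forest, then plot the curves.
curves = compare_example_predictors(n_epochs=5, n_tests=20, minibatch_size=10)
plot_learning_curves(curves)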
Example #13
def demo_mnist_mlp(test_mode = False):
    """
    Train an MLP on MNIST and print the test scores as training progresses.
    """

    if test_mode:
        test_period = 200
        minibatch_size = 5
        n_epochs = 0.01
        dataset = get_mnist_dataset(n_training_samples=30, n_test_samples=30)
    else:
        test_period = 1000
        minibatch_size = 20
        n_epochs = 10
        dataset = get_mnist_dataset()

    # Setup the training and test functions
    classifier = MultiLayerPerceptron(
        layer_sizes=[500, 10],
        input_size = 784,
        hidden_activation='sig',
        output_activation='softmax',
        w_init = normal_w_init(mag = 0.01)
        )
    training_cost_function = normalized_negative_log_likelihood
    optimizer = SimpleGradientDescent(eta = 0.1)
    training_function = SupervisedTrainingFunction(classifier, training_cost_function, optimizer).compile()
    test_cost_function = percent_correct
    test_function = SupervisedTestFunction(classifier, test_cost_function).compile()

    def report_test(i):
        training_cost = test_function(dataset.training_set.input, dataset.training_set.target)
        print 'Training score at iteration %s: %s' % (i, training_cost)
        test_cost = test_function(dataset.test_set.input, dataset.test_set.target)
        print 'Test score at iteration %s: %s' % (i, test_cost)

    # Train and periodically report the test score.
    print 'Running MLP on MNIST Dataset...'
    for i, (_, image_minibatch, label_minibatch) in enumerate(dataset.training_set.minibatch_iterator(minibatch_size = minibatch_size, epochs = n_epochs, single_channel = True)):
        if i % test_period == 0:
            report_test(i)
        training_function(image_minibatch, label_minibatch)
    report_test('Final')
    print '...Done.'
Example #14
def demo_rbm_tutorial(
        eta = 0.01,
        n_hidden = 500,
        n_samples = None,
        minibatch_size = 10,
        plot_interval = 10,
        w_init_mag = 0.01,
        n_epochs = 1,
        persistent = False,
        seed = None
        ):
    """
    This tutorial trains a standard binary-binary RBM on MNIST, and allows you to view the weights and negative sampling
    chain.

    Note:
    For simplicity, it uses hidden/visible samples to compute the gradient.  It's actually better to use the hidden
    probabilities.
    """
    if is_test_mode():
        n_samples=50
        n_epochs=1
        plot_interval=50
        n_hidden = 10

    data = get_mnist_dataset(flat = True).training_set.input[:n_samples]
    n_visible = data.shape[1]
    rng = np.random.RandomState(seed)
    activation = lambda x: (1./(1+np.exp(-x)) > rng.rand(*x.shape)).astype(float)

    w = w_init_mag*np.random.randn(n_visible, n_hidden)
    b_hid = np.zeros(n_hidden)
    b_vis = np.zeros(n_visible)

    if persistent:
        hid_sleep_state = np.random.rand(minibatch_size, n_hidden)

    for i, vis_wake_state in enumerate(minibatch_iterate(data, n_epochs = n_epochs, minibatch_size=minibatch_size)):
        hid_wake_state = activation(vis_wake_state.dot(w)+b_hid)
        if not persistent:
            hid_sleep_state = hid_wake_state
        vis_sleep_state = activation(hid_sleep_state.dot(w.T)+b_vis)
        hid_sleep_state = activation(vis_sleep_state.dot(w)+b_hid)

        # Update Parameters
        w_grad = (vis_wake_state.T.dot(hid_wake_state) - vis_sleep_state.T.dot(hid_sleep_state))/float(minibatch_size)
        w += w_grad * eta
        b_vis_grad = np.mean(vis_wake_state, axis = 0) - np.mean(vis_sleep_state, axis = 0)
        b_vis += b_vis_grad * eta
        b_hid_grad = np.mean(hid_wake_state, axis = 0) - np.mean(hid_sleep_state, axis = 0)
        b_hid += b_hid_grad * eta

        if i % plot_interval == 0:
            dbplot(w.T[:100].reshape(-1, 28, 28), 'weights')
            dbplot(vis_sleep_state.reshape(-1, 28, 28), 'dreams')
            print 'Sample %s' % i
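
The "Update Parameters" block above implements the standard CD-1 learning rule; purely as a reading aid, here is a hypothetical helper (not part of the original tutorial) that computes the same three updates:

import numpy as np

def cd1_updates(vis_wake, hid_wake, vis_sleep, hid_sleep, eta):
    # Positive-phase minus negative-phase statistics, averaged over the minibatch.
    m = float(vis_wake.shape[0])
    dw = eta * (vis_wake.T.dot(hid_wake) - vis_sleep.T.dot(hid_sleep)) / m
    db_vis = eta * (vis_wake.mean(axis=0) - vis_sleep.mean(axis=0))
    db_hid = eta * (hid_wake.mean(axis=0) - hid_sleep.mean(axis=0))
    return dw, db_vis, db_hid

# e.g. dw, dbv, dbh = cd1_updates(vis_wake_state, hid_wake_state, vis_sleep_state, hid_sleep_state, eta)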
Example #15
def demo_rbm_tutorial(
        eta = 0.01,
        n_hidden = 500,
        n_samples = None,
        minibatch_size = 10,
        plot_interval = 10,
        w_init_mag = 0.01,
        n_epochs = 1,
        persistent = False,
        seed = None
        ):
    """
    This tutorial trains a standard binary-binary RBM on MNIST, and allows you to view the weights and negative sampling
    chain.

    Note:
    For simplicity, it uses hidden/visible samples to compute the gradient.  It's actually better to use the hidden
    probabilities.
    """
    if is_test_mode():
        n_samples=50
        n_epochs=1
        plot_interval=50
        n_hidden = 10

    data = get_mnist_dataset(flat = True).training_set.input[:n_samples]
    n_visible = data.shape[1]
    rng = np.random.RandomState(seed)
    activation = lambda x: (1./(1+np.exp(-x)) > rng.rand(*x.shape)).astype(float)

    w = w_init_mag*np.random.randn(n_visible, n_hidden)
    b_hid = np.zeros(n_hidden)
    b_vis = np.zeros(n_visible)

    if persistent:
        hid_sleep_state = np.random.rand(minibatch_size, n_hidden)

    for i, vis_wake_state in enumerate(minibatch_iterate(data, n_epochs = n_epochs, minibatch_size=minibatch_size)):
        hid_wake_state = activation(vis_wake_state.dot(w)+b_hid)
        if not persistent:
            hid_sleep_state = hid_wake_state
        vis_sleep_state = activation(hid_sleep_state.dot(w.T)+b_vis)
        hid_sleep_state = activation(vis_sleep_state.dot(w)+b_hid)

        # Update Parameters
        w_grad = (vis_wake_state.T.dot(hid_wake_state) - vis_sleep_state.T.dot(hid_sleep_state))/float(minibatch_size)
        w += w_grad * eta
        b_vis_grad = np.mean(vis_wake_state, axis = 0) - np.mean(vis_sleep_state, axis = 0)
        b_vis += b_vis_grad * eta
        b_hid_grad = np.mean(hid_wake_state, axis = 0) - np.mean(hid_sleep_state, axis = 0)
        b_hid += b_hid_grad * eta

        if i % plot_interval == 0:
            dbplot(w.T[:100].reshape(-1, 28, 28), 'weights')
            dbplot(vis_sleep_state.reshape(-1, 28, 28), 'dreams')
            print 'Sample %s' % i
Example #16
def profile_java_net():
    """

    Note: These times are super unreliable for some reason... A given run can vary
    by 7s-14s, for example.  God knows why.

    Version 'old', Best:
    Scores at Epoch 0.0: Test: 8.200
    Scores at Epoch 1.0: Test: 57.100
    Scores at Epoch 2.0: Test: 71.200
    Elapsed time is: 7.866s

    Version 'arr', Best:
    Scores at Epoch 0.0: Test: 8.200
    Scores at Epoch 1.0: Test: 58.200
    Scores at Epoch 2.0: Test: 71.500
    Elapsed time is: 261.1s

    Version 'new', Best:
    Scores at Epoch 0.0: Test: 8.200
    Scores at Epoch 1.0: Test: 58.200
    Scores at Epoch 2.0: Test: 71.500
    Elapsed time is: 8.825s

    :return:
    """

    mnist = get_mnist_dataset(flat=True).shorten(1000).to_onehot()

    with JPypeConnection():

        spiking_net = JavaSpikingNetWrapper.from_init(
            fractional = True,
            depth_first=False,
            smooth_grads = False,
            back_discretize = 'noreset-herding',
            w_init=0.01,
            hold_error=True,
            rng = 1234,
            n_steps = 10,
            eta=0.01,
            layer_sizes=[784]+[200]+[10],
            dtype = 'float'
            )

        with EZProfiler(print_result=True):
            result = assess_online_predictor(
                predictor = spiking_net,
                dataset=mnist,
                evaluation_function='percent_argmax_correct',
                test_epochs=[0, 1, 2],
                minibatch_size=1,
                test_on='test',
                )
Example #17
def compare_example_predictors(
        n_epochs = 5,
        n_tests = 20,
        minibatch_size = 10,
    ):
    """
    This demo shows how we can compare different online predictors.  The demo trains each predictor on the dataset,
    returning an object that contains the results.

    :param test_mode: Set this to True to just run the demo quickly (but not to completion) to see that it doesn't break.
    """

    dataset = get_mnist_dataset(flat = True)
    # "Flatten" the 28x28 inputs to a 784-d vector

    if is_test_mode():
        # Shorten the dataset so we run through it quickly in test mode.
        dataset = dataset.shorten(200)
        n_epochs = 1
        n_tests = 3

    # Here we compare three predictors on MNIST - an MLP, a Perceptron, and a Random Forest.
    # - The MLP is defined using Plato's interfaces - we create a Symbolic Predictor (GradientBasedPredictor) and
    #   then compile it into an IPredictor object
    # - The Perceptron directly implements the IPredictor interface.
    # - The Random Forest implements SciKit learn's predictor interface - that is, it has a fit(x, y) and a predict(x) method.
    learning_curve_data = compare_predictors(
        dataset = dataset,
        online_predictors = {
            'Perceptron': Perceptron(
                w = np.zeros((dataset.input_size, dataset.n_categories)),
                alpha = 0.001
                ).to_categorical(n_categories = dataset.n_categories),  # .to_categorical allows the perceptron to be trained on integer labels.
            'MLP': GradientBasedPredictor(
                function = MultiLayerPerceptron.from_init(
                    layer_sizes=[dataset.input_size, 500, dataset.n_categories],
                    hidden_activation='sig',  # Sigmoidal hidden units
                    output_activation='softmax',  # Softmax output unit, since we're doing multinomial classification
                    w_init = 0.01,
                    rng = 5
                ),
                cost_function = negative_log_likelihood_dangerous,  # "Dangerous" because it doesn't check to see that output is normalized, but we know it is because it comes from softmax.
                optimizer = SimpleGradientDescent(eta = 0.1),
                ).compile(),  # .compile() returns an IPredictor
            },
        offline_predictors={
            'RF': RandomForestClassifier(n_estimators = 40)
            },
        minibatch_size = minibatch_size,
        test_epochs = sqrtspace(0, n_epochs, n_tests),
        evaluation_function = percent_argmax_correct  # Compares one-hot
        )
    # The result is a LearningCurveData object
    return learning_curve_data
Example #18
def profile_java_net():
    """

    Note: These times are super unreliable for some reason... A given run can vary
    by 7s-14s, for example.  God knows why.

    Version 'old', Best:
    Scores at Epoch 0.0: Test: 8.200
    Scores at Epoch 1.0: Test: 57.100
    Scores at Epoch 2.0: Test: 71.200
    Elapsed time is: 7.866s

    Version 'arr', Best:
    Scores at Epoch 0.0: Test: 8.200
    Scores at Epoch 1.0: Test: 58.200
    Scores at Epoch 2.0: Test: 71.500
    Elapsed time is: 261.1s

    Version 'new', Best:
    Scores at Epoch 0.0: Test: 8.200
    Scores at Epoch 1.0: Test: 58.200
    Scores at Epoch 2.0: Test: 71.500
    Elapsed time is: 8.825s

    :return:
    """

    mnist = get_mnist_dataset(flat=True).shorten(1000).to_onehot()

    with JPypeConnection():

        spiking_net = JavaSpikingNetWrapper.from_init(
            fractional=True,
            depth_first=False,
            smooth_grads=False,
            back_discretize='noreset-herding',
            w_init=0.01,
            hold_error=True,
            rng=1234,
            n_steps=10,
            eta=0.01,
            layer_sizes=[784] + [200] + [10],
            dtype='float')

        with EZProfiler(print_result=True):
            result = assess_online_predictor(
                predictor=spiking_net,
                dataset=mnist,
                evaluation_function='percent_argmax_correct',
                test_epochs=[0, 1, 2],
                minibatch_size=1,
                test_on='test',
            )
Example #19
def demo_variational_autoencoder(minibatch_size=100,
                                 n_epochs=2000,
                                 plot_interval=100,
                                 seed=None):
    """
    Train a Variational Autoencoder on MNIST and look at the samples it generates.
    :param minibatch_size: Number of elements in the minibatch
    :param n_epochs: Number of passes through dataset
    :param plot_interval: Plot every x iterations
    """

    data = get_mnist_dataset(flat=True).training_set.input

    if is_test_mode():
        n_epochs = 1
        minibatch_size = 10
        data = data[:100]

    rng = get_rng(seed)

    model = VariationalAutoencoder(pq_pair=EncoderDecoderNetworks(
        x_dim=data.shape[1],
        z_dim=20,
        encoder_hidden_sizes=[200],
        decoder_hidden_sizes=[200],
        w_init=lambda n_in, n_out: 0.01 * np.random.randn(n_in, n_out),
        x_distribution='bernoulli',
        z_distribution='gaussian',
        hidden_activation='softplus'),
                                   optimizer=AdaMax(alpha=0.003),
                                   rng=rng)

    training_fcn = model.train.compile()

    sampling_fcn = model.sample.compile()

    for i, minibatch in enumerate(
            minibatch_iterate(data,
                              minibatch_size=minibatch_size,
                              n_epochs=n_epochs)):

        training_fcn(minibatch)

        if i % plot_interval == 0:
            print 'Epoch %s' % (i * minibatch_size / float(len(data)), )
            samples = sampling_fcn(25).reshape(5, 5, 28, 28)
            dbplot(samples, 'Samples from Model')
            dbplot(
                model.pq_pair.p_net.parameters[-2].get_value()[:25].reshape(
                    -1, 28, 28), 'dec')
            dbplot(
                model.pq_pair.q_net.parameters[0].get_value().T[:25].reshape(
                    -1, 28, 28), 'enc')
Example #20
def demo_temporal_mnist(n_samples=None, smoothing_steps=200):
    _, _, original_data, original_labels = get_mnist_dataset(
        n_training_samples=n_samples, n_test_samples=n_samples).xyxy
    _, _, temporal_data, temporal_labels = get_temporal_mnist_dataset(
        n_training_samples=n_samples,
        n_test_samples=n_samples,
        smoothing_steps=smoothing_steps).xyxy
    for ox, oy, tx, ty in zip(original_data, original_labels, temporal_data,
                              temporal_labels):
        with hold_dbplots():
            dbplot(ox, 'sample', title=str(oy))
            dbplot(tx, 'smooth', title=str(ty))
Example #21
def demo_gan_mnist(n_epochs=20,
                   minibatch_size=20,
                   n_discriminator_steps=1,
                   noise_dim=10,
                   plot_period=100,
                   rng=1234):
    """
    Train a Generative Adversarial network on MNIST data, showing generated samples as training progresses.

    :param n_epochs: Number of epochs to train
    :param minibatch_size: Size of minibatch to feed in each training iteration
    :param n_discriminator_steps: Number of steps training discriminator for every step of training generator
    :param noise_dim: Dimensionality of latent space (from which random samples are pulled)
    :param plot_period: Plot every N training iterations
    :param rng: Random number generator or seed
    """

    net = GenerativeAdversarialNetwork(
        discriminator=MultiLayerPerceptron.from_init(w_init=0.01,
                                                     layer_sizes=[784, 100, 1],
                                                     hidden_activation='relu',
                                                     output_activation='sig',
                                                     rng=rng),
        generator=MultiLayerPerceptron.from_init(
            w_init=0.1,
            layer_sizes=[noise_dim, 200, 784],
            hidden_activation='relu',
            output_activation='sig',
            rng=rng),
        noise_dim=noise_dim,
        optimizer=AdaMax(0.001),
        rng=rng)

    data = get_mnist_dataset(flat=True).training_set.input

    f_train_discriminator = net.train_discriminator.compile()
    f_train_generator = net.train_generator.compile()
    f_generate = net.generate.compile()

    for i, minibatch in enumerate(
            minibatch_iterate(data,
                              n_epochs=n_epochs,
                              minibatch_size=minibatch_size)):
        f_train_discriminator(minibatch)
        print 'Trained Discriminator'
        if i % n_discriminator_steps == n_discriminator_steps - 1:
            f_train_generator(n_samples=minibatch_size)
            print 'Trained Generator'
        if i % plot_period == 0:
            samples = f_generate(n_samples=minibatch_size)
            dbplot(minibatch.reshape(-1, 28, 28), "Real")
            dbplot(samples.reshape(-1, 28, 28), "Counterfeit")
            print 'Disp'
Example #22
def demo_compare_dtp_optimizers(
    hidden_sizes=[240],
    n_epochs=10,
    minibatch_size=20,
    n_tests=20,
    hidden_activation='tanh',
):

    dataset = get_mnist_dataset(flat=True).to_onehot()

    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 1
        n_tests = 2

    def make_dtp_net(optimizer_constructor, output_fcn):
        return DifferenceTargetMLP.from_initializer(
            input_size=dataset.input_size,
            output_size=dataset.target_size,
            hidden_sizes=hidden_sizes,
            optimizer_constructor=optimizer_constructor,
            input_activation='sigm',
            hidden_activation=hidden_activation,
            output_activation=output_fcn,
            w_init_mag=0.01,
            noise=1,
        ).compile()

    learning_curves = compare_predictors(
        dataset=dataset,
        online_predictors={
            'SGD-0.001-softmax':
            make_dtp_net(lambda: SimpleGradientDescent(0.001),
                         output_fcn='softmax'),
            'AdaMax-0.001-softmax':
            make_dtp_net(lambda: AdaMax(0.001), output_fcn='softmax'),
            'RMSProp-0.001-softmax':
            make_dtp_net(lambda: RMSProp(0.001), output_fcn='softmax'),
            'SGD-0.001-sigm':
            make_dtp_net(lambda: SimpleGradientDescent(0.001),
                         output_fcn='sigm'),
            'AdaMax-0.001-sigm':
            make_dtp_net(lambda: AdaMax(0.001), output_fcn='sigm'),
            'RMSProp-0.001-sigm':
            make_dtp_net(lambda: RMSProp(0.001), output_fcn='sigm'),
        },
        minibatch_size=minibatch_size,
        test_epochs=sqrtspace(0, n_epochs, n_tests),
        evaluation_function=percent_argmax_correct,
    )

    plot_learning_curves(learning_curves)
Example #23
def demo_variational_autoencoder(
        minibatch_size = 100,
        n_epochs = 2000,
        plot_interval = 100,
        seed = None
        ):
    """
    Train a Variational Autoencoder on MNIST and look at the samples it generates.
    :param minibatch_size: Number of elements in the minibatch
    :param n_epochs: Number of passes through dataset
    :param plot_interval: Plot every x iterations
    """

    data = get_mnist_dataset(flat = True).training_set.input

    if is_test_mode():
        n_epochs=1
        minibatch_size = 10
        data = data[:100]

    rng = get_rng(seed)

    model = VariationalAutoencoder(
        pq_pair = EncoderDecoderNetworks(
            x_dim=data.shape[1],
            z_dim = 20,
            encoder_hidden_sizes = [200],
            decoder_hidden_sizes = [200],
            w_init = lambda n_in, n_out: 0.01*np.random.randn(n_in, n_out),
            x_distribution='bernoulli',
            z_distribution='gaussian',
            hidden_activation = 'softplus'
            ),
        optimizer=AdaMax(alpha = 0.003),
        rng = rng
        )

    training_fcn = model.train.compile()

    sampling_fcn = model.sample.compile()

    for i, minibatch in enumerate(minibatch_iterate(data, minibatch_size=minibatch_size, n_epochs=n_epochs)):

        training_fcn(minibatch)

        if i % plot_interval == 0:
            print 'Epoch %s' % (i*minibatch_size/float(len(data)), )
            samples = sampling_fcn(25).reshape(5, 5, 28, 28)
            dbplot(samples, 'Samples from Model')
            dbplot(model.pq_pair.p_net.parameters[-2].get_value()[:25].reshape(-1, 28, 28), 'dec')
            dbplot(model.pq_pair.q_net.parameters[0].get_value().T[:25].reshape(-1, 28, 28), 'enc')
Example #24
def demo_run_dtp_on_mnist(hidden_sizes=[240],
                          n_epochs=20,
                          n_tests=20,
                          minibatch_size=100,
                          input_activation='sigm',
                          hidden_activation='tanh',
                          output_activation='softmax',
                          optimizer_constructor=lambda: RMSProp(0.001),
                          normalize_inputs=False,
                          local_cost_function=mean_squared_error,
                          output_cost_function=None,
                          noise=1,
                          lin_dtp=False,
                          seed=1234):

    dataset = get_mnist_dataset(flat=True).to_onehot()
    if normalize_inputs:
        dataset = dataset.process_with(targets_processor=multichannel(
            lambda x: x / np.sum(x, axis=1, keepdims=True)))
    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 1
        n_tests = 2

    predictor = DifferenceTargetMLP.from_initializer(
        input_size=dataset.input_size,
        output_size=dataset.target_size,
        hidden_sizes=hidden_sizes,
        optimizer_constructor=
        optimizer_constructor,  # Note that RMSProp/AdaMax way outperform SGD here.
        # input_activation=input_activation,
        hidden_activation=hidden_activation,
        output_activation=output_activation,
        w_init_mag=0.01,
        output_cost_function=output_cost_function,
        noise=noise,
        cost_function=local_cost_function,
        layer_constructor=DifferenceTargetLayer.from_initializer if not lin_dtp
        else PreActivationDifferenceTargetLayer.from_initializer,
        rng=seed).compile()

    result = assess_online_predictor(
        predictor=predictor,
        dataset=dataset,
        minibatch_size=minibatch_size,
        evaluation_function='percent_argmax_correct',
        test_epochs=sqrtspace(0, n_epochs, n_tests),
        test_callback=lambda p: dbplot(p.symbolic_predictor.layers[0].w.
                                       get_value().T.reshape(-1, 28, 28)))

    plot_learning_curves(result)
Example #25
def demo_run_dtp_on_mnist(
        hidden_sizes = [240],
        n_epochs = 20,
        n_tests = 20,
        minibatch_size=100,
        input_activation = 'sigm',
        hidden_activation = 'tanh',
        output_activation = 'softmax',
        optimizer_constructor = lambda: RMSProp(0.001),
        normalize_inputs = False,
        local_cost_function = mean_squared_error,
        output_cost_function = None,
        noise = 1,
        lin_dtp = False,
        seed = 1234
        ):

    dataset = get_mnist_dataset(flat = True).to_onehot()
    if normalize_inputs:
        dataset = dataset.process_with(targets_processor=multichannel(lambda x: x/np.sum(x, axis = 1, keepdims=True)))
    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 1
        n_tests = 2

    predictor = DifferenceTargetMLP.from_initializer(
            input_size = dataset.input_size,
            output_size = dataset.target_size,
            hidden_sizes = hidden_sizes,
            optimizer_constructor = optimizer_constructor,  # Note that RMSProp/AdaMax way outperform SGD here.
            # input_activation=input_activation,
            hidden_activation=hidden_activation,
            output_activation=output_activation,
            w_init_mag=0.01,
            output_cost_function=output_cost_function,
            noise = noise,
            cost_function = local_cost_function,
            layer_constructor=DifferenceTargetLayer.from_initializer if not lin_dtp else PreActivationDifferenceTargetLayer.from_initializer,
            rng = seed
            ).compile()

    result = assess_online_predictor(
        predictor = predictor,
        dataset = dataset,
        minibatch_size=minibatch_size,
        evaluation_function='percent_argmax_correct',
        test_epochs = sqrtspace(0, n_epochs, n_tests),
        test_callback=lambda p: dbplot(p.symbolic_predictor.layers[0].w.get_value().T.reshape(-1, 28, 28))
        )

    plot_learning_curves(result)
Example #26
def demo_dtp_varieties(
        hidden_sizes = [240],
        n_epochs = 10,
        minibatch_size = 20,
        n_tests = 20,
        hidden_activation = 'tanh',
        output_activation = 'sigm',
        optimizer = 'adamax',
        learning_rate = 0.01,
        noise = 1,
        predictors = ['MLP', 'DTP', 'PreAct-DTP', 'Linear-DTP'],
        rng = 1234,
        live_plot = False,
        plot = False
        ):
    """
    Train and compare an MLP and several Difference Target Propagation (DTP) variants on MNIST.

    :param hidden_sizes:
    :param n_epochs:
    :param minibatch_size:
    :param n_tests:
    :return:
    """
    if isinstance(predictors, str):
        predictors = [predictors]

    dataset = get_mnist_dataset(flat = True)
    dataset = dataset.process_with(targets_processor=lambda (x, ): (OneHotEncoding(10)(x).astype(int), ))
    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 0.1
        n_tests = 3

    set_default_figure_size(12, 9)

    predictors = OrderedDict((name, get_predictor(name, input_size = dataset.input_size, target_size=dataset.target_size,
            hidden_sizes=hidden_sizes, hidden_activation=hidden_activation, output_activation = output_activation,
            optimizer=optimizer, learning_rate=learning_rate, noise = noise, rng = rng)) for name in predictors)

    learning_curves = compare_predictors(
        dataset=dataset,
        online_predictors = predictors,
        minibatch_size = minibatch_size,
        test_epochs = sqrtspace(0, n_epochs, n_tests),
        evaluation_function = percent_argmax_correct,
        )

    if plot:
        plot_learning_curves(learning_curves)
Example #27
def mlp_normalization(hidden_size=300,
                      n_epochs=30,
                      n_tests=50,
                      minibatch_size=20):
    """
    Compare an MLP with different schemes for normalizing input.

    regular: Regular vanilla MLP
    normalize: Mean-subtract/normalize over minibatch
    normalize and scale: Mean-subtract/normalize over minibatch AND multiply by a trainable
        (per-unit) scale parameter.

    Conclusions: No significant benefit to scale parameter.  Normalizing gives
    a head start but incurs a small cost later on.  But really all classifiers are quite similar.

    :param hidden_size: Size of hidden layer
    """
    dataset = get_mnist_dataset()

    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 0.1
        n_tests = 3

    make_mlp = lambda normalize, scale: GradientBasedPredictor(
        function=MultiLayerPerceptron.from_init(layer_sizes=[
            dataset.input_size, hidden_size, dataset.n_categories
        ],
                                                hidden_activation='sig',
                                                output_activation='lin',
                                                normalize_minibatch=normalize,
                                                scale_param=scale,
                                                w_init=0.01,
                                                rng=5),
        cost_function=softmax_negative_log_likelihood,
        optimizer=SimpleGradientDescent(eta=0.1),
    ).compile()

    return compare_predictors(dataset=dataset,
                              online_predictors={
                                  'regular':
                                  make_mlp(normalize=False, scale=False),
                                  'normalize':
                                  make_mlp(normalize=True, scale=False),
                                  'normalize and scale':
                                  make_mlp(normalize=True, scale=True),
                              },
                              minibatch_size=minibatch_size,
                              test_epochs=sqrtspace(0, n_epochs, n_tests),
                              evaluation_function=percent_argmax_correct)
Example #28
def demo_rbm_mnist(plot = True, test_mode = False):
    """
    In this demo we train an RBM on the MNIST input data (labels are ignored).  We plot the state of a Markov chain
    that is being simultaneously sampled from the RBM, and the parameters of the RBM.

    What you see:
    A plot will appear with 6 subplots.  The subplots are as follows:
    hidden-neg-chain: The activity of the hidden layer for each of the persistent CD chains for drawing negative samples.
    visible-neg-chain: The probabilities of the visible activations corresponding to the state of hidden-neg-chain.
    w: A subset of the weight vectors, reshaped to the shape of the input.
    b: The bias of the hidden units.
    b_rev: The bias of the visible units.
    visible-sample: The probabilities of the visible samples drawn from an independent free-sampling chain (outside the
        training function).

    As learning progresses, visible-neg-chain and visible-sample should increasingly resemble the data.
    """
    set_enable_omniscence(True)
    minibatch_size = 9
    n_epochs = 0.01 if test_mode else 10

    dataset = get_mnist_dataset().process_with(inputs_processor=lambda (x, ): (x.reshape(x.shape[0], -1), ))

    rbm = simple_rbm(
        visible_layer = StochasticLayer('bernoulli'),
        bridge=FullyConnectedBridge(w = 0.001*np.random.randn(28*28, 500).astype(theano.config.floatX), b=0, b_rev = 0),
        hidden_layer = StochasticLayer('bernoulli')
        )

    train_function = rbm.get_training_fcn(n_gibbs = 4, persistent = True, optimizer = SimpleGradientDescent(eta = 0.01)).compile()
    sampling_function = rbm.get_free_sampling_fcn(init_visible_state = np.random.randn(9, 28*28), return_smooth_visible = True).compile()

    if plot:
        def debug_variable_setter():
            lv = train_function.symbolic.locals()
            return {
                'hidden-neg-chain': lv.sleep_hidden.reshape((-1, 25, 20)),
                'visible-neg-chain': lv.hidden_layer.smooth(lv.bridge.reverse(lv.sleep_hidden)).reshape((-1, 28, 28)),
                'w': lv.bridge.parameters[0].T[:25].reshape((-1, 28, 28)),
                'b': lv.bridge.parameters[1].reshape((25, 20)),
                'b_rev': lv.bridge.parameters[2].reshape((28, 28)),
                }
        train_function.set_debug_variables(debug_variable_setter)

    stream = LiveStream(lambda: dict(train_function.get_debug_values().items()+[('visible-sample', visible_samples.reshape((-1, 28, 28)))]), update_every=10)
    for _, visible_data, _ in dataset.training_set.minibatch_iterator(minibatch_size = minibatch_size, epochs = n_epochs, single_channel = True):
        visible_samples, _ = sampling_function()
        train_function(visible_data)
        if plot:
            stream.update()
Example #29
def mlp_normalization(hidden_size = 300, n_epochs = 30, n_tests = 50, minibatch_size=20):
    """
    Compare an MLP with different schemes for normalizing input.

    regular: Regular vanilla MLP
    normalize: Mean-subtract/normalize over minibatch
    normalize and scale: Mean-subtract/normalize over minibatch AND multiply by a trainable
        (per-unit) scale parameter.

    Conclusions: No significant benefit to scale parameter.  Normalizing gives
    a head start but incurs a small cost later on.  But really all classifiers are quite similar.

    :param hidden_size: Size of hidden layer
    """
    dataset = get_mnist_dataset()

    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 0.1
        n_tests = 3

    make_mlp = lambda normalize, scale: GradientBasedPredictor(
            function = MultiLayerPerceptron.from_init(
                layer_sizes=[dataset.input_size, hidden_size, dataset.n_categories],
                hidden_activation='sig',
                output_activation='lin',
                normalize_minibatch=normalize,
                scale_param=scale,
                w_init = 0.01,
                rng = 5
                ),
            cost_function = softmax_negative_log_likelihood,
            optimizer = SimpleGradientDescent(eta = 0.1),
            ).compile()

    return compare_predictors(
        dataset=dataset,
        online_predictors = {
            'regular': make_mlp(normalize = False, scale = False),
            'normalize': make_mlp(normalize=True, scale = False),
            'normalize and scale': make_mlp(normalize=True, scale = True),
            },
        minibatch_size = minibatch_size,
        test_epochs = sqrtspace(0, n_epochs, n_tests),
        evaluation_function = percent_argmax_correct
        )
Example #30
def backprop_vs_difference_target_prop(hidden_sizes=[240],
                                       n_epochs=10,
                                       minibatch_size=20,
                                       n_tests=20):

    dataset = get_mnist_dataset(flat=True)
    dataset = dataset.process_with(
        targets_processor=lambda (x, ): (OneHotEncoding(10)(x).astype(int), ))

    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 0.1
        n_tests = 3

    set_default_figure_size(12, 9)

    return compare_predictors(
        dataset=dataset,
        online_predictors={
            'backprop-mlp':
            GradientBasedPredictor(
                function=MultiLayerPerceptron.from_init(
                    layer_sizes=[dataset.input_size] + hidden_sizes +
                    [dataset.n_categories],
                    hidden_activation='tanh',
                    output_activation='sig',
                    w_init=0.01,
                    rng=5),
                cost_function=mean_squared_error,
                optimizer=AdaMax(0.01),
            ).compile(),
            'difference-target-prop-mlp':
            DifferenceTargetMLP.from_initializer(
                input_size=dataset.input_size,
                output_size=dataset.target_size,
                hidden_sizes=hidden_sizes,
                optimizer_constructor=lambda: AdaMax(0.01),
                w_init=0.01,
                noise=1,
            ).compile()
        },
        minibatch_size=minibatch_size,
        test_epochs=sqrtspace(0, n_epochs, n_tests),
        evaluation_function=percent_argmax_correct,
    )
Example #31
def backprop_vs_difference_target_prop(
        hidden_sizes = [240],
        n_epochs = 10,
        minibatch_size = 20,
        n_tests = 20
        ):

    dataset = get_mnist_dataset(flat = True)
    dataset = dataset.process_with(targets_processor=lambda (x, ): (OneHotEncoding(10)(x).astype(int), ))

    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 0.1
        n_tests = 3

    set_default_figure_size(12, 9)

    return compare_predictors(
        dataset=dataset,
        online_predictors = {
            'backprop-mlp': GradientBasedPredictor(
                function = MultiLayerPerceptron.from_init(
                layer_sizes=[dataset.input_size]+hidden_sizes+[dataset.n_categories],
                    hidden_activation='tanh',
                    output_activation='sig',
                    w_init = 0.01,
                    rng = 5
                    ),
                cost_function = mean_squared_error,
                optimizer = AdaMax(0.01),
                ).compile(),
            'difference-target-prop-mlp': DifferenceTargetMLP.from_initializer(
                input_size = dataset.input_size,
                output_size = dataset.target_size,
                hidden_sizes = hidden_sizes,
                optimizer_constructor = lambda: AdaMax(0.01),
                w_init=0.01,
                noise = 1,
            ).compile()
            },
        minibatch_size = minibatch_size,
        test_epochs = sqrtspace(0, n_epochs, n_tests),
        evaluation_function = percent_argmax_correct,
        )
Example #32
def demo_compare_dtp_optimizers(
        hidden_sizes = [240],
        n_epochs = 10,
        minibatch_size = 20,
        n_tests = 20,
        hidden_activation = 'tanh',
        ):

    dataset = get_mnist_dataset(flat = True).to_onehot()

    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 1
        n_tests = 2

    def make_dtp_net(optimizer_constructor, output_fcn):
        return DifferenceTargetMLP.from_initializer(
            input_size = dataset.input_size,
            output_size = dataset.target_size,
            hidden_sizes = hidden_sizes,
            optimizer_constructor = optimizer_constructor,
            input_activation='sigm',
            hidden_activation=hidden_activation,
            output_activation=output_fcn,
            w_init_mag=0.01,
            noise = 1,
            ).compile()

    learning_curves = compare_predictors(
        dataset=dataset,
        online_predictors = {
            'SGD-0.001-softmax': make_dtp_net(lambda: SimpleGradientDescent(0.001), output_fcn = 'softmax'),
            'AdaMax-0.001-softmax': make_dtp_net(lambda: AdaMax(0.001), output_fcn = 'softmax'),
            'RMSProp-0.001-softmax': make_dtp_net(lambda: RMSProp(0.001), output_fcn = 'softmax'),
            'SGD-0.001-sigm': make_dtp_net(lambda: SimpleGradientDescent(0.001), output_fcn = 'sigm'),
            'AdaMax-0.001-sigm': make_dtp_net(lambda: AdaMax(0.001), output_fcn = 'sigm'),
            'RMSProp-0.001-sigm': make_dtp_net(lambda: RMSProp(0.001), output_fcn = 'sigm'),
            },
        minibatch_size = minibatch_size,
        test_epochs = sqrtspace(0, n_epochs, n_tests),
        evaluation_function = percent_argmax_correct,
        )

    plot_learning_curves(learning_curves)
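
The six predictors above differ only in the optimizer constructor and the output activation, so the dictionary could also be built programmatically. A sketch of that alternative, reusing the make_dtp_net helper and the optimizer classes from this example:

import itertools
from collections import OrderedDict

optimizer_constructors = OrderedDict([
    ('SGD-0.001', lambda: SimpleGradientDescent(0.001)),
    ('AdaMax-0.001', lambda: AdaMax(0.001)),
    ('RMSProp-0.001', lambda: RMSProp(0.001)),
    ])
online_predictors = OrderedDict(
    ('%s-%s' % (opt_name, out_fcn), make_dtp_net(opt_con, output_fcn=out_fcn))
    for (opt_name, opt_con), out_fcn
    in itertools.product(optimizer_constructors.items(), ['softmax', 'sigm'])
    )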
Exemplo n.º 33
0
def demo_mnist_mlp(
        minibatch_size = 10,
        learning_rate = 0.1,
        optimizer = 'sgd',
        hidden_sizes = [300],
        w_init = 0.01,
        hidden_activation = 'tanh',
        output_activation = 'softmax',
        cost = 'nll-d',
        visualize_params = False,
        n_test_points = 30,
        n_epochs = 10,
        max_training_samples = None,
        use_bias = True,
        onehot = False,
        rng = 1234,
        plot = False,
        ):
    """
    Train an MLP on MNIST and print the test scores as training progresses.
    """

    if is_test_mode():
        n_test_points = 3
        minibatch_size = 5
        n_epochs = 0.01
        dataset = get_mnist_dataset(n_training_samples=30, n_test_samples=30)
    else:
        dataset = get_mnist_dataset(n_training_samples=max_training_samples)

    if onehot:
        dataset = dataset.to_onehot()

    if minibatch_size == 'full':
        minibatch_size = dataset.training_set.n_samples

    optimizer = get_named_optimizer(name = optimizer, learning_rate=learning_rate)

    # Setup the training and test functions
    predictor = GradientBasedPredictor(
        function = MultiLayerPerceptron.from_init(
            layer_sizes=[dataset.input_size]+hidden_sizes+[10],
            hidden_activation=hidden_activation,
            output_activation=output_activation,
            w_init = w_init,
            use_bias=use_bias,
            rng = rng,
            ),
        cost_function=cost,
        optimizer=optimizer
        ).compile()  # .compile() turns the GradientBasedPredictor, which works with symbolic variables, into a real one that takes and returns arrays.

    def vis_callback(xx):
        p = predictor.symbolic_predictor._function
        in_layer = {
            'Layer[0].w': p.layers[0].linear_transform._w.get_value().T.reshape(-1, 28, 28),
            'Layer[0].b': p.layers[0].linear_transform._b.get_value(),
            }
        other_layers = [{'Layer[%s].w' % (i+1): l.linear_transform._w.get_value(), 'Layer[%s].b' % (i+1): l.linear_transform._b.get_value()} for i, l in enumerate(p.layers[1:])]
        dbplot(dict(in_layer.items() + sum([o.items() for o in other_layers], [])))

    # Train and periodically report the test score.
    results = assess_online_predictor(
        dataset=dataset,
        predictor=predictor,
        evaluation_function='percent_argmax_correct',
        test_epochs=sqrtspace(0, n_epochs, n_test_points),
        minibatch_size=minibatch_size,
        test_callback=vis_callback if visualize_params else None
    )

    if plot:
        plot_learning_curves(results)
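
percent_argmax_correct, used as the evaluation function here and in most of these demos, is assumed to score the network by comparing argmaxes of predictions and targets. A plain numpy sketch of that assumed behaviour (not the library's actual code):

import numpy as np

def percent_argmax_correct_sketch(actual, target):
    # actual: (n_samples, n_classes) scores; target: integer labels or one-hot rows.
    guesses = np.argmax(actual, axis=1)
    truths = target if target.ndim == 1 else np.argmax(target, axis=1)
    return 100.0 * np.mean(guesses == truths)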
Exemplo n.º 34
0
def demo_simple_vae_on_mnist(
        minibatch_size = 100,
        n_epochs = 2000,
        plot_interval = 100,
        calculation_interval = 500,
        z_dim = 2,
        hidden_sizes = [400, 200],
        learning_rate = 0.003,
        hidden_activation = 'softplus',
        binary_x = True,
        w_init_mag = 0.01,
        gaussian_min_var = None,
        manifold_grid_size = 11,
        manifold_grid_span = 2,
        seed = None
        ):
    """
    Train a Variational Autoencoder on MNIST and look at the samples it generates.
    """

    dataset = get_mnist_dataset(flat = True)
    training_data = dataset.training_set.input
    test_data = dataset.test_set.input

    if is_test_mode():
        n_epochs=1
        minibatch_size = 10
        training_data = training_data[:100]
        test_data = test_data[:100]

    model = GaussianVariationalAutoencoder(
        x_dim=training_data.shape[1],
        z_dim = z_dim,
        encoder_hidden_sizes = hidden_sizes,
        decoder_hidden_sizes = hidden_sizes[::-1],
        w_init_mag = w_init_mag,
        binary_data=binary_x,
        hidden_activation = hidden_activation,
        optimizer=AdaMax(alpha = learning_rate),
        gaussian_min_var = gaussian_min_var,
        rng = seed
        )

    training_fcn = model.train.compile()

    # For display, make functions to sample and represent the manifold.
    sampling_fcn = model.sample.compile()
    z_manifold_grid = np.array([x.flatten() for x in np.meshgrid(np.linspace(-manifold_grid_span, manifold_grid_span, manifold_grid_size),
        np.linspace(-manifold_grid_span, manifold_grid_span, manifold_grid_size))]+[np.zeros(manifold_grid_size**2)]*(z_dim-2)).T
    decoder_mean_fcn = model.decode.compile(fixed_args = dict(z = z_manifold_grid))
    lower_bound_fcn = model.compute_lower_bound.compile()

    for i, minibatch in enumerate(minibatch_iterate(training_data, minibatch_size=minibatch_size, n_epochs=n_epochs)):

        training_fcn(minibatch)

        if i % plot_interval == 0:
            samples = sampling_fcn(25).reshape(5, 5, 28, 28)
            dbplot(samples, 'Samples from Model')
            if binary_x:
                manifold_means = decoder_mean_fcn()
            else:
                manifold_means, _ = decoder_mean_fcn()
            dbplot(manifold_means.reshape(manifold_grid_size, manifold_grid_size, 28, 28), 'First 2-dimensions of manifold.')
        if i % calculation_interval == 0:
            training_lower_bound = lower_bound_fcn(training_data)
            test_lower_bound = lower_bound_fcn(test_data)
            print 'Epoch: %s, Training Lower Bound: %s, Test Lower bound: %s' % \
                (i*minibatch_size/float(len(training_data)), training_lower_bound, test_lower_bound)
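
The one-line construction of z_manifold_grid above is dense. An equivalent, more readable construction of the same array (an evenly spaced grid over the first two latent dimensions, with any remaining dimensions held at zero):

import numpy as np

def make_manifold_grid(grid_size, grid_span, z_dim):
    ticks = np.linspace(-grid_span, grid_span, grid_size)
    z0, z1 = np.meshgrid(ticks, ticks)
    columns = [z0.flatten(), z1.flatten()] + [np.zeros(grid_size ** 2)] * (z_dim - 2)
    return np.array(columns).T  # shape: (grid_size**2, z_dim)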
Exemplo n.º 35
0
def demo_simple_dbn(minibatch_size=10,
                    n_training_epochs_1=5,
                    n_training_epochs_2=50,
                    n_hidden_1=500,
                    n_hidden_2=10,
                    plot_period=100,
                    eta1=0.01,
                    eta2=0.0001,
                    w_init_mag_1=0.01,
                    w_init_mag_2=0.5,
                    seed=None):
    """
    Train a DBN, and create a function to project the test data into a latent space

    :param minibatch_size:
    :param n_training_epochs_1: Number of training epochs for the first-level RBM
    :param n_training_epochs_2: Number of training epochs for the second-level RBM
    :param n_hidden_1: Number of hidden units for first RBM
    :param n_hidden_2: Number of hidden units for second RBM
    :param plot_period: How often to plot
    :param seed:
    :return:
    """

    dataset = get_mnist_dataset(flat=True)
    rng = np.random.RandomState(seed)
    w_init_1 = lambda shape: w_init_mag_1 * rng.randn(*shape)
    w_init_2 = lambda shape: w_init_mag_2 * rng.randn(*shape)

    if is_test_mode():
        n_training_epochs_1 = 0.01
        n_training_epochs_2 = 0.01

    # Train the first RBM
    dbn1 = StackedDeepBeliefNet(rbms=[
        BernoulliBernoulliRBM.from_initializer(
            n_visible=784, n_hidden=n_hidden_1, w_init_fcn=w_init_1)
    ])
    train_first_layer = dbn1.get_training_fcn(
        optimizer=SimpleGradientDescent(eta=eta1), n_gibbs=1,
        persistent=True).compile()
    sample_first_layer = dbn1.get_sampling_fcn(
        initial_vis=dataset.training_set.input[:minibatch_size],
        n_steps=10).compile()
    for i, vis_data in enumerate(
            minibatch_iterate(dataset.training_set.input,
                              minibatch_size=minibatch_size,
                              n_epochs=n_training_epochs_1)):
        if i % plot_period == plot_period - 1:
            dbplot(dbn1.rbms[0].w.get_value().T[:100].reshape([-1, 28, 28]),
                   'weights1')
            dbplot(sample_first_layer()[0].reshape(-1, 28, 28), 'samples1')
        train_first_layer(vis_data)

    # Train the second RBM
    dbn2 = dbn1.stack_another(rbm=BernoulliGaussianRBM.from_initializer(
        n_visible=n_hidden_1, n_hidden=n_hidden_2, w_init_fcn=w_init_2))
    train_second_layer = dbn2.get_training_fcn(
        optimizer=SimpleGradientDescent(eta=eta2), n_gibbs=1,
        persistent=True).compile()
    sample_second_layer = dbn2.get_sampling_fcn(
        initial_vis=dataset.training_set.input[:minibatch_size],
        n_steps=10).compile()
    for i, vis_data in enumerate(
            minibatch_iterate(dataset.training_set.input,
                              minibatch_size=minibatch_size,
                              n_epochs=n_training_epochs_2)):
        if i % plot_period == 0:
            dbplot(dbn2.rbms[1].w.get_value(), 'weights2')
            dbplot(sample_second_layer()[0].reshape(-1, 28, 28), 'samples2')
        train_second_layer(vis_data)

    # Project data to latent space.
    project_to_latent = dbn2.propup.compile(fixed_args=dict(stochastic=False))
    latent_test_data = project_to_latent(dataset.test_set.input)
    print 'Projected the test data to a latent space.  Shape: %s' % (
        latent_test_data.shape, )

    decode = dbn2.propdown.compile(fixed_args=dict(stochastic=False))
    recon_test_data = decode(latent_test_data)
    print 'Reconstructed the test data.  Shape: %s' % (recon_test_data.shape, )
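
As a hypothetical follow-up (not part of the original demo), the projected test data could be scatter-plotted against the digit labels to see whether classes separate in the latent space. This assumes latent_test_data has at least two columns and that dataset.test_set.target holds integer labels:

import matplotlib.pyplot as plt

plt.scatter(latent_test_data[:, 0], latent_test_data[:, 1],
            c=dataset.test_set.target, cmap='jet', s=5)
plt.colorbar(label='digit class')
plt.title('First two latent dimensions of the test set')
plt.show()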
Exemplo n.º 36
0
def demo_simple_dbn(
        minibatch_size = 10,
        n_training_epochs_1 = 5,
        n_training_epochs_2 = 50,
        n_hidden_1 = 500,
        n_hidden_2 = 10,
        plot_period = 100,
        eta1 = 0.01,
        eta2 = 0.0001,
        w_init_mag_1 = 0.01,
        w_init_mag_2 = 0.5,
        seed = None
        ):
    """
    Train a DBN, and create a function to project the test data into a latent space

    :param minibatch_size:
    :param n_training_epochs_1: Number of training epochs for the first-level RBM
    :param n_training_epochs_2: Number of training epochs for the second-level RBM
    :param n_hidden_1: Number of hidden units for first RBM
    :param n_hidden_2: Number of hidden units for second RBM
    :param plot_period: How often to plot
    :param seed:
    :return:
    """

    dataset = get_mnist_dataset(flat = True)
    rng = np.random.RandomState(seed)
    w_init_1 = lambda shape: w_init_mag_1 * rng.randn(*shape)
    w_init_2 = lambda shape: w_init_mag_2 * rng.randn(*shape)

    if is_test_mode():
        n_training_epochs_1 = 0.01
        n_training_epochs_2 = 0.01

    # Train the first RBM
    dbn1 = StackedDeepBeliefNet(rbms = [BernoulliBernoulliRBM.from_initializer(n_visible = 784, n_hidden=n_hidden_1, w_init_fcn = w_init_1)])
    train_first_layer = dbn1.get_training_fcn(optimizer=SimpleGradientDescent(eta = eta1), n_gibbs = 1, persistent=True).compile()
    sample_first_layer = dbn1.get_sampling_fcn(initial_vis=dataset.training_set.input[:minibatch_size], n_steps = 10).compile()
    for i, vis_data in enumerate(minibatch_iterate(dataset.training_set.input, minibatch_size=minibatch_size, n_epochs=n_training_epochs_1)):
        if i % plot_period == plot_period-1:
            dbplot(dbn1.rbms[0].w.get_value().T[:100].reshape([-1, 28, 28]), 'weights1')
            dbplot(sample_first_layer()[0].reshape(-1, 28, 28), 'samples1')
        train_first_layer(vis_data)

    # Train the second RBM
    dbn2 = dbn1.stack_another(rbm = BernoulliGaussianRBM.from_initializer(n_visible=n_hidden_1, n_hidden=n_hidden_2, w_init_fcn=w_init_2))
    train_second_layer = dbn2.get_training_fcn(optimizer=SimpleGradientDescent(eta = eta2), n_gibbs = 1, persistent=True).compile()
    sample_second_layer = dbn2.get_sampling_fcn(initial_vis=dataset.training_set.input[:minibatch_size], n_steps = 10).compile()
    for i, vis_data in enumerate(minibatch_iterate(dataset.training_set.input, minibatch_size=minibatch_size, n_epochs=n_training_epochs_2)):
        if i % plot_period == 0:
            dbplot(dbn2.rbms[1].w.get_value(), 'weights2')
            dbplot(sample_second_layer()[0].reshape(-1, 28, 28), 'samples2')
        train_second_layer(vis_data)

    # Project data to latent space.
    project_to_latent = dbn2.propup.compile(fixed_args = dict(stochastic = False))
    latent_test_data = project_to_latent(dataset.test_set.input)
    print 'Projected the test data to a latent space.  Shape: %s' % (latent_test_data.shape, )

    decode = dbn2.propdown.compile(fixed_args = dict(stochastic = False))
    recon_test_data = decode(latent_test_data)
    print 'Reconstructed the test data.  Shape: %s' % (recon_test_data.shape, )
Exemplo n.º 37
0
def get_temporal_mnist_dataset(smoothing_steps=1000, **mnist_kwargs):

    tr_x, tr_y, ts_x, ts_y = get_mnist_dataset(**mnist_kwargs).xyxy
    tr_ixs = temporalize(tr_x, smoothing_steps=smoothing_steps)
    ts_ixs = temporalize(ts_x, smoothing_steps=smoothing_steps)
    return DataSet.from_xyxy(tr_x[tr_ixs], tr_y[tr_ixs], ts_x[ts_ixs], ts_y[ts_ixs])
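
A short usage sketch for the helper above. temporalize is assumed to return an index ordering that makes consecutive samples similar, so the reordered dataset can stand in for slowly varying, "temporal" input; keyword arguments are forwarded to get_mnist_dataset:

# Hypothetical usage; smoothing_steps and flat are example arguments.
temporal_mnist = get_temporal_mnist_dataset(smoothing_steps=1000, flat=True)
tr_x, tr_y, ts_x, ts_y = temporal_mnist.xyxy
print(tr_x.shape)  # same data as ordinary MNIST, reordered into a smooth sequence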
Exemplo n.º 38
0
def demo_simple_vae_on_mnist(minibatch_size=100,
                             n_epochs=2000,
                             plot_interval=100,
                             calculation_interval=500,
                             z_dim=2,
                             hidden_sizes=[400, 200],
                             learning_rate=0.003,
                             hidden_activation='softplus',
                             binary_x=True,
                             w_init_mag=0.01,
                             gaussian_min_var=None,
                             manifold_grid_size=11,
                             manifold_grid_span=2,
                             seed=None):
    """
    Train a Variational Autoencoder on MNIST and look at the samples it generates.
    """

    dataset = get_mnist_dataset(flat=True)
    training_data = dataset.training_set.input
    test_data = dataset.test_set.input

    if is_test_mode():
        n_epochs = 1
        minibatch_size = 10
        training_data = training_data[:100]
        test_data = test_data[:100]

    model = GaussianVariationalAutoencoder(
        x_dim=training_data.shape[1],
        z_dim=z_dim,
        encoder_hidden_sizes=hidden_sizes,
        decoder_hidden_sizes=hidden_sizes[::-1],
        w_init_mag=w_init_mag,
        binary_data=binary_x,
        hidden_activation=hidden_activation,
        optimizer=AdaMax(alpha=learning_rate),
        gaussian_min_var=gaussian_min_var,
        rng=seed)

    training_fcn = model.train.compile()

    # For display, make functions to sample and represent the manifold.
    sampling_fcn = model.sample.compile()
    z_manifold_grid = np.array([
        x.flatten() for x in np.meshgrid(
            np.linspace(-manifold_grid_span, manifold_grid_span,
                        manifold_grid_size),
            np.linspace(-manifold_grid_span, manifold_grid_span,
                        manifold_grid_size))
    ] + [np.zeros(manifold_grid_size**2)] * (z_dim - 2)).T
    decoder_mean_fcn = model.decode.compile(fixed_args=dict(z=z_manifold_grid))
    lower_bound_fcn = model.compute_lower_bound.compile()

    for i, minibatch in enumerate(
            minibatch_iterate(training_data,
                              minibatch_size=minibatch_size,
                              n_epochs=n_epochs)):

        training_fcn(minibatch)

        if i % plot_interval == 0:
            samples = sampling_fcn(25).reshape(5, 5, 28, 28)
            dbplot(samples, 'Samples from Model')
            if binary_x:
                manifold_means = decoder_mean_fcn()
            else:
                manifold_means, _ = decoder_mean_fcn()
            dbplot(
                manifold_means.reshape(manifold_grid_size, manifold_grid_size,
                                       28, 28),
                'First 2-dimensions of manifold.')
        if i % calculation_interval == 0:
            training_lower_bound = lower_bound_fcn(training_data)
            test_lower_bound = lower_bound_fcn(test_data)
            print 'Epoch: %s, Training Lower Bound: %s, Test Lower bound: %s' % \
                (i*minibatch_size/float(len(training_data)), training_lower_bound, test_lower_bound)
Exemplo n.º 39
0
def compare_spiking_to_nonspiking(hidden_sizes = [300, 300], eta=0.01, w_init=0.01, fractional = False, n_epochs = 20,
                                  forward_discretize = 'rect-herding', back_discretize = 'noreset-herding', test_discretize='rect-herding', save_results = False):

    mnist = get_mnist_dataset(flat=True).to_onehot()
    test_epochs=[0.0, 0.05, 0.1, 0.2, 0.5]+range(1, n_epochs+1)

    if is_test_mode():
        mnist = mnist.shorten(500)
        eta = 0.01
        w_init=0.01
        test_epochs = [0.0, 0.05, 0.1]

    spiking_net = JavaSpikingNetWrapper.from_init(
        fractional = fractional,
        depth_first=False,
        smooth_grads = False,
        forward_discretize = forward_discretize,
        back_discretize = back_discretize,
        test_discretize = test_discretize,
        w_init=w_init,
        hold_error=True,
        rng = 1234,
        n_steps = 10,
        eta=eta,
        layer_sizes=[784]+hidden_sizes+[10],
        )

    relu_net = GradientBasedPredictor(
        MultiLayerPerceptron.from_init(
            hidden_activation = 'relu',
            output_activation = 'relu',
            layer_sizes=[784]+hidden_sizes+[10],
            use_bias=False,
            w_init=w_init,
            rng=1234,
            ),
        cost_function = 'mse',
        optimizer=GradientDescent(eta)
        ).compile()

    # Listen for spikes
    forward_eavesdropper = jp.JClass('nl.uva.deepspike.eavesdroppers.SpikeCountingEavesdropper')()
    backward_eavesdropper = jp.JClass('nl.uva.deepspike.eavesdroppers.SpikeCountingEavesdropper')()
    for lay in spiking_net.jnet.layers:
        lay.forward_herder.add_eavesdropper(forward_eavesdropper)
    for lay in spiking_net.jnet.layers[1:]:
        lay.backward_herder.add_eavesdropper(backward_eavesdropper)
    spiking_net.jnet.error_counter.add_eavesdropper(backward_eavesdropper)
    forward_counts = []
    backward_counts = []

    def register_counts():
        forward_counts.append(forward_eavesdropper.get_count())
        backward_counts.append(backward_eavesdropper.get_count())

    results = compare_predictors(
        dataset=mnist,
        online_predictors={
            'Spiking-MLP': spiking_net,
            'ReLU-MLP': relu_net,
            },
        test_epochs=test_epochs,
        online_test_callbacks=lambda p: register_counts() if p is spiking_net else None,
        minibatch_size = 1,
        test_on = 'training+test',
        evaluation_function=percent_argmax_incorrect,
        )

    spiking_params = [np.array(lay.forward_weights.w.asFloat()).copy() for lay in spiking_net.jnet.layers]
    relu_params = [param.get_value().astype(np.float64) for param in relu_net.parameters]

    # See what the score is when we apply the final ReLU weights to the spiking network.
    offline_trained_spiking_net = JavaSpikingNetWrapper(
        ws=relu_params,
        fractional = fractional,
        depth_first=False,
        smooth_grads = False,
        forward_discretize = forward_discretize,
        back_discretize = back_discretize,
        test_discretize = test_discretize,
        hold_error=True,
        n_steps = 10,
        eta=eta,
        )

    # for spiking_layer, p in zip(spiking_net.jnet.layers, relu_params):
    #     spiking_layer.w = p.astype(np.float64)

    error = [
        ('Test', percent_argmax_incorrect(offline_trained_spiking_net.predict(mnist.test_set.input), mnist.test_set.target)),
        ('Training', percent_argmax_incorrect(offline_trained_spiking_net.predict(mnist.training_set.input), mnist.training_set.target))
        ]
    results['Spiking-MLP with ReLU weights'] = LearningCurveData()
    results['Spiking-MLP with ReLU weights'].add(None, error)
    print 'Spiking-MLP with ReLU weights: %s' % error
    # --------------------------------------------------------------------------

    # See what the score is when we plug the spiking weights into the ReLU net.
    for param, sval in zip(relu_net.parameters, spiking_params):
        param.set_value(sval)
    error = [
        ('Test', percent_argmax_incorrect(relu_net.predict(mnist.test_set.input), mnist.test_set.target)),
        ('Training', percent_argmax_incorrect(relu_net.predict(mnist.training_set.input), mnist.training_set.target))
        ]
    results['ReLU-MLP with Spiking weights'] = LearningCurveData()
    results['ReLU-MLP with Spiking weights'].add(None, error)
    print 'ReLU-MLP with Spiking weights: %s' % error
    # --------------------------------------------------------------------------

    if save_results:
        with open("mnist_relu_vs_spiking_results-%s.pkl" % datetime.now(), 'w') as f:
            pickle.dump(results, f)

    # Problem: this currently includes spikes emitted during the test evaluations as well as training.
    forward_rates = np.diff(forward_counts) / (np.diff(test_epochs)*60000)
    backward_rates = np.diff(backward_counts) / (np.diff(test_epochs)*60000)

    plt.figure('ReLU vs Spikes')
    plt.subplot(211)
    plot_learning_curves(results, title = "MNIST Learning Curves", hang=False, figure_name='ReLU vs Spikes', xscale='linear', yscale='log', y_title='Percent Error')
    plt.subplot(212)
    plt.plot(test_epochs[1:], forward_rates)
    plt.plot(test_epochs[1:], backward_rates)
    plt.xlabel('Epoch')
    plt.ylabel('n_spikes')
    plt.legend(['Mean Forward Spikes', 'Mean Backward Spikes'], loc='best')
    plt.interactive(is_test_mode())
    plt.show()
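
The spike-rate plot at the end divides the growth in cumulative spike counts between consecutive checkpoints by the number of training samples seen in that interval (60000 per MNIST epoch). A small numpy sketch of that bookkeeping, with the same caveat as the comment above that spikes emitted during testing are included in the counts:

import numpy as np

def spikes_per_sample(counts, epochs, samples_per_epoch=60000):
    # counts[i] is the cumulative spike count at checkpoint epochs[i].
    counts = np.asarray(counts, dtype=float)
    epochs = np.asarray(epochs, dtype=float)
    return np.diff(counts) / (np.diff(epochs) * samples_per_epoch)

print(spikes_per_sample([0, 1.2e6, 3.0e6], [0.0, 0.5, 1.0]))  # -> 40., 60.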
Exemplo n.º 40
0
)


ExperimentLibrary.try_hyperparams = Experiment(
    description="Compare the various hyperparameters to the baseline.",
    function=with_jpype(lambda
            fractional = False,
            depth_first = False,
            smooth_grads = False,
            back_discretize = 'noreset-herding',
            n_steps = 10,
            hidden_sizes = [200, 200],
            hold_error = True,
            :
        compare_predictors(
            dataset=(get_mnist_dataset(flat=True).shorten(100) if is_test_mode() else get_mnist_dataset(flat=True)).to_onehot(),
            online_predictors={'Spiking MLP': JavaSpikingNetWrapper.from_init(
                fractional = fractional,
                depth_first = depth_first,
                smooth_grads = smooth_grads,
                back_discretize = back_discretize,
                w_init=0.01,
                rng = 1234,
                eta=0.01,
                n_steps = n_steps,
                hold_error=hold_error,
                layer_sizes=[784]+hidden_sizes+[10],
                )},
            test_epochs=[0.0, 0.05] if is_test_mode() else [0.0, 0.05, 0.1, 0.2, 0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4],
            minibatch_size = 1,
            report_test_scores=True,
Exemplo n.º 41
0
def demo_dbn_mnist(plot=True, test_mode=True):
    """
    In this demo we train an RBM on the MNIST input data (labels are ignored).  We plot the state of a Markov chain
    that is being simultaneously sampled from the RBM, and the parameters of the RBM.
    """

    set_enable_omniscence(True)
    minibatch_size = 20
    dataset = get_mnist_dataset().process_with(
        inputs_processor=lambda (x, ): (x.reshape(x.shape[0], -1), ))
    w_init = lambda n_in, n_out: 0.01 * np.random.randn(n_in, n_out)
    n_training_epochs_1 = 20
    n_training_epochs_2 = 20
    check_period = 300

    if test_mode:
        n_training_epochs_1 = 0.01
        n_training_epochs_2 = 0.01
        check_period = 100
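
    # Architecture set up below: 'vis' holds the 784 image pixels, 'hid' is a
    # 500-unit feature layer, 'lab' holds the 10 class labels, and 'ass' is a
    # 500-unit associative layer bridged to both 'hid' and 'lab'.  Label
    # prediction later runs along the path vis -> hid -> ass -> lab.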

    dbn = DeepBeliefNet(layers={
        'vis': StochasticLayer('bernoulli'),
        'hid': StochasticLayer('bernoulli'),
        'ass': StochasticLayer('bernoulli'),
        'lab': StochasticLayer('bernoulli'),
    },
                        bridges={
                            ('vis', 'hid'):
                            FullyConnectedBridge(w=w_init(784, 500), b_rev=0),
                            ('hid', 'ass'):
                            FullyConnectedBridge(w=w_init(500, 500), b_rev=0),
                            ('lab', 'ass'):
                            FullyConnectedBridge(w=w_init(10, 500), b_rev=0)
                        })

    # Compile the functions you're gonna use.
    train_first_layer = dbn.get_constrastive_divergence_function(
        visible_layers='vis',
        hidden_layers='hid',
        optimizer=SimpleGradientDescent(eta=0.01),
        n_gibbs=1,
        persistent=True).compile()
    free_energy_of_first_layer = dbn.get_free_energy_function(
        visible_layers='vis', hidden_layers='hid').compile()
    train_second_layer = dbn.get_constrastive_divergence_function(
        visible_layers=('hid', 'lab'),
        hidden_layers='ass',
        input_layers=('vis', 'lab'),
        n_gibbs=1,
        persistent=True).compile()
    predict_label = dbn.get_inference_function(input_layers='vis',
                                               output_layers='lab',
                                               path=[('vis', 'hid'),
                                                     ('hid', 'ass'),
                                                     ('ass', 'lab')],
                                               smooth=True).compile()

    encode_label = OneHotEncoding(n_classes=10)

    # Step 1: Train the first layer, plotting the weights and persistent chain state.
    if plot:
        train_first_layer.set_debug_variables(
            lambda: {
                'weights':
                dbn._bridges['vis', 'hid']._w.T.reshape((-1, 28, 28)),
                'smooth_vis_state':
                dbn.get_inference_function('hid', 'vis', smooth=True).
                symbolic_stateless(*train_first_layer.locals()[
                    'initial_hidden']).reshape((-1, 28, 28))
            })
        plotter = LiveStream(train_first_layer.get_debug_values)

    for i, (n_samples, visible_data, label_data) in enumerate(
            dataset.training_set.minibatch_iterator(
                minibatch_size=minibatch_size,
                epochs=n_training_epochs_1,
                single_channel=True)):
        train_first_layer(visible_data)
        if i % check_period == 0:
            print 'Free Energy of Test Data: %s' % (free_energy_of_first_layer(
                dataset.test_set.input).mean())
            if plot:
                plotter.update()

    # Step 2: Train the second layer and simultaneously compute the classification error from forward passes.
    if plot:
        train_second_layer.set_debug_variables(
            lambda: {
                'w_vis_hid':
                dbn._bridges['vis', 'hid']._w.T.reshape((-1, 28, 28)),
                'w_hid_ass':
                dbn._bridges['hid', 'ass']._w,
                'w_lab_ass':
                dbn._bridges['lab', 'ass']._w,
                'associative_state':
                train_second_layer.locals()['sleep_hidden'][0].reshape(
                    (-1, 20, 25)),
                'hidden_state':
                train_second_layer.locals()['sleep_visible'][0].reshape(
                    (-1, 20, 25)),
                'smooth_vis_state':
                dbn.get_inference_function('hid', 'vis', smooth=True).
                symbolic_stateless(train_second_layer.locals()['sleep_visible']
                                   [0]).reshape((-1, 28, 28))
            })
        plotter = LiveStream(train_second_layer.get_debug_values)

    for i, (n_samples, visible_data, label_data) in enumerate(
            dataset.training_set.minibatch_iterator(
                minibatch_size=minibatch_size,
                epochs=n_training_epochs_2,
                single_channel=True)):
        train_second_layer(visible_data, encode_label(label_data))
        if i % check_period == 0:
            out, = predict_label(dataset.test_set.input)
            score = percent_argmax_correct(actual=out,
                                           target=dataset.test_set.target)
            print 'Classification Score: %s' % score
            if plot:
                plotter.update()
Exemplo n.º 42
0
def demo_dbn_mnist(plot = True, test_mode = True):
    """
    In this demo we train an RBM on the MNIST input data (labels are ignored).  We plot the state of a Markov chain
    that is being simultaneously sampled from the RBM, and the parameters of the RBM.
    """

    set_enable_omniscence(True)
    minibatch_size = 20
    dataset = get_mnist_dataset().process_with(inputs_processor=lambda (x, ): (x.reshape(x.shape[0], -1), ))
    w_init = lambda n_in, n_out: 0.01 * np.random.randn(n_in, n_out)
    n_training_epochs_1 = 20
    n_training_epochs_2 = 20
    check_period = 300

    if test_mode:
        n_training_epochs_1 = 0.01
        n_training_epochs_2 = 0.01
        check_period=100

    dbn = DeepBeliefNet(
        layers = {
            'vis': StochasticLayer('bernoulli'),
            'hid': StochasticLayer('bernoulli'),
            'ass': StochasticLayer('bernoulli'),
            'lab': StochasticLayer('bernoulli'),
            },
        bridges = {
            ('vis', 'hid'): FullyConnectedBridge(w = w_init(784, 500), b_rev = 0),
            ('hid', 'ass'): FullyConnectedBridge(w = w_init(500, 500), b_rev = 0),
            ('lab', 'ass'): FullyConnectedBridge(w = w_init(10, 500), b_rev = 0)
        }
    )

    # Compile the functions you're gonna use.
    train_first_layer = dbn.get_constrastive_divergence_function(visible_layers = 'vis', hidden_layers='hid', optimizer=SimpleGradientDescent(eta = 0.01), n_gibbs = 1, persistent=True).compile()
    free_energy_of_first_layer = dbn.get_free_energy_function(visible_layers='vis', hidden_layers='hid').compile()
    train_second_layer = dbn.get_constrastive_divergence_function(visible_layers=('hid', 'lab'), hidden_layers='ass', input_layers=('vis', 'lab'), n_gibbs=1, persistent=True).compile()
    predict_label = dbn.get_inference_function(input_layers = 'vis', output_layers='lab', path = [('vis', 'hid'), ('hid', 'ass'), ('ass', 'lab')], smooth = True).compile()

    encode_label = OneHotEncoding(n_classes=10)

    # Step 1: Train the first layer, plotting the weights and persistent chain state.
    if plot:
        train_first_layer.set_debug_variables(lambda: {
                'weights': dbn._bridges['vis', 'hid']._w.T.reshape((-1, 28, 28)),
                'smooth_vis_state': dbn.get_inference_function('hid', 'vis', smooth = True).symbolic_stateless(*train_first_layer.locals()['initial_hidden']).reshape((-1, 28, 28))
            })
        plotter = LiveStream(train_first_layer.get_debug_values)

    for i, (n_samples, visible_data, label_data) in enumerate(dataset.training_set.minibatch_iterator(minibatch_size = minibatch_size, epochs = n_training_epochs_1, single_channel = True)):
        train_first_layer(visible_data)
        if i % check_period == 0:
            print 'Free Energy of Test Data: %s' % (free_energy_of_first_layer(dataset.test_set.input).mean())
            if plot:
                plotter.update()

    # Step 2: Train the second layer and simultaneously compute the classification error from forward passes.
    if plot:
        train_second_layer.set_debug_variables(lambda: {
            'w_vis_hid': dbn._bridges['vis', 'hid']._w.T.reshape((-1, 28, 28)),
            'w_hid_ass': dbn._bridges['hid', 'ass']._w,
            'w_lab_ass': dbn._bridges['lab', 'ass']._w,
            'associative_state': train_second_layer.locals()['sleep_hidden'][0].reshape((-1, 20, 25)),
            'hidden_state': train_second_layer.locals()['sleep_visible'][0].reshape((-1, 20, 25)),
            'smooth_vis_state': dbn.get_inference_function('hid', 'vis', smooth = True).symbolic_stateless(train_second_layer.locals()['sleep_visible'][0]).reshape((-1, 28, 28))
            })
        plotter = LiveStream(train_second_layer.get_debug_values)

    for i, (n_samples, visible_data, label_data) in enumerate(dataset.training_set.minibatch_iterator(minibatch_size = minibatch_size, epochs = n_training_epochs_2, single_channel = True)):
        train_second_layer(visible_data, encode_label(label_data))
        if i % check_period == 0:
            out, = predict_label(dataset.test_set.input)
            score = percent_argmax_correct(actual = out, target = dataset.test_set.target)
            print 'Classification Score: %s' % score
            if plot:
                plotter.update()
Exemplo n.º 43
0
def demo_rbm_mnist(plot=True, test_mode=False):
    """
    In this demo we train an RBM on the MNIST input data (labels are ignored).  We plot the state of a Markov chain
    that is being simultaneously sampled from the RBM, and the parameters of the RBM.

    What you see:
    A plot will appear with 6 subplots.  The subplots are as follows:
    hidden-neg-chain: The activity of the hidden layer for each of the persistent CD chains for drawing negative samples.
    visible-neg-chain: The probabilities of the visible activations corresponding to the state of hidden-neg-chain.
    w: A subset of the weight vectors, reshaped to the shape of the input.
    b: The bias of the hidden units.
    b_rev: The bias of the visible units.
    visible-sample: The probabilities of the visible samples drawn from an independent free-sampling chain (outside the
        training function).

    As learning progresses, visible-neg-chain and visible-sample should increasingly resemble the data.
    """
    set_enable_omniscence(True)
    minibatch_size = 9
    n_epochs = 0.01 if test_mode else 10

    dataset = get_mnist_dataset().process_with(
        inputs_processor=lambda (x, ): (x.reshape(x.shape[0], -1), ))

    rbm = simple_rbm(
        visible_layer=StochasticLayer('bernoulli'),
        bridge=FullyConnectedBridge(
            w=0.001 *
            np.random.randn(28 * 28, 500).astype(theano.config.floatX),
            b=0,
            b_rev=0),
        hidden_layer=StochasticLayer('bernoulli'))

    train_function = rbm.get_training_fcn(
        n_gibbs=4, persistent=True,
        optimizer=SimpleGradientDescent(eta=0.01)).compile()
    sampling_function = rbm.get_free_sampling_fcn(
        init_visible_state=np.random.randn(9, 28 * 28),
        return_smooth_visible=True).compile()

    if plot:

        def debug_variable_setter():
            lv = train_function.symbolic.locals()
            return {
                'hidden-neg-chain':
                lv.sleep_hidden.reshape((-1, 25, 20)),
                'visible-neg-chain':
                lv.hidden_layer.smooth(lv.bridge.reverse(
                    lv.sleep_hidden)).reshape((-1, 28, 28)),
                'w':
                lv.bridge.parameters[0].T[:25].reshape((-1, 28, 28)),
                'b':
                lv.bridge.parameters[1].reshape((25, 20)),
                'b_rev':
                lv.bridge.parameters[2].reshape((28, 28)),
            }

        train_function.set_debug_variables(debug_variable_setter)

    stream = LiveStream(lambda: dict(train_function.get_debug_values().items(
    ) + [('visible-sample', visible_samples.reshape((-1, 28, 28)))]),
                        update_every=10)
    for _, visible_data, _ in dataset.training_set.minibatch_iterator(
            minibatch_size=minibatch_size, epochs=n_epochs,
            single_channel=True):
        visible_samples, _ = sampling_function()
        train_function(visible_data)
        if plot:
            stream.update()
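
Conceptually, the compiled train_function above performs (persistent) contrastive divergence: weights are pushed toward the data-driven visible-hidden correlations and away from the model-driven ones. A rough numpy sketch of the plain CD-1 update rule, as a conceptual illustration only (the demo itself uses the library's persistent chains with n_gibbs=4):

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def cd1_update(v0, w, b, b_rev, eta=0.01, rng=np.random):
    # Positive phase: hidden probabilities given a minibatch of data v0.
    h0 = sigmoid(v0.dot(w) + b)
    # Negative phase: sample the hiddens, take one Gibbs step down and back up.
    h0_sample = (h0 > rng.rand(*h0.shape)).astype(float)
    v1 = sigmoid(h0_sample.dot(w.T) + b_rev)
    h1 = sigmoid(v1.dot(w) + b)
    # Move weights toward data correlations and away from model correlations.
    w += eta * (v0.T.dot(h0) - v1.T.dot(h1)) / len(v0)
    b += eta * (h0 - h1).mean(axis=0)
    b_rev += eta * (v0 - v1).mean(axis=0)
    return w, b, b_rev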
Exemplo n.º 44
0
def demo_mnist_mlp(
        minibatch_size = 10,
        learning_rate = 0.1,
        optimizer = 'sgd',
        hidden_sizes = [300],
        w_init = 0.01,
        hidden_activation = 'tanh',
        output_activation = 'softmax',
        cost = 'nll-d',
        visualize_params = False,
        n_test_points = 30,
        n_epochs = 10,
        max_training_samples = None,
        use_bias = True,
        onehot = False,
        rng = 1234,
        plot = False,
        ):
    """
    Train an MLP on MNIST and print the test scores as training progresses.
    """

    if is_test_mode():
        n_test_points = 3
        minibatch_size = 5
        n_epochs = 0.01
        dataset = get_mnist_dataset(n_training_samples=30, n_test_samples=30)
    else:
        dataset = get_mnist_dataset(n_training_samples=max_training_samples)

    if onehot:
        dataset = dataset.to_onehot()

    if minibatch_size == 'full':
        minibatch_size = dataset.training_set.n_samples

    optimizer = get_named_optimizer(name = optimizer, learning_rate=learning_rate)

    # Setup the training and test functions
    predictor = GradientBasedPredictor(
        function = MultiLayerPerceptron.from_init(
            layer_sizes=[dataset.input_size]+hidden_sizes+[10],
            hidden_activation=hidden_activation,
            output_activation=output_activation,
            w_init = w_init,
            use_bias=use_bias,
            rng = rng,
            ),
        cost_function=cost,
        optimizer=optimizer
        ).compile()  # .compile() turns the GradientBasedPredictor, which works with symbolic variables, into a real one that takes and returns arrays.

    def vis_callback(xx):
        p = predictor.symbolic_predictor._function
        in_layer = {
            'Layer[0].w': p.layers[0].linear_transform._w.get_value().T.reshape(-1, 28, 28),
            'Layer[0].b': p.layers[0].linear_transform._b.get_value(),
            }
        other_layers = [{'Layer[%s].w' % (i+1): l.linear_transform._w.get_value(), 'Layer[%s].b' % (i+1): l.linear_transform._b.get_value()} for i, l in enumerate(p.layers[1:])]
        dbplot(dict(in_layer.items() + sum([o.items() for o in other_layers], [])))

    # Train and periodically report the test score.
    results = assess_online_predictor(
        dataset=dataset,
        predictor=predictor,
        evaluation_function='percent_argmax_correct',
        test_epochs=sqrtspace(0, n_epochs, n_test_points),
        minibatch_size=minibatch_size,
        test_callback=vis_callback if visualize_params else None
    )

    if plot:
        plot_learning_curves(results)