def mnist_adamax_showdown(hidden_size = 300, n_epochs = 10, n_tests = 20):

    dataset = get_mnist_dataset()

    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 0.1
        n_tests = 3

    make_mlp = lambda optimizer: GradientBasedPredictor(
            function = MultiLayerPerceptron(
                layer_sizes=[hidden_size, dataset.n_categories],
                input_size = dataset.input_size,
                hidden_activation='sig',
                output_activation='lin',
                w_init = normal_w_init(mag = 0.01, seed = 5)
                ),
            cost_function = softmax_negative_log_likelihood,
            optimizer = optimizer,
            ).compile()

    return compare_predictors(
        dataset=dataset,
        online_predictors = {
            'sgd': make_mlp(SimpleGradientDescent(eta = 0.1)),
            'adamax': make_mlp(AdaMax(alpha = 1e-3)),
            },
        minibatch_size = 20,
        test_epochs = sqrtspace(0, n_epochs, n_tests),
        evaluation_function = percent_argmax_correct
        )
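
All of these examples pass test_epochs = sqrtspace(0, n_epochs, n_tests) so that the learning curve is sampled densely early in training, where scores change fastest. A minimal sketch of such a helper, assuming only numpy (the real function lives in the library's own utilities and may differ):

import numpy as np

def sqrtspace(start, stop, n_points):
    # Points spaced linearly in sqrt-space, then squared: dense near
    # `start`, sparse near `stop`.
    return np.linspace(np.sqrt(start), np.sqrt(stop), n_points)**2

print(sqrtspace(0, 10, 5))  # -> [ 0.     0.625  2.5    5.625 10.   ]
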
Example #2
def test_compare_predictors(hang_plot=False):

    dataset = get_synthetic_clusters_dataset()

    w_constructor = lambda rng=np.random.RandomState(45): .1 * rng.randn(
        dataset.input_shape[0], dataset.n_categories)
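    # Note: the RandomState default argument is evaluated once, when the lambda
    # is defined, so the two w_constructor() calls below draw different weight
    # matrices from the same random stream.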
    records = compare_predictors(
        dataset=dataset,
        offline_predictors={'SVM': SVC()},
        online_predictors={
            'fast-perceptron':
            Perceptron(alpha=0.1, w=w_constructor()).to_categorical(),
            'slow-perceptron':
            Perceptron(alpha=0.001, w=w_constructor()).to_categorical()
        },
        minibatch_size=10,
        test_epochs=sqrtspace(0, 10, 20),
        evaluation_function='percent_correct')

    assert 99 < records['SVM'].get_scores('Test') <= 100
    assert 20 < records['slow-perceptron'].get_scores(
        'Test')[0] < 40 and 95 < records['slow-perceptron'].get_scores(
            'Test')[-1] <= 100
    assert 20 < records['fast-perceptron'].get_scores(
        'Test')[0] < 40 and 98 < records['fast-perceptron'].get_scores(
            'Test')[-1] <= 100

    plot_learning_curves(records, hang=hang_plot)
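
The 'percent_correct' evaluation requested above scores integer label predictions directly against integer targets. A rough standalone equivalent, assuming numpy (the library's own version may handle more cases):

import numpy as np

def percent_correct(guesses, targets):
    # Percentage of exactly-matching label predictions, in [0, 100].
    return 100.0 * np.mean(np.asarray(guesses) == np.asarray(targets))
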
Example #3
def demo_compare_dtp_methods(predictor_constructors,
                             n_epochs=10,
                             minibatch_size=20,
                             n_tests=20,
                             onehot=True,
                             accumulator=None):
    dataset = get_mnist_dataset(flat=True, binarize=False)
    n_categories = dataset.n_categories
    if onehot:
        dataset = dataset.to_onehot()

    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 1
        n_tests = 2

    learning_curves = compare_predictors(
        dataset=dataset,
        online_predictors={
            name: p(dataset.input_size, n_categories)
            for name, p in predictor_constructors.iteritems()
            if name in predictor_constructors
        },
        minibatch_size=minibatch_size,
        test_epochs=sqrtspace(0, n_epochs, n_tests),
        evaluation_function=percent_argmax_correct,
        # online_test_callbacks={'perceptron': lambda p: dbplot(p.symbolic_predictor.layers[0].w.get_value().T.reshape(-1, 28, 28))},
        accumulators=accumulator)

    plot_learning_curves(learning_curves)
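
percent_argmax_correct, the evaluation function here and in most of the following examples, compares predicted and true classes by row-wise argmax, so it works on one-hot targets and unnormalized scores alike. A rough numpy sketch (the library's version may also accept integer labels):

import numpy as np

def percent_argmax_correct(guesses, targets):
    # guesses, targets: (n_samples, n_categories) arrays of scores / one-hot rows.
    return 100.0 * np.mean(np.argmax(guesses, axis=1) == np.argmax(targets, axis=1))
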
Example #4
def mnist_adamax_showdown(hidden_size = 300, n_epochs = 10, n_tests = 20):

    dataset = get_mnist_dataset()

    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 0.1
        n_tests = 3

    make_mlp = lambda optimizer: GradientBasedPredictor(
            function = MultiLayerPerceptron.from_init(
                layer_sizes=[dataset.input_size, hidden_size, dataset.n_categories],
                hidden_activation='sig',
                output_activation='lin',
                w_init = 0.01,
                rng = 5
                ),
            cost_function = softmax_negative_log_likelihood,
            optimizer = optimizer,
            ).compile()

    return compare_predictors(
        dataset=dataset,
        online_predictors = {
            'sgd': make_mlp(SimpleGradientDescent(eta = 0.1)),
            'adamax': make_mlp(AdaMax(alpha = 1e-3)),
            },
        minibatch_size = 20,
        test_epochs = sqrtspace(0, n_epochs, n_tests),
        evaluation_function = percent_argmax_correct
        )
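
For reference, the AdaMax optimizer in this showdown is the infinity-norm variant of Adam (Kingma & Ba, 2015). Its per-parameter update is roughly the following, shown as a standalone numpy sketch rather than the library's symbolic class:

import numpy as np

class AdaMaxSketch(object):

    def __init__(self, alpha=1e-3, beta_1=0.9, beta_2=0.999, eps=1e-8):
        self.alpha, self.beta_1, self.beta_2, self.eps = alpha, beta_1, beta_2, eps
        self.m = 0.0   # first-moment estimate
        self.u = 0.0   # exponentially-weighted infinity norm of the gradient
        self.t = 0

    def step(self, param, grad):
        self.t += 1
        self.m = self.beta_1*self.m + (1-self.beta_1)*grad
        self.u = np.maximum(self.beta_2*self.u, np.abs(grad))
        step_size = self.alpha / (1 - self.beta_1**self.t)  # bias-correct the first moment
        return param - step_size * self.m / (self.u + self.eps)
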
Example #5
def demo_dtp_varieties(hidden_sizes=[240],
                       n_epochs=10,
                       minibatch_size=20,
                       n_tests=20,
                       hidden_activation='tanh',
                       output_activation='sigm',
                       optimizer='adamax',
                       learning_rate=0.01,
                       noise=1,
                       predictors=['MLP', 'DTP', 'PreAct-DTP', 'Linear-DTP'],
                       rng=1234,
                       use_bias=True,
                       live_plot=False,
                       plot=False):
    """
    ;

    :param hidden_sizes:
    :param n_epochs:
    :param minibatch_size:
    :param n_tests:
    :return:
    """
    if isinstance(predictors, str):
        predictors = [predictors]

    dataset = get_mnist_dataset(flat=True)
    dataset = dataset.process_with(
        targets_processor=lambda (x, ): (OneHotEncoding(10)(x).astype(int), ))
    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 0.1
        n_tests = 3

    set_default_figure_size(12, 9)

    predictors = OrderedDict(
        (name,
         get_predictor(name,
                       input_size=dataset.input_size,
                       target_size=dataset.target_size,
                       hidden_sizes=hidden_sizes,
                       hidden_activation=hidden_activation,
                       output_activation=output_activation,
                       optimizer=optimizer,
                       learning_rate=learning_rate,
                       noise=noise,
                       use_bias=use_bias,
                       rng=rng)) for name in predictors)

    learning_curves = compare_predictors(
        dataset=dataset,
        online_predictors=predictors,
        minibatch_size=minibatch_size,
        test_epochs=sqrtspace(0, n_epochs, n_tests),
        evaluation_function=percent_argmax_correct,
    )

    if plot:
        plot_learning_curves(learning_curves)
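
The targets_processor above wraps OneHotEncoding(10), which maps integer labels onto one-hot rows. Functionally that is just the following (a minimal numpy stand-in, not the library class):

import numpy as np

def one_hot(labels, n_categories):
    # (n_samples,) integer labels -> (n_samples, n_categories) one-hot rows.
    return np.eye(n_categories, dtype=int)[np.asarray(labels)]

print(one_hot([3, 0], 4))  # -> [[0 0 0 1], [1 0 0 0]]
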
Example #6
def demo_compare_dtp_methods(
        predictor_constructors,
        n_epochs = 10,
        minibatch_size = 20,
        n_tests = 20,
        onehot = True,
        accumulator = None
        ):
    dataset = get_mnist_dataset(flat = True, binarize = False)
    n_categories = dataset.n_categories
    if onehot:
        dataset = dataset.to_onehot()

    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 1
        n_tests = 2

    learning_curves = compare_predictors(
        dataset=dataset,
        online_predictors = {name: p(dataset.input_size, n_categories) for name, p in predictor_constructors.iteritems() if name in predictor_constructors},
        minibatch_size = minibatch_size,
        test_epochs = sqrtspace(0, n_epochs, n_tests),
        evaluation_function = percent_argmax_correct,
        # online_test_callbacks={'perceptron': lambda p: dbplot(p.symbolic_predictor.layers[0].w.get_value().T.reshape(-1, 28, 28))},
        accumulators=accumulator
        )

    plot_learning_curves(learning_curves)
Example #7
def compare_example_predictors(
    n_epochs=5,
    n_tests=20,
    minibatch_size=10,
):
    """
    This demo shows how we can compare different online predictors.  The demo trains each predictor on the dataset,
    returning an object that contains the results.

    Note: when is_test_mode() returns True, the demo runs only briefly (not to completion), just to check that it doesn't break.
    """

    dataset = get_mnist_dataset(flat=True)
    # "Flatten" the 28x28 inputs to a 784-d vector

    if is_test_mode():
        # Shorten the dataset so we run through it quickly in test mode.
        dataset = dataset.shorten(200)
        n_epochs = 1
        n_tests = 3

    # Here we compare three predictors on MNIST - an MLP, a Perceptron, and a Random Forest.
    # - The MLP is defined using Plato's interfaces - we create a Symbolic Predictor (GradientBasedPredictor) and
    #   then compile it into an IPredictor object
    # - The Perceptron directly implements the IPredictor interface.
    # - The Random Forest implements SciKit learn's predictor interface - that is, it has a fit(x, y) and a predict(x) method.
    learning_curve_data = compare_predictors(
        dataset=dataset,
        online_predictors={
            'Perceptron':
            Perceptron(w=np.zeros((dataset.input_size, dataset.n_categories)),
                       alpha=0.001).
            to_categorical(
                n_categories=dataset.n_categories
            ),  # .to_categorical allows the perceptron to be trained on integer labels.
            'MLP':
            GradientBasedPredictor(
                function=MultiLayerPerceptron.from_init(
                    layer_sizes=[
                        dataset.input_size, 500, dataset.n_categories
                    ],
                    hidden_activation='sig',  # Sigmoidal hidden units
                    output_activation=
                    'softmax',  # Softmax output unit, since we're doing multinomial classification
                    w_init=0.01,
                    rng=5),
                cost_function=
                negative_log_likelihood_dangerous,  # "Dangerous" because it doesn't check to see that output is normalized, but we know it is because it comes from softmax.
                optimizer=SimpleGradientDescent(eta=0.1),
            ).compile(),  # .compile() returns an IPredictor
        },
        offline_predictors={'RF': RandomForestClassifier(n_estimators=40)},
        minibatch_size=minibatch_size,
        test_epochs=sqrtspace(0, n_epochs, n_tests),
        evaluation_function=percent_argmax_correct  # Compares one-hot
    )
    # Results is a LearningCurveData object
    return learning_curve_data
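
The Perceptron above implements the IPredictor interface directly. Its learning rule is the classic multiclass perceptron update; a sketch under the assumption of one-hot targets (not the library's exact code):

import numpy as np

def perceptron_train_step(w, x, target_onehot, alpha=0.001):
    # w: (input_size, n_categories), x: (n_samples, input_size).
    # Nudge weights toward the true class and away from the predicted one
    # wherever the two disagree.
    guess_onehot = np.eye(w.shape[1])[np.argmax(x.dot(w), axis=1)]
    return w + alpha * x.T.dot(target_onehot - guess_onehot)
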
Example #8
def compare_example_predictors(
        n_epochs = 5,
        n_tests = 20,
        minibatch_size = 10,
    ):
    """
    This demo shows how we can compare different online predictors.  The demo trains each predictor on the dataset,
    returning an object that contains the results.

    Note: when is_test_mode() returns True, the demo runs only briefly (not to completion), just to check that it doesn't break.
    """

    dataset = get_mnist_dataset(flat = True)
    # "Flatten" the 28x28 inputs to a 784-d vector

    if is_test_mode():
        # Shorten the dataset so we run through it quickly in test mode.
        dataset = dataset.shorten(200)
        n_epochs = 1
        n_tests = 3

    # Here we compare three predictors on MNIST - an MLP, a Perceptron, and a Random Forest.
    # - The MLP is defined using Plato's interfaces - we create a Symbolic Predictor (GradientBasedPredictor) and
    #   then compile it into an IPredictor object
    # - The Perceptron directly implements the IPredictor interface.
    # - The Random Forest implements SciKit learn's predictor interface - that is, it has a fit(x, y) and a predict(x) method.
    learning_curve_data = compare_predictors(
        dataset = dataset,
        online_predictors = {
            'Perceptron': Perceptron(
                w = np.zeros((dataset.input_size, dataset.n_categories)),
                alpha = 0.001
                ).to_categorical(n_categories = dataset.n_categories),  # .to_categorical allows the perceptron to be trained on integer labels.
            'MLP': GradientBasedPredictor(
                function = MultiLayerPerceptron.from_init(
                    layer_sizes=[dataset.input_size, 500, dataset.n_categories],
                    hidden_activation='sig',  # Sigmoidal hidden units
                    output_activation='softmax',  # Softmax output unit, since we're doing multinomial classification
                    w_init = 0.01,
                    rng = 5
                ),
                cost_function = negative_log_likelihood_dangerous,  # "Dangerous" because it doesn't check to see that output is normalized, but we know it is because it comes from softmax.
                optimizer = SimpleGradientDescent(eta = 0.1),
                ).compile(),  # .compile() returns an IPredictor
            },
        offline_predictors={
            'RF': RandomForestClassifier(n_estimators = 40)
            },
        minibatch_size = minibatch_size,
        test_epochs = sqrtspace(0, n_epochs, n_tests),
        evaluation_function = percent_argmax_correct  # Compares one-hot
        )
    # Results is a LearningCurveData object
    return learning_curve_data
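
The "dangerous" cost used in this variant trusts that the network output is already a normalized distribution, which it is here because the output layer is a softmax. The contrast with softmax_negative_log_likelihood (used in the examples with a linear output layer) is roughly this, sketched in numpy rather than the library's symbolic code:

import numpy as np

def negative_log_likelihood_dangerous(output, target_onehot):
    # Assumes each row of `output` already sums to 1 (e.g. it came from a softmax).
    return -np.mean(np.log(np.sum(output * target_onehot, axis=1)))

def softmax_negative_log_likelihood(pre_activation, target_onehot):
    # Applies a numerically-stabilized log-softmax first, so a linear
    # output layer can be used safely.
    shifted = pre_activation - pre_activation.max(axis=1, keepdims=True)
    log_softmax = shifted - np.log(np.exp(shifted).sum(axis=1, keepdims=True))
    return -np.mean(np.sum(log_softmax * target_onehot, axis=1))
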
Example #9
def demo_compare_dtp_optimizers(
    hidden_sizes=[240],
    n_epochs=10,
    minibatch_size=20,
    n_tests=20,
    hidden_activation='tanh',
):

    dataset = get_mnist_dataset(flat=True).to_onehot()

    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 1
        n_tests = 2

    def make_dtp_net(optimizer_constructor, output_fcn):
        return DifferenceTargetMLP.from_initializer(
            input_size=dataset.input_size,
            output_size=dataset.target_size,
            hidden_sizes=hidden_sizes,
            optimizer_constructor=optimizer_constructor,
            input_activation='sigm',
            hidden_activation=hidden_activation,
            output_activation=output_fcn,
            w_init_mag=0.01,
            noise=1,
        ).compile()

    learning_curves = compare_predictors(
        dataset=dataset,
        online_predictors={
            'SGD-0.001-softmax':
            make_dtp_net(lambda: SimpleGradientDescent(0.001),
                         output_fcn='softmax'),
            'AdaMax-0.001-softmax':
            make_dtp_net(lambda: AdaMax(0.001), output_fcn='softmax'),
            'RMSProp-0.001-softmax':
            make_dtp_net(lambda: RMSProp(0.001), output_fcn='softmax'),
            'SGD-0.001-sigm':
            make_dtp_net(lambda: SimpleGradientDescent(0.001),
                         output_fcn='sigm'),
            'AdaMax-0.001-sigm':
            make_dtp_net(lambda: AdaMax(0.001), output_fcn='sigm'),
            'RMSProp-0.001-sigm':
            make_dtp_net(lambda: RMSProp(0.001), output_fcn='sigm'),
        },
        minibatch_size=minibatch_size,
        test_epochs=sqrtspace(0, n_epochs, n_tests),
        evaluation_function=percent_argmax_correct,
    )

    plot_learning_curves(learning_curves)
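
For context on what DifferenceTargetMLP is optimizing: difference target propagation (Lee et al., 2015) trains each layer toward a locally-computed target instead of a backpropagated gradient. Given a learned approximate inverse g of each layer's forward mapping, targets propagate downward as t_i = h_i - g(h_{i+1}) + g(t_{i+1}). Schematically (assumed pseudocode, not the class internals):

def dtp_targets(activations, top_target, inverses):
    # activations: [h_0, ..., h_L] from the forward pass.
    # inverses[i]: approximate inverse of the mapping from layer i to layer i+1.
    targets = {len(activations) - 1: top_target}
    for i in reversed(range(1, len(activations) - 1)):
        g = inverses[i]
        targets[i] = activations[i] - g(activations[i + 1]) + g(targets[i + 1])
    return targets
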
Example #10
def demo_dtp_varieties(
        hidden_sizes = [240],
        n_epochs = 10,
        minibatch_size = 20,
        n_tests = 20,
        hidden_activation = 'tanh',
        output_activation = 'sigm',
        optimizer = 'adamax',
        learning_rate = 0.01,
        noise = 1,
        predictors = ['MLP', 'DTP', 'PreAct-DTP', 'Linear-DTP'],
        rng = 1234,
        live_plot = False,
        plot = False
        ):
    """
    ;

    :param hidden_sizes:
    :param n_epochs:
    :param minibatch_size:
    :param n_tests:
    :return:
    """
    if isinstance(predictors, str):
        predictors = [predictors]

    dataset = get_mnist_dataset(flat = True)
    dataset = dataset.process_with(targets_processor=lambda (x, ): (OneHotEncoding(10)(x).astype(int), ))
    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 0.1
        n_tests = 3

    set_default_figure_size(12, 9)

    predictors = OrderedDict((name, get_predictor(name, input_size = dataset.input_size, target_size=dataset.target_size,
            hidden_sizes=hidden_sizes, hidden_activation=hidden_activation, output_activation = output_activation,
            optimizer=optimizer, learning_rate=learning_rate, noise = noise, rng = rng)) for name in predictors)

    learning_curves = compare_predictors(
        dataset=dataset,
        online_predictors = predictors,
        minibatch_size = minibatch_size,
        test_epochs = sqrtspace(0, n_epochs, n_tests),
        evaluation_function = percent_argmax_correct,
        )

    if plot:
        plot_learning_curves(learning_curves)
Example #11
def mlp_normalization(hidden_size=300,
                      n_epochs=30,
                      n_tests=50,
                      minibatch_size=20):
    """
    Compare MLPs with different schemes for normalizing input.

    regular: Regular vanilla MLP
    normalize: Mean-subtract/normalize over minibatch
    normalize and scale: Mean-subtract/normalize over minibatch AND multiply by a trainable
        (per-unit) scale parameter.

    Conclusions: No significant benefit to scale parameter.  Normalizing gives
    a head start but incurs a small cost later on.  But really all classifiers are quite similar.

    :param hidden_size: Size of hidden layer
    """
    dataset = get_mnist_dataset()

    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 0.1
        n_tests = 3

    make_mlp = lambda normalize, scale: GradientBasedPredictor(
        function=MultiLayerPerceptron.from_init(layer_sizes=[
            dataset.input_size, hidden_size, dataset.n_categories
        ],
                                                hidden_activation='sig',
                                                output_activation='lin',
                                                normalize_minibatch=normalize,
                                                scale_param=scale,
                                                w_init=0.01,
                                                rng=5),
        cost_function=softmax_negative_log_likelihood,
        optimizer=SimpleGradientDescent(eta=0.1),
    ).compile()

    return compare_predictors(dataset=dataset,
                              online_predictors={
                                  'regular':
                                  make_mlp(normalize=False, scale=False),
                                  'normalize':
                                  make_mlp(normalize=True, scale=False),
                                  'normalize and scale':
                                  make_mlp(normalize=True, scale=True),
                              },
                              minibatch_size=minibatch_size,
                              test_epochs=sqrtspace(0, n_epochs, n_tests),
                              evaluation_function=percent_argmax_correct)
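
The normalize_minibatch option mean-subtracts and variance-normalizes each unit across the current minibatch, and scale_param adds a trainable per-unit gain on top of that. As a plain-numpy sketch of the idea (the library's version is symbolic):

import numpy as np

def normalize_over_minibatch(activations, scale=None, eps=1e-7):
    # activations: (minibatch_size, n_units). Normalize each unit over the
    # minibatch; optionally rescale by a learned per-unit gain.
    normalized = (activations - activations.mean(axis=0)) / (activations.std(axis=0) + eps)
    return normalized if scale is None else normalized * scale
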
Example #12
def mlp_normalization(hidden_size = 300, n_epochs = 30, n_tests = 50, minibatch_size=20):
    """
    Compare MLPs with different schemes for normalizing input.

    regular: Regular vanilla MLP
    normalize: Mean-subtract/normalize over minibatch
    normalize and scale: Mean-subtract/normalize over minibatch AND multiply by a trainable
        (per-unit) scale parameter.

    Conclusions: No significant benefit to scale parameter.  Normalizing gives
    a head start but incurs a small cost later on.  But really all classifiers are quite similar.

    :param hidden_size: Size of hidden layer
    """
    dataset = get_mnist_dataset()

    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 0.1
        n_tests = 3

    make_mlp = lambda normalize, scale: GradientBasedPredictor(
            function = MultiLayerPerceptron.from_init(
                layer_sizes=[dataset.input_size, hidden_size, dataset.n_categories],
                hidden_activation='sig',
                output_activation='lin',
                normalize_minibatch=normalize,
                scale_param=scale,
                w_init = 0.01,
                rng = 5
                ),
            cost_function = softmax_negative_log_likelihood,
            optimizer = SimpleGradientDescent(eta = 0.1),
            ).compile()

    return compare_predictors(
        dataset=dataset,
        online_predictors = {
            'regular': make_mlp(normalize = False, scale = False),
            'normalize': make_mlp(normalize=True, scale = False),
            'normalize and scale': make_mlp(normalize=True, scale = True),
            },
        minibatch_size = minibatch_size,
        test_epochs = sqrtspace(0, n_epochs, n_tests),
        evaluation_function = percent_argmax_correct
        )
Example #13
def backprop_vs_difference_target_prop(hidden_sizes=[240],
                                       n_epochs=10,
                                       minibatch_size=20,
                                       n_tests=20):

    dataset = get_mnist_dataset(flat=True)
    dataset = dataset.process_with(
        targets_processor=lambda (x, ): (OneHotEncoding(10)(x).astype(int), ))

    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 0.1
        n_tests = 3

    set_default_figure_size(12, 9)

    return compare_predictors(
        dataset=dataset,
        online_predictors={
            'backprop-mlp':
            GradientBasedPredictor(
                function=MultiLayerPerceptron.from_init(
                    layer_sizes=[dataset.input_size] + hidden_sizes +
                    [dataset.n_categories],
                    hidden_activation='tanh',
                    output_activation='sig',
                    w_init=0.01,
                    rng=5),
                cost_function=mean_squared_error,
                optimizer=AdaMax(0.01),
            ).compile(),
            'difference-target-prop-mlp':
            DifferenceTargetMLP.from_initializer(
                input_size=dataset.input_size,
                output_size=dataset.target_size,
                hidden_sizes=hidden_sizes,
                optimizer_constructor=lambda: AdaMax(0.01),
                w_init=0.01,
                noise=1,
            ).compile()
        },
        minibatch_size=minibatch_size,
        test_epochs=sqrtspace(0, n_epochs, n_tests),
        evaluation_function=percent_argmax_correct,
    )
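
Note that both networks here are trained on mean squared error against the one-hot targets rather than a log-likelihood. The cost itself is just (numpy sketch):

import numpy as np

def mean_squared_error(actual, target):
    # Mean over samples and output units alike.
    return np.mean((actual - target)**2)
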
Example #14
def backprop_vs_difference_target_prop(
        hidden_sizes = [240],
        n_epochs = 10,
        minibatch_size = 20,
        n_tests = 20
        ):

    dataset = get_mnist_dataset(flat = True)
    dataset = dataset.process_with(targets_processor=lambda (x, ): (OneHotEncoding(10)(x).astype(int), ))

    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 0.1
        n_tests = 3

    set_default_figure_size(12, 9)

    return compare_predictors(
        dataset=dataset,
        online_predictors = {
            'backprop-mlp': GradientBasedPredictor(
                function = MultiLayerPerceptron.from_init(
                layer_sizes=[dataset.input_size]+hidden_sizes+[dataset.n_categories],
                    hidden_activation='tanh',
                    output_activation='sig',
                    w_init = 0.01,
                    rng = 5
                    ),
                cost_function = mean_squared_error,
                optimizer = AdaMax(0.01),
                ).compile(),
            'difference-target-prop-mlp': DifferenceTargetMLP.from_initializer(
                input_size = dataset.input_size,
                output_size = dataset.target_size,
                hidden_sizes = hidden_sizes,
                optimizer_constructor = lambda: AdaMax(0.01),
                w_init=0.01,
                noise = 1,
            ).compile()
            },
        minibatch_size = minibatch_size,
        test_epochs = sqrtspace(0, n_epochs, n_tests),
        evaluation_function = percent_argmax_correct,
        )
Example #15
def demo_compare_dtp_optimizers(
        hidden_sizes = [240],
        n_epochs = 10,
        minibatch_size = 20,
        n_tests = 20,
        hidden_activation = 'tanh',
        ):

    dataset = get_mnist_dataset(flat = True).to_onehot()

    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 1
        n_tests = 2

    def make_dtp_net(optimizer_constructor, output_fcn):
        return DifferenceTargetMLP.from_initializer(
            input_size = dataset.input_size,
            output_size = dataset.target_size,
            hidden_sizes = hidden_sizes,
            optimizer_constructor = optimizer_constructor,
            input_activation='sigm',
            hidden_activation=hidden_activation,
            output_activation=output_fcn,
            w_init_mag=0.01,
            noise = 1,
            ).compile()

    learning_curves = compare_predictors(
        dataset=dataset,
        online_predictors = {
            'SGD-0.001-softmax': make_dtp_net(lambda: SimpleGradientDescent(0.001), output_fcn = 'softmax'),
            'AdaMax-0.001-softmax': make_dtp_net(lambda: AdaMax(0.001), output_fcn = 'softmax'),
            'RMSProp-0.001-softmax': make_dtp_net(lambda: RMSProp(0.001), output_fcn = 'softmax'),
            'SGD-0.001-sigm': make_dtp_net(lambda: SimpleGradientDescent(0.001), output_fcn = 'sigm'),
            'AdaMax-0.001-sigm': make_dtp_net(lambda: AdaMax(0.001), output_fcn = 'sigm'),
            'RMSProp-0.001-sigm': make_dtp_net(lambda: RMSProp(0.001), output_fcn = 'sigm'),
            },
        minibatch_size = minibatch_size,
        test_epochs = sqrtspace(0, n_epochs, n_tests),
        evaluation_function = percent_argmax_correct,
        )

    plot_learning_curves(learning_curves)
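
RMSProp, the third optimizer in this comparison, scales each gradient by a running root-mean-square of its recent magnitudes. A minimal standalone sketch (the decay constant is an assumption; the library class may differ):

import numpy as np

class RMSPropSketch(object):

    def __init__(self, learning_rate=0.001, decay=0.9, eps=1e-8):
        self.learning_rate, self.decay, self.eps = learning_rate, decay, eps
        self.mean_sq = 0.0

    def step(self, param, grad):
        # Exponential moving average of the squared gradient.
        self.mean_sq = self.decay*self.mean_sq + (1-self.decay)*grad**2
        return param - self.learning_rate * grad / (np.sqrt(self.mean_sq) + self.eps)
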
Example #16
def test_compare_predictors(hang_plot = False):

    dataset = get_synthetic_clusters_dataset()

    w_constructor = lambda rng = np.random.RandomState(45): .1*rng.randn(dataset.input_shape[0], dataset.n_categories)
    records = compare_predictors(
        dataset = dataset,
        offline_predictors={
            'SVM': SVC()
            },
        online_predictors={
            'fast-perceptron': Perceptron(alpha = 0.1, w = w_constructor()).to_categorical(),
            'slow-perceptron': Perceptron(alpha = 0.001, w = w_constructor()).to_categorical()
            },
        minibatch_size = 10,
        test_epochs = sqrtspace(0, 10, 20),
        evaluation_function='percent_correct'
        )

    assert 99 < records['SVM'].get_scores('Test') <= 100
    assert 20 < records['slow-perceptron'].get_scores('Test')[0] < 40 and 95 < records['slow-perceptron'].get_scores('Test')[-1] <= 100
    assert 20 < records['fast-perceptron'].get_scores('Test')[0] < 40 and 98 < records['fast-perceptron'].get_scores('Test')[-1] <= 100

    plot_learning_curves(records, hang = hang_plot)
Example #17
def compare_spiking_to_nonspiking(hidden_sizes = [300, 300], eta=0.01, w_init=0.01, fractional = False, n_epochs = 20,
                                  forward_discretize = 'rect-herding', back_discretize = 'noreset-herding', test_discretize='rect-herding', save_results = False):

    mnist = get_mnist_dataset(flat=True).to_onehot()
    test_epochs=[0.0, 0.05, 0.1, 0.2, 0.5]+range(1, n_epochs+1)

    if is_test_mode():
        mnist = mnist.shorten(500)
        eta = 0.01
        w_init=0.01
        test_epochs = [0.0, 0.05, 0.1]

    spiking_net = JavaSpikingNetWrapper.from_init(
        fractional = fractional,
        depth_first=False,
        smooth_grads = False,
        forward_discretize = forward_discretize,
        back_discretize = back_discretize,
        test_discretize = test_discretize,
        w_init=w_init,
        hold_error=True,
        rng = 1234,
        n_steps = 10,
        eta=eta,
        layer_sizes=[784]+hidden_sizes+[10],
        )

    relu_net = GradientBasedPredictor(
        MultiLayerPerceptron.from_init(
            hidden_activation = 'relu',
            output_activation = 'relu',
            layer_sizes=[784]+hidden_sizes+[10],
            use_bias=False,
            w_init=w_init,
            rng=1234,
            ),
        cost_function = 'mse',
        optimizer=GradientDescent(eta)
        ).compile()

    # Listen for spikes
    forward_eavesdropper = jp.JClass('nl.uva.deepspike.eavesdroppers.SpikeCountingEavesdropper')()
    backward_eavesdropper = jp.JClass('nl.uva.deepspike.eavesdroppers.SpikeCountingEavesdropper')()
    for lay in spiking_net.jnet.layers:
        lay.forward_herder.add_eavesdropper(forward_eavesdropper)
    for lay in spiking_net.jnet.layers[1:]:
        lay.backward_herder.add_eavesdropper(backward_eavesdropper)
    spiking_net.jnet.error_counter.add_eavesdropper(backward_eavesdropper)
    forward_counts = []
    backward_counts = []

    def register_counts():
        forward_counts.append(forward_eavesdropper.get_count())
        backward_counts.append(backward_eavesdropper.get_count())

    results = compare_predictors(
        dataset=mnist,
        online_predictors={
            'Spiking-MLP': spiking_net,
            'ReLU-MLP': relu_net,
            },
        test_epochs=test_epochs,
        online_test_callbacks=lambda p: register_counts() if p is spiking_net else None,
        minibatch_size = 1,
        test_on = 'training+test',
        evaluation_function=percent_argmax_incorrect,
        )

    spiking_params = [np.array(lay.forward_weights.w.asFloat()).copy() for lay in spiking_net.jnet.layers]
    relu_params = [param.get_value().astype(np.float64) for param in relu_net.parameters]

    # See what the score is when we apply the final ReLU weights to the spiking network.
    offline_trained_spiking_net = JavaSpikingNetWrapper(
        ws=relu_params,
        fractional = fractional,
        depth_first=False,
        smooth_grads = False,
        forward_discretize = forward_discretize,
        back_discretize = back_discretize,
        test_discretize = test_discretize,
        hold_error=True,
        n_steps = 10,
        eta=eta,
        )

    # for spiking_layer, p in zip(spiking_net.jnet.layers, relu_params):
    #     spiking_layer.w = p.astype(np.float64)

    error = [
        ('Test', percent_argmax_incorrect(offline_trained_spiking_net.predict(mnist.test_set.input), mnist.test_set.target)),
        ('Training', percent_argmax_incorrect(offline_trained_spiking_net.predict(mnist.training_set.input), mnist.training_set.target))
        ]
    results['Spiking-MLP with ReLU weights'] = LearningCurveData()
    results['Spiking-MLP with ReLU weights'].add(None, error)
    print 'Spiking-MLP with ReLU weights: %s' % error
    # --------------------------------------------------------------------------

    # See what the score is when we plug the spiking weights into the ReLU net.
    for param, sval in zip(relu_net.parameters, spiking_params):
        param.set_value(sval)
    error = [
        ('Test', percent_argmax_incorrect(relu_net.predict(mnist.test_set.input), mnist.test_set.target)),
        ('Training', percent_argmax_incorrect(relu_net.predict(mnist.training_set.input), mnist.training_set.target))
        ]
    results['ReLU-MLP with Spiking weights'] = LearningCurveData()
    results['ReLU-MLP with Spiking weights'].add(None, error)
    print 'ReLU-MLP with Spiking weights: %s' % error
    # --------------------------------------------------------------------------

    if save_results:
        with open("mnist_relu_vs_spiking_results-%s.pkl" % datetime.now(), 'w') as f:
            pickle.dump(results, f)

    # Problem: this currently includes test-time spikes in the counts.
    forward_rates = np.diff(forward_counts) / (np.diff(test_epochs)*60000)
    backward_rates = np.diff(backward_counts) / (np.diff(test_epochs)*60000)

    plt.figure('ReLU vs Spikes')
    plt.subplot(211)
    plot_learning_curves(results, title = "MNIST Learning Curves", hang=False, figure_name='ReLU vs Spikes', xscale='linear', yscale='log', y_title='Percent Error')
    plt.subplot(212)
    plt.plot(test_epochs[1:], forward_rates)
    plt.plot(test_epochs[1:], backward_rates)
    plt.xlabel('Epoch')
    plt.ylabel('n_spikes')
    plt.legend(['Mean Forward Spikes', 'Mean Backward Spikes'], loc='best')
    plt.interactive(is_test_mode())
    plt.show()
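
The 'herding' discretizations named in the arguments are sigma-delta style quantizers: each unit carries a running residual and emits a spike once enough signal has accumulated, so the spike train's running sum tracks the real-valued signal. A schematic version of that idea (an assumption about the scheme, not the Java implementation):

import numpy as np

def herding_quantize(signal):
    # signal: (n_steps, n_units) real-valued inputs over time.
    # Returns integer spike counts whose cumulative sum stays within
    # rounding distance of the signal's cumulative sum.
    phi = np.zeros(signal.shape[1])
    spikes = np.zeros_like(signal)
    for t, x in enumerate(signal):
        phi += x
        spikes[t] = np.round(phi)
        phi -= spikes[t]
    return spikes
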
Example #18
        smooth_grads = False,
        back_discretize = 'noreset-herding',
        n_steps = 10,
        hidden_sizes = [200, 200],
        hold_error = True,
        : compare_predictors(
            dataset=(get_mnist_dataset(flat=True).shorten(100) if is_test_mode() else get_mnist_dataset(flat=True)).to_onehot(),
            online_predictors={'Spiking MLP': JavaSpikingNetWrapper.from_init(
                fractional = fractional,
                depth_first = depth_first,
                smooth_grads = smooth_grads,
                back_discretize = back_discretize,
                w_init=0.01,
                rng = 1234,
                eta=0.01,
                n_steps = n_steps,
                hold_error=hold_error,
                layer_sizes=[784]+hidden_sizes+[10],
                )},
            test_epochs=[0.0, 0.05] if is_test_mode() else [0.0, 0.05, 0.1, 0.2, 0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4],
            minibatch_size = 1,
            report_test_scores=True,
            test_on = 'test',
            evaluation_function='percent_argmax_incorrect'
            )),
    versions={
        'Baseline': dict(),
        'Fractional-Updates': dict(fractional = True),
        'Depth-First': dict(depth_first = True),
        'Smooth-Grads': dict(smooth_grads = True),
        'BackQuant-Zero-Reset': dict(back_discretize='herding'),