def demo_perceptron_dtp(
        hidden_sizes=[240],
        n_epochs=20,
        n_tests=20,
        minibatch_size=100,
        lin_dtp=True,
        ):

    dataset = get_mnist_dataset(flat=True).to_onehot()

    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 1
        n_tests = 2

    predictor = DifferenceTargetMLP(
        layers=[
            PerceptronLayer.from_initializer(n_in, n_out, initial_mag=2, lin_dtp=lin_dtp)
            for n_in, n_out in zip([dataset.input_size] + hidden_sizes, hidden_sizes + [dataset.target_size])
            ],
        output_cost_function=None
        ).compile()

    result = assess_online_predictor(
        predictor=predictor,
        dataset=dataset,
        minibatch_size=minibatch_size,
        evaluation_function='percent_argmax_correct',
        test_epochs=sqrtspace(0, n_epochs, n_tests),
        )

    plot_learning_curves(result)
def demo_compare_dtp_methods(
        predictor_constructors,
        n_epochs=10,
        minibatch_size=20,
        n_tests=20,
        onehot=True,
        accumulator=None
        ):

    dataset = get_mnist_dataset(flat=True, binarize=False)
    n_categories = dataset.n_categories
    if onehot:
        dataset = dataset.to_onehot()

    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 1
        n_tests = 2

    learning_curves = compare_predictors(
        dataset=dataset,
        online_predictors={name: p(dataset.input_size, n_categories) for name, p in predictor_constructors.iteritems()},
        minibatch_size=minibatch_size,
        test_epochs=sqrtspace(0, n_epochs, n_tests),
        evaluation_function=percent_argmax_correct,
        # online_test_callbacks={'perceptron': lambda p: dbplot(p.symbolic_predictor.layers[0].w.get_value().T.reshape(-1, 28, 28))},
        accumulators=accumulator
        )

    plot_learning_curves(learning_curves)
def mnist_adamax_showdown(hidden_size=300, n_epochs=10, n_tests=20):

    dataset = get_mnist_dataset()

    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 0.1
        n_tests = 3

    make_mlp = lambda optimizer: GradientBasedPredictor(
        function=MultiLayerPerceptron.from_init(
            layer_sizes=[dataset.input_size, hidden_size, dataset.n_categories],
            hidden_activation='sig',
            output_activation='lin',
            w_init=0.01,
            rng=5
            ),
        cost_function=softmax_negative_log_likelihood,
        optimizer=optimizer,
        ).compile()

    return compare_predictors(
        dataset=dataset,
        online_predictors={
            'sgd': make_mlp(SimpleGradientDescent(eta=0.1)),
            'adamax': make_mlp(AdaMax(alpha=1e-3)),
            },
        minibatch_size=20,
        test_epochs=sqrtspace(0, n_epochs, n_tests),
        evaluation_function=percent_argmax_correct
        )
def mnist_adamax_showdown(hidden_size=300, n_epochs=10, n_tests=20):

    dataset = get_mnist_dataset()

    if is_test_mode():
        dataset = dataset.shorten(200)  # Reassign: shorten() returns a new dataset rather than modifying in place.
        n_epochs = 0.1
        n_tests = 3

    make_mlp = lambda optimizer: GradientBasedPredictor(
        function=MultiLayerPerceptron(
            layer_sizes=[hidden_size, dataset.n_categories],
            input_size=dataset.input_size,
            hidden_activation='sig',
            output_activation='lin',
            w_init=normal_w_init(mag=0.01, seed=5)
            ),
        cost_function=softmax_negative_log_likelihood,
        optimizer=optimizer,
        ).compile()

    return compare_predictors(
        dataset=dataset,
        online_predictors={
            'sgd': make_mlp(SimpleGradientDescent(eta=0.1)),
            'adamax': make_mlp(AdaMax(alpha=1e-3)),
            },
        minibatch_size=20,
        test_epochs=sqrtspace(0, n_epochs, n_tests),
        evaluation_function=percent_argmax_correct
        )
def test_compare_predictors(hang_plot=False):

    dataset = get_synthetic_clusters_dataset()

    w_constructor = lambda rng=np.random.RandomState(45): .1 * rng.randn(dataset.input_shape[0], dataset.n_categories)

    records = compare_predictors(
        dataset=dataset,
        offline_predictors={'SVM': SVC()},
        online_predictors={
            'fast-perceptron': Perceptron(alpha=0.1, w=w_constructor()).to_categorical(),
            'slow-perceptron': Perceptron(alpha=0.001, w=w_constructor()).to_categorical()
            },
        minibatch_size=10,
        test_epochs=sqrtspace(0, 10, 20),
        evaluation_function='percent_correct'
        )

    assert 99 < records['SVM'].get_scores('Test') <= 100
    assert 20 < records['slow-perceptron'].get_scores('Test')[0] < 40 and 95 < records['slow-perceptron'].get_scores('Test')[-1] <= 100
    assert 20 < records['fast-perceptron'].get_scores('Test')[0] < 40 and 98 < records['fast-perceptron'].get_scores('Test')[-1] <= 100

    plot_learning_curves(records, hang=hang_plot)
def demo_dtp_varieties(
        hidden_sizes=[240],
        n_epochs=10,
        minibatch_size=20,
        n_tests=20,
        hidden_activation='tanh',
        output_activation='sigm',
        optimizer='adamax',
        learning_rate=0.01,
        noise=1,
        predictors=['MLP', 'DTP', 'PreAct-DTP', 'Linear-DTP'],
        rng=1234,
        use_bias=True,
        live_plot=False,
        plot=False
        ):
    """
    Compare MNIST learning curves for an ordinary MLP against several Difference Target Propagation
    variants (DTP, PreAct-DTP, Linear-DTP).

    :param hidden_sizes: Sizes of the hidden layers
    :param n_epochs: Number of epochs to train for
    :param minibatch_size: Number of samples per training minibatch
    :param n_tests: Number of points along the learning curve at which to test
    """
    if isinstance(predictors, str):
        predictors = [predictors]

    dataset = get_mnist_dataset(flat=True)
    dataset = dataset.process_with(targets_processor=lambda (x, ): (OneHotEncoding(10)(x).astype(int), ))
    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 0.1
        n_tests = 3

    set_default_figure_size(12, 9)

    predictors = OrderedDict((name, get_predictor(
        name,
        input_size=dataset.input_size,
        target_size=dataset.target_size,
        hidden_sizes=hidden_sizes,
        hidden_activation=hidden_activation,
        output_activation=output_activation,
        optimizer=optimizer,
        learning_rate=learning_rate,
        noise=noise,
        use_bias=use_bias,
        rng=rng
        )) for name in predictors)

    learning_curves = compare_predictors(
        dataset=dataset,
        online_predictors=predictors,
        minibatch_size=minibatch_size,
        test_epochs=sqrtspace(0, n_epochs, n_tests),
        evaluation_function=percent_argmax_correct,
        )

    if plot:
        plot_learning_curves(learning_curves)
def compare_example_predictors(
        n_epochs=5,
        n_tests=20,
        minibatch_size=10,
        ):
    """
    This demo shows how we can compare different online predictors.  The demo trains the predictors
    on the dataset, returning an object that contains the results.  Run with is_test_mode() active
    to just run the demo quickly (but not to completion) to see that it doesn't break.
    """

    dataset = get_mnist_dataset(flat=True)  # "Flatten" the 28x28 inputs to a 784-d vector

    if is_test_mode():
        # Shorten the dataset so we run through it quickly in test mode.
        dataset = dataset.shorten(200)
        n_epochs = 1
        n_tests = 3

    # Here we compare three predictors on MNIST - an MLP, a Perceptron, and a Random Forest.
    # - The MLP is defined using Plato's interfaces - we create a Symbolic Predictor (GradientBasedPredictor) and
    #   then compile it into an IPredictor object.
    # - The Perceptron directly implements the IPredictor interface.
    # - The Random Forest implements SciKit learn's predictor interface - that is, it has a fit(x, y) and a predict(x) method.
    learning_curve_data = compare_predictors(
        dataset=dataset,
        online_predictors={
            'Perceptron': Perceptron(
                w=np.zeros((dataset.input_size, dataset.n_categories)),
                alpha=0.001
                ).to_categorical(n_categories=dataset.n_categories),  # .to_categorical allows the perceptron to be trained on integer labels.
            'MLP': GradientBasedPredictor(
                function=MultiLayerPerceptron.from_init(
                    layer_sizes=[dataset.input_size, 500, dataset.n_categories],
                    hidden_activation='sig',  # Sigmoidal hidden units
                    output_activation='softmax',  # Softmax output unit, since we're doing multinomial classification
                    w_init=0.01,
                    rng=5
                    ),
                cost_function=negative_log_likelihood_dangerous,  # "Dangerous" because it doesn't check to see that output is normalized, but we know it is because it comes from softmax.
                optimizer=SimpleGradientDescent(eta=0.1),
                ).compile(),  # .compile() returns an IPredictor
            },
        offline_predictors={'RF': RandomForestClassifier(n_estimators=40)},
        minibatch_size=minibatch_size,
        test_epochs=sqrtspace(0, n_epochs, n_tests),
        evaluation_function=percent_argmax_correct  # Compares one-hot
        )
    # Result is a LearningCurveData object
    return learning_curve_data
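# A minimal usage sketch (an assumption about how this demo would typically be invoked, not part of
# the demo itself), assuming plot_learning_curves - used elsewhere in this codebase - is importable
# here: run the comparison above and plot the returned LearningCurveData.
if __name__ == '__main__':
    learning_curves = compare_example_predictors(n_epochs=5, n_tests=20, minibatch_size=10)
    plot_learning_curves(learning_curves, hang=True)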
def demo_compare_dtp_optimizers(
        hidden_sizes=[240],
        n_epochs=10,
        minibatch_size=20,
        n_tests=20,
        hidden_activation='tanh',
        ):

    dataset = get_mnist_dataset(flat=True).to_onehot()

    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 1
        n_tests = 2

    def make_dtp_net(optimizer_constructor, output_fcn):
        return DifferenceTargetMLP.from_initializer(
            input_size=dataset.input_size,
            output_size=dataset.target_size,
            hidden_sizes=hidden_sizes,
            optimizer_constructor=optimizer_constructor,
            input_activation='sigm',
            hidden_activation=hidden_activation,
            output_activation=output_fcn,
            w_init_mag=0.01,
            noise=1,
            ).compile()

    learning_curves = compare_predictors(
        dataset=dataset,
        online_predictors={
            'SGD-0.001-softmax': make_dtp_net(lambda: SimpleGradientDescent(0.001), output_fcn='softmax'),
            'AdaMax-0.001-softmax': make_dtp_net(lambda: AdaMax(0.001), output_fcn='softmax'),
            'RMSProp-0.001-softmax': make_dtp_net(lambda: RMSProp(0.001), output_fcn='softmax'),
            'SGD-0.001-sigm': make_dtp_net(lambda: SimpleGradientDescent(0.001), output_fcn='sigm'),
            'AdaMax-0.001-sigm': make_dtp_net(lambda: AdaMax(0.001), output_fcn='sigm'),
            'RMSProp-0.001-sigm': make_dtp_net(lambda: RMSProp(0.001), output_fcn='sigm'),
            },
        minibatch_size=minibatch_size,
        test_epochs=sqrtspace(0, n_epochs, n_tests),
        evaluation_function=percent_argmax_correct,
        )

    plot_learning_curves(learning_curves)
def demo_run_dtp_on_mnist(
        hidden_sizes=[240],
        n_epochs=20,
        n_tests=20,
        minibatch_size=100,
        input_activation='sigm',
        hidden_activation='tanh',
        output_activation='softmax',
        optimizer_constructor=lambda: RMSProp(0.001),
        normalize_inputs=False,
        local_cost_function=mean_squared_error,
        output_cost_function=None,
        noise=1,
        lin_dtp=False,
        seed=1234
        ):

    dataset = get_mnist_dataset(flat=True).to_onehot()
    if normalize_inputs:
        dataset = dataset.process_with(targets_processor=multichannel(lambda x: x / np.sum(x, axis=1, keepdims=True)))
    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 1
        n_tests = 2

    predictor = DifferenceTargetMLP.from_initializer(
        input_size=dataset.input_size,
        output_size=dataset.target_size,
        hidden_sizes=hidden_sizes,
        optimizer_constructor=optimizer_constructor,  # Note that RMSProp/AdaMax way outperform SGD here.
        # input_activation=input_activation,
        hidden_activation=hidden_activation,
        output_activation=output_activation,
        w_init_mag=0.01,
        output_cost_function=output_cost_function,
        noise=noise,
        cost_function=local_cost_function,
        layer_constructor=DifferenceTargetLayer.from_initializer if not lin_dtp else PreActivationDifferenceTargetLayer.from_initializer,
        rng=seed
        ).compile()

    result = assess_online_predictor(
        predictor=predictor,
        dataset=dataset,
        minibatch_size=minibatch_size,
        evaluation_function='percent_argmax_correct',
        test_epochs=sqrtspace(0, n_epochs, n_tests),
        test_callback=lambda p: dbplot(p.symbolic_predictor.layers[0].w.get_value().T.reshape(-1, 28, 28))
        )

    plot_learning_curves(result)
def mlp_normalization(hidden_size=300, n_epochs=30, n_tests=50, minibatch_size=20):
    """
    Compare an MLP with different schemes for normalizing input.

    regular: Regular vanilla MLP
    normalize: Mean-subtract/normalize over minibatch
    normalize and scale: Mean-subtract/normalize over minibatch AND multiply by a trainable
        (per-unit) scale parameter.

    Conclusions: No significant benefit to the scale parameter.  Normalizing gives a head start but
    incurs a small cost later on.  But really all classifiers are quite similar.

    :param hidden_size: Size of hidden layer
    """
    dataset = get_mnist_dataset()

    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 0.1
        n_tests = 3

    make_mlp = lambda normalize, scale: GradientBasedPredictor(
        function=MultiLayerPerceptron.from_init(
            layer_sizes=[dataset.input_size, hidden_size, dataset.n_categories],
            hidden_activation='sig',
            output_activation='lin',
            normalize_minibatch=normalize,
            scale_param=scale,
            w_init=0.01,
            rng=5
            ),
        cost_function=softmax_negative_log_likelihood,
        optimizer=SimpleGradientDescent(eta=0.1),
        ).compile()

    return compare_predictors(
        dataset=dataset,
        online_predictors={
            'regular': make_mlp(normalize=False, scale=False),
            'normalize': make_mlp(normalize=True, scale=False),
            'normalize and scale': make_mlp(normalize=True, scale=True),
            },
        minibatch_size=minibatch_size,
        test_epochs=sqrtspace(0, n_epochs, n_tests),
        evaluation_function=percent_argmax_correct
        )
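# A rough sketch of what the normalize_minibatch and scale_param options compared above are
# understood to mean for a minibatch of activations x with shape (n_samples, n_units).  This is an
# assumption for illustration (the helper name and epsilon are not part of the MultiLayerPerceptron
# internals), not the actual implementation.
def _minibatch_normalization_sketch(x, scale=None, epsilon=1e-7):
    # Mean-subtract and divide by the per-unit standard deviation, computed over the minibatch.
    x_normalized = (x - x.mean(axis=0, keepdims=True)) / (x.std(axis=0, keepdims=True) + epsilon)
    # Optionally multiply by a (trainable, per-unit) scale parameter.
    return x_normalized if scale is None else scale * x_normalized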
def demo_mnist_online_regression(
        minibatch_size=10,
        learning_rate=0.1,
        optimizer='sgd',
        regressor_type='multinomial',
        n_epochs=20,
        n_test_points=30,
        max_training_samples=None,
        include_biases=True,
        ):
    """
    Train an online regressor on MNIST and print the test scores as training progresses.
    """

    if is_test_mode():
        n_test_points = 3
        minibatch_size = 5
        n_epochs = 0.01
        dataset = get_mnist_dataset(n_training_samples=30, n_test_samples=30, flat=True)
    else:
        dataset = get_mnist_dataset(n_training_samples=max_training_samples, flat=True)

    assert regressor_type in ('multinomial', 'logistic', 'linear')

    n_outputs = dataset.n_categories
    if regressor_type in ('logistic', 'linear'):
        dataset = dataset.to_onehot()

    predictor = OnlineRegressor(
        input_size=dataset.input_size,
        output_size=n_outputs,
        regressor_type=regressor_type,
        optimizer=get_named_optimizer(name=optimizer, learning_rate=learning_rate),
        include_biases=include_biases
        ).compile()

    # Train and periodically report the test score.
    results = assess_online_predictor(
        dataset=dataset,
        predictor=predictor,
        evaluation_function='percent_argmax_correct',
        test_epochs=sqrtspace(0, n_epochs, n_test_points),
        minibatch_size=minibatch_size
        )

    plot_learning_curves(results)
def backprop_vs_difference_target_prop(
        hidden_sizes=[240],
        n_epochs=10,
        minibatch_size=20,
        n_tests=20
        ):

    dataset = get_mnist_dataset(flat=True)
    dataset = dataset.process_with(targets_processor=lambda (x, ): (OneHotEncoding(10)(x).astype(int), ))

    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 0.1
        n_tests = 3

    set_default_figure_size(12, 9)

    return compare_predictors(
        dataset=dataset,
        online_predictors={
            'backprop-mlp': GradientBasedPredictor(
                function=MultiLayerPerceptron.from_init(
                    layer_sizes=[dataset.input_size] + hidden_sizes + [dataset.n_categories],
                    hidden_activation='tanh',
                    output_activation='sig',
                    w_init=0.01,
                    rng=5
                    ),
                cost_function=mean_squared_error,
                optimizer=AdaMax(0.01),
                ).compile(),
            'difference-target-prop-mlp': DifferenceTargetMLP.from_initializer(
                input_size=dataset.input_size,
                output_size=dataset.target_size,
                hidden_sizes=hidden_sizes,
                optimizer_constructor=lambda: AdaMax(0.01),
                w_init=0.01,
                noise=1,
                ).compile()
            },
        minibatch_size=minibatch_size,
        test_epochs=sqrtspace(0, n_epochs, n_tests),
        evaluation_function=percent_argmax_correct,
        )
def test_compare_predictors_old():

    x_tr, y_tr, x_ts, y_ts, w_true = get_logistic_regression_data(noise_factor=0.1)
    dataset = DataSet(DataCollection(x_tr, y_tr), DataCollection(x_ts, y_ts)).process_with(
        targets_processor=lambda (x, ): (OneHotEncoding()(x[:, 0]), ))
    w_init = 0.1 * np.random.randn(dataset.training_set.input.shape[1], dataset.training_set.target.shape[1])

    records = compare_predictors_old(
        dataset=dataset,
        offline_predictor_constructors={
            'Optimal': lambda: MockPredictor(lambda x: sigm(x.dot(w_true)))
            },
        online_predictor_constructors={
            'fast-perceptron': lambda: Perceptron(alpha=0.1, w=w_init.copy()),
            'slow-perceptron': lambda: Perceptron(alpha=0.001, w=w_init.copy())
            },
        minibatch_size=10,
        test_points=sqrtspace(0, 10, 20),
        evaluation_function='mse'
        )

    plot_learning_curves(records, hang=False)
def compare_predictors(dataset, online_predictors={}, offline_predictors={}, minibatch_size='full',
        evaluation_function='mse', test_epochs=sqrtspace(0, 1, 10), report_test_scores=True,
        test_on='training+test', test_batch_size=None, accumulators=None):
    """
    Compare a set of predictors by running them on a dataset, and return the learning curves for each predictor.

    :param dataset: A DataSet object
    :param online_predictors: A dict<str:IPredictor> of online predictors.  An online predictor is
        sequentially fed minibatches of data and updates its parameters with each minibatch.
    :param offline_predictors: A dict<str:object> of offline predictors.  Offline predictors obey sklearn's
        Estimator/Predictor interfaces - i.e. they have methods estimator = object.fit(data, targets) and
        prediction = object.predict(data)
    :param minibatch_size: Size of the minibatches to use for online predictors.  Can be:
        An int, in which case it represents the minibatch size for all classifiers.
        A dict<str: int>, in which case you can set the minibatch size per-classifier.
        In place of the int, you can put 'all' if you want to train on the whole dataset in each iteration.
    :param test_epochs: Test points to use for online predictors.  Can be:
        A list of integers - in which case the classifier is tested after seeing this many samples.
        A list of floats - in which case the classifier is tested after seeing this many epochs.
        'always' - in which case a test is performed after every training step.
        The final test point determines the end of training.
    :param evaluation_function: Function used to evaluate output of predictors
    :param report_test_scores: Boolean indicating whether you'd like to report results online.
    :return: An OrderedDict<LearningCurveData>
    """
    all_keys = online_predictors.keys() + offline_predictors.keys()
    assert len(all_keys) > 0, 'You have to give at least one predictor.  Is that too much to ask?'
    assert len(all_keys) == len(np.unique(all_keys)), "You have multiple predictors using the same names.  Change that."

    type_constructor_dict = OrderedDict(
        [(k, ('offline', offline_predictors[k])) for k in sorted(offline_predictors.keys())] +
        [(k, ('online', online_predictors[k])) for k in sorted(online_predictors.keys())]
        )

    if not isinstance(minibatch_size, dict):
        minibatch_size = {predictor_name: minibatch_size for predictor_name in online_predictors.keys()}
    else:
        assert online_predictors.viewkeys() == minibatch_size.viewkeys()

    if not isinstance(accumulators, dict):
        accumulators = {predictor_name: accumulators for predictor_name in online_predictors.keys()}
    else:
        assert online_predictors.viewkeys() == accumulators.viewkeys()

    test_epochs = np.array(test_epochs)
    # test_epochs_float = test_epochs.dtype == float
    # if test_epochs_float:
    #     test_epochs = (test_epochs * dataset.training_set.n_samples).astype(int)

    if isinstance(evaluation_function, str):
        evaluation_function = get_evaluation_function(evaluation_function)

    records = OrderedDict()

    # Run the predictors (offline and online)
    for predictor_name, (predictor_type, predictor) in type_constructor_dict.iteritems():
        print '%s\nRunning predictor %s\n%s' % ('='*20, predictor_name, '-'*20)
        records[predictor_name] = \
            assess_offline_predictor(
                predictor=predictor,
                dataset=dataset,
                evaluation_function=evaluation_function,
                report_test_scores=report_test_scores,
                test_on=test_on,
                test_batch_size=test_batch_size
                ) if predictor_type == 'offline' else \
            assess_online_predictor(
                predictor=predictor,
                dataset=dataset,
                evaluation_function=evaluation_function,
                test_epochs=test_epochs,
                accumulator=accumulators[predictor_name],
                minibatch_size=minibatch_size[predictor_name],
                report_test_scores=report_test_scores,
                test_on=test_on,
                test_batch_size=test_batch_size,
                ) if predictor_type == 'online' else \
            bad_value(predictor_type)

    print 'Done!'
    return records
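# An illustrative usage sketch for compare_predictors (not part of the original API), reusing the
# synthetic-clusters dataset and the Perceptron/SVC predictors that the tests in this codebase use.
# The predictor names and learning rates are illustrative only; it shows the per-predictor dict
# form of minibatch_size and float-valued test_epochs described in the docstring above.
#
#     dataset = get_synthetic_clusters_dataset()
#     records = compare_predictors(
#         dataset=dataset,
#         offline_predictors={'SVM': SVC()},  # Offline predictor: follows sklearn's fit/predict interface
#         online_predictors={
#             'fast-perceptron': Perceptron(alpha=0.1, w=np.zeros((dataset.input_shape[0], dataset.n_categories))).to_categorical(),
#             'slow-perceptron': Perceptron(alpha=0.001, w=np.zeros((dataset.input_shape[0], dataset.n_categories))).to_categorical(),
#             },
#         minibatch_size={'fast-perceptron': 10, 'slow-perceptron': 20},  # A single int for all predictors also works
#         test_epochs=sqrtspace(0, 10, 20),  # Floats are interpreted as epochs
#         evaluation_function='percent_correct',
#         )
#     plot_learning_curves(records)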
def compare_predictors_old(dataset, online_predictor_constructors={}, offline_predictor_constructors={},
        incremental_predictor_constructors={}, minibatch_size=1, test_points=sqrtspace(0, 1, 10),
        evaluation_function='mse', report_test_scores=True, on_construction_callback=None):
    """
    DEPRECATED!  See above.

    Compare a set of predictors by running them on a dataset, and return the learning curves for each predictor.

    :param dataset: A DataSet object
    :param online_predictor_constructors: A dict<str:function> of online predictors.  An online predictor is
        sequentially fed minibatches of data and updates its parameters with each minibatch.
    :param offline_predictor_constructors: A dict<str:function> of offline predictors.  An offline predictor
        trains just once on the full training data, and then makes a prediction on the test data.  Unlike
        Online and Incremental predictors, an Offline predictor has no initial state, so it doesn't make sense
        to ask it to predict before any training has been done.
    :param incremental_predictor_constructors: A dict<str:function> of incremental predictors.  An incremental
        predictor gets the whole dataset in each pass, and updates its parameters each time.
    :param minibatch_size: Size of the minibatches to use for online predictors.  Can be:
        An int, in which case it represents the minibatch size for all classifiers.
        A dict<str: int>, in which case you can set the minibatch size per-classifier.
        In place of the int, you can put 'all' if you want to train on the whole dataset in each iteration.
    :param test_points: Test points to use.  Can be:
        A list of integers - in which case the classifier is tested after seeing this many samples.
        A list of floats - in which case the classifier is tested after seeing this many epochs.
        'always' - in which case a test is performed after every training step.
        The final test point determines the end of training.
    :param evaluation_function: Function used to evaluate output of predictors
    :param report_test_scores: Boolean indicating whether you'd like to report results online.
    :param on_construction_callback: A function of the form callback(predictor) that is called when a predictor
        is constructed.  This may be useful for debugging.
    :return: An OrderedDict<LearningCurveData>
    """
    all_keys = online_predictor_constructors.keys() + offline_predictor_constructors.keys() + incremental_predictor_constructors.keys()
    assert len(all_keys) > 0, 'You have to give at least one predictor.'
    assert len(all_keys) == len(np.unique(all_keys)), "You have multiple predictors using the same names.  Change that."

    type_constructor_dict = OrderedDict(
        [(k, ('offline', offline_predictor_constructors[k])) for k in sorted(offline_predictor_constructors.keys())] +
        [(k, ('online', online_predictor_constructors[k])) for k in sorted(online_predictor_constructors.keys())] +
        [(k, ('incremental', incremental_predictor_constructors[k])) for k in sorted(incremental_predictor_constructors.keys())]
        )

    if isinstance(minibatch_size, int):
        minibatch_size = {predictor_name: minibatch_size for predictor_name in online_predictor_constructors.keys()}
    else:
        assert online_predictor_constructors.viewkeys() == minibatch_size.viewkeys()

    test_points = np.array(test_points)
    test_points_float = test_points.dtype == float
    if test_points_float:
        test_points = (test_points * dataset.training_set.n_samples).astype(int)

    if isinstance(evaluation_function, str):
        evaluation_function = get_evaluation_function(evaluation_function)

    records = OrderedDict()

    # Run the predictors (offline, online, and incremental)
    for predictor_name, (predictor_type, predictor_constructor) in type_constructor_dict.iteritems():
        predictor = predictor_constructor()
        if on_construction_callback is not None:
            on_construction_callback(predictor)
        print '%s\nRunning predictor %s\n%s' % ('='*20, predictor_name, '-'*20)
        records[predictor_name] = \
            assess_offline_predictor(
                predictor=predictor,
                dataset=dataset,
                evaluation_function=evaluation_function,
                report_test_scores=report_test_scores
                ) if predictor_type == 'offline' else \
            assess_online_predictor_old(
                predictor=predictor,
                dataset=dataset,
                evaluation_function=evaluation_function,
                test_points=test_points,
                minibatch_size=minibatch_size[predictor_name],
                report_test_scores=report_test_scores
                ) if predictor_type == 'online' else \
            assess_incremental_predictor_old(
                predictor=predictor,
                dataset=dataset,
                evaluation_function=evaluation_function,
                sampling_points=np.sort(np.unique(np.ceil(test_points/dataset.training_set.n_samples).astype(int))),
                accumulation_function='mean',
                report_test_scores=report_test_scores
                )

    print 'Done!'
    return records
def demo_mnist_mlp(
        minibatch_size=10,
        learning_rate=0.1,
        optimizer='sgd',
        hidden_sizes=[300],
        w_init=0.01,
        hidden_activation='tanh',
        output_activation='softmax',
        cost='nll-d',
        visualize_params=False,
        n_test_points=30,
        n_epochs=10,
        max_training_samples=None,
        use_bias=True,
        onehot=False,
        rng=1234,
        plot=False,
        ):
    """
    Train an MLP on MNIST and print the test scores as training progresses.
    """
    if is_test_mode():
        n_test_points = 3
        minibatch_size = 5
        n_epochs = 0.01
        dataset = get_mnist_dataset(n_training_samples=30, n_test_samples=30)
    else:
        dataset = get_mnist_dataset(n_training_samples=max_training_samples)

    if onehot:
        dataset = dataset.to_onehot()

    if minibatch_size == 'full':
        minibatch_size = dataset.training_set.n_samples

    optimizer = get_named_optimizer(name=optimizer, learning_rate=learning_rate)

    # Setup the training and test functions
    predictor = GradientBasedPredictor(
        function=MultiLayerPerceptron.from_init(
            layer_sizes=[dataset.input_size] + hidden_sizes + [10],
            hidden_activation=hidden_activation,
            output_activation=output_activation,
            w_init=w_init,
            use_bias=use_bias,
            rng=rng,
            ),
        cost_function=cost,
        optimizer=optimizer
        ).compile()  # .compile() turns the GradientBasedPredictor, which works with symbolic variables, into a real one that takes and returns arrays.

    def vis_callback(xx):
        p = predictor.symbolic_predictor._function
        in_layer = {
            'Layer[0].w': p.layers[0].linear_transform._w.get_value().T.reshape(-1, 28, 28),
            'Layer[0].b': p.layers[0].linear_transform._b.get_value(),
            }
        other_layers = [{'Layer[%s].w' % (i+1): l.linear_transform._w.get_value(), 'Layer[%s].b' % (i+1): l.linear_transform._b.get_value()} for i, l in enumerate(p.layers[1:])]
        dbplot(dict(in_layer.items() + sum([o.items() for o in other_layers], [])))

    # Train and periodically report the test score.
    results = assess_online_predictor(
        dataset=dataset,
        predictor=predictor,
        evaluation_function='percent_argmax_correct',
        test_epochs=sqrtspace(0, n_epochs, n_test_points),
        minibatch_size=minibatch_size,
        test_callback=vis_callback if visualize_params else None
        )

    if plot:
        plot_learning_curves(results)
def compare_predictors(dataset, online_predictors={}, offline_predictors={}, minibatch_size='full',
        evaluation_function='mse', test_epochs=sqrtspace(0, 1, 10), report_test_scores=True,
        test_on='training+test', test_batch_size=None, accumulators=None, online_test_callbacks={}):
    """
    Compare a set of predictors by running them on a dataset, and return the learning curves for each predictor.

    :param dataset: A DataSet object
    :param online_predictors: A dict<str:IPredictor> of online predictors.  An online predictor is
        sequentially fed minibatches of data and updates its parameters with each minibatch.
    :param offline_predictors: A dict<str:object> of offline predictors.  Offline predictors obey sklearn's
        Estimator/Predictor interfaces - i.e. they have methods estimator = object.fit(data, targets) and
        prediction = object.predict(data)
    :param minibatch_size: Size of the minibatches to use for online predictors.  Can be:
        An int, in which case it represents the minibatch size for all classifiers.
        A dict<str: int>, in which case you can set the minibatch size per-classifier.
        In place of the int, you can put 'all' if you want to train on the whole dataset in each iteration.
    :param test_epochs: Test points to use for online predictors.  Can be:
        A list of integers - in which case the classifier is tested after seeing this many samples.
        A list of floats - in which case the classifier is tested after seeing this many epochs.
        'always' - in which case a test is performed after every training step.
        The final test point determines the end of training.
    :param evaluation_function: Function used to evaluate output of predictors
    :param report_test_scores: Boolean indicating whether you'd like to report results online.
    :param test_on: 'training', 'test', or 'training+test'
    :param test_batch_size: When the test set is too large to process in one step, use this to break it up into chunks.
    :param accumulators: A dict<str: accum_fcn>, where accum_fcn is a stateful function of the form:
        accumulated_output = accum_fcn(this_output)
        Special case: accum_fcn can be 'avg' to make a running average.
    :param online_test_callbacks: A dict<str: fcn> where fcn is a callback that takes an online predictor as an
        argument.  Useful for logging/plotting/debugging progress during training.
    :return: An OrderedDict<LearningCurveData>
    """
    all_keys = online_predictors.keys() + offline_predictors.keys()
    assert len(all_keys) > 0, 'You have to give at least one predictor.  Is that too much to ask?'
    assert len(all_keys) == len(np.unique(all_keys)), "You have multiple predictors using the same names.  Change that."

    type_constructor_dict = OrderedDict(
        [(k, ('offline', offline_predictors[k])) for k in sorted(offline_predictors.keys())] +
        [(k, ('online', online_predictors[k])) for k in sorted(online_predictors.keys())]
        )

    minibatch_size = _pack_into_dict(minibatch_size, expected_keys=online_predictors.keys())
    accumulators = _pack_into_dict(accumulators, expected_keys=online_predictors.keys())
    online_test_callbacks = _pack_into_dict(online_test_callbacks, expected_keys=online_predictors.keys(), allow_subset=True)

    test_epochs = np.array(test_epochs)

    if isinstance(evaluation_function, str):
        evaluation_function = get_evaluation_function(evaluation_function)

    records = OrderedDict()

    # Run the predictors (offline and online)
    for predictor_name, (predictor_type, predictor) in type_constructor_dict.iteritems():
        print '%s\nRunning predictor %s\n%s' % ('='*20, predictor_name, '-'*20)
        records[predictor_name] = \
            assess_offline_predictor(
                predictor=predictor,
                dataset=dataset,
                evaluation_function=evaluation_function,
                report_test_scores=report_test_scores,
                test_on=test_on,
                test_batch_size=test_batch_size
                ) if predictor_type == 'offline' else \
            assess_online_predictor(
                predictor=predictor,
                dataset=dataset,
                evaluation_function=evaluation_function,
                test_epochs=test_epochs,
                accumulator=accumulators[predictor_name],
                minibatch_size=minibatch_size[predictor_name],
                report_test_scores=report_test_scores,
                test_on=test_on,
                test_batch_size=test_batch_size,
                test_callback=online_test_callbacks[predictor_name] if predictor_name in online_test_callbacks else None
                ) if predictor_type == 'online' else \
            bad_value(predictor_type)

    print 'Done!'
    return records
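# A sketch of the accumulators and online_test_callbacks options documented above (illustrative
# only; 'my_perceptron' is a placeholder for any IPredictor, and the dbplot callback mirrors the
# commented-out example in demo_compare_dtp_methods):
#
#     records = compare_predictors(
#         dataset=dataset,
#         online_predictors={'perceptron': my_perceptron},
#         minibatch_size=20,
#         test_epochs=sqrtspace(0, 10, 20),
#         evaluation_function='percent_argmax_correct',
#         accumulators={'perceptron': 'avg'},  # 'avg' keeps a running average of the predictor's outputs
#         online_test_callbacks={'perceptron': lambda p: dbplot(p.symbolic_predictor.layers[0].w.get_value().T.reshape(-1, 28, 28))},
#         )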