from neural_network import NeuralNetwork
from mnist_data import get_mnist_data
import time
import numpy as np
import cPickle as pickle

# Automated simulations to record accuracy, runtime, number of iterations.
# This is done 15 times for each (algorithm, noise type) tuple.
# Results save to .pkl files.

# training: 55,000 examples. validation: 5,000 examples. testing: 10,000 examples.
train_dataset, train_labels, valid_dataset, valid_labels, test_dataset, test_labels = get_mnist_data()

optimizer_parameters = {}
optimizer_parameters['OriginalGradientDescent'] = {'learning_rate': 0.5}
optimizer_parameters['CustomGradientDescent'] = {'learning_rate': 0.5}
optimizer_parameters['OriginalAdam'] = {}
optimizer_parameters['CustomAdam'] = {}
optimizer_parameters['LBFGS'] = {'max_hist': 1000}
optimizer_parameters['ConjugateGradient'] = {'learning_rate': 0.0001, 'min_step': 0.02}
optimizer_parameters['HessianFree'] = {}


def run_single_set(num_runs, num_hidden_layers, num_hidden_nodes, auto_terminate_num_iter,
import numpy as np

from mnist_data import get_mnist_data
from MLP import MLP
from MLP_SGD import MLP_SGD

# Get the data from the mnist_data module.
test_data, test_targets, train_data, train_targets = get_mnist_data()

# Initialize a stochastic gradient descent multi-layer perceptron.
mlp_sgd = MLP_SGD(lr=1, sizes=[784, 30, 16, 10], activation_list=['sigmoid', 'softmax'])

# Loss and accuracy on the test set before any training.
outputs1 = mlp_sgd.forward(test_data)
print("Loss before update: ", mlp_sgd.loss(outputs1, test_targets))
print("Accuracy: ", mlp_sgd.evaluate(test_data, test_targets))

# Train, then report loss and accuracy again.
mlp_sgd.fit(train_data, train_targets, 5, 10)

outputs2 = mlp_sgd.forward(test_data)
print("Loss after update: ", mlp_sgd.loss(outputs2, test_targets))
print("Accuracy: ", mlp_sgd.evaluate(test_data, test_targets))
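# Optional follow-up sketch (an assumption, not part of the original script):
# persist the trained network with the standard-library pickle module so the
# before/after comparison above can be repeated without retraining.
# The filename 'mlp_sgd_mnist.pkl' is hypothetical.
import pickle

with open('mlp_sgd_mnist.pkl', 'wb') as f:
    pickle.dump(mlp_sgd, f)

with open('mlp_sgd_mnist.pkl', 'rb') as f:
    restored_mlp = pickle.load(f)
print("Restored accuracy: ", restored_mlp.evaluate(test_data, test_targets))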
def test_network_lesson09(self):
    """Test building and training a convolutional NeuralNetwork on MNIST."""
    input_shape = (28, 28, 1)
    output_count = 10
    network = NeuralNetwork('convnet for MNIST', input_shape, output_count)

    # Normal distribution parameters for random weights
    mean = 0.0
    stddev = 0.1

    # General convolution shapes and parameters common to all convolutional layers
    conv_stride_shape = (1, 1)
    conv_pad_shape = (0, 0)
    conv_pad_type = 'SAME'
    pool_stride_shape = (2, 2)
    pool_shape = (2, 2)
    pool_pad_type = 'SAME'
    activation = 'relu'

    # Kernel depths and sizes for each convolution layer
    depths = [32, 64, 128]
    kernel_shapes = [(5, 5, depths[0]), (5, 5, depths[1]), (5, 5, depths[2])]
    conv_layer_count = len(depths)

    # Expected values for assertions
    after_conv_output_shapes = [(28, 28, depths[0]), (14, 14, depths[1]), (7, 7, depths[2])]
    after_pool_output_shapes = [(14, 14, depths[0]), (7, 7, depths[1]), (4, 4, depths[2])]

    # Create convolutional layers
    conv = None
    for i in range(conv_layer_count):
        name = 'l{:d}'.format(i)
        if i > 0:
            input_shape = conv.output_shape
        conv = ConvolutionalLayer(name, input_shape, kernel_shapes[i], conv_stride_shape,
                                  conv_pad_shape, conv_pad_type, activation)
        self.assertEqual(after_conv_output_shapes[i], conv.output_shape)
        conv.add_pooling('max', pool_shape, pool_stride_shape, pool_pad_type)
        self.assertEqual(after_pool_output_shapes[i], conv.output_shape)
        network.add_layer(conv, mean, stddev)

    # Create linear layers
    # Input and output sizes for the linear layers
    linear_input_sizes = [4 * 4 * 128, 512]
    linear_output_sizes = [512, 10]
    linear_activations = ['tanh', None]
    for i, input_size in enumerate(linear_input_sizes):
        layer_index = i + conv_layer_count
        name = 'l{:d}'.format(layer_index)
        linear = LinearLayer(name, input_size, linear_output_sizes[i], linear_activations[i])
        network.add_layer(linear, mean, stddev)

    # MNIST: classify 10 digits
    network.define_network()
    learning_rate = 0.001
    network.define_operations(learning_rate, 'gradient_descent')

    epochs = 10
    batch_size = 128
    saver = tf.train.Saver()
    (train_inputs, train_labels, valid_inputs, valid_labels, test_inputs, test_labels) = \
        get_mnist_data()

    with tf.Session() as sess:
        sess.run(tf.initialize_all_variables())
        network.train_with_validate(sess, train_inputs, train_labels, valid_inputs,
                                    valid_labels, epochs, batch_size)
        test_accuracy = network.evaluate_in_batches(sess, test_inputs, test_labels, batch_size)
        print("Test accuracy:", test_accuracy)
        saver.save(sess, 'convnet')
        print("Model saved")
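# Hedged sketch, not part of the test above: reload the checkpoint written by
# saver.save(sess, 'convnet') and re-evaluate. It assumes the same graph-building
# code has already been run in the current default graph (so variable names match)
# and that network, test_inputs, test_labels, and batch_size are still in scope.
with tf.Session() as sess:
    saver = tf.train.Saver()
    saver.restore(sess, 'convnet')
    restored_accuracy = network.evaluate_in_batches(sess, test_inputs, test_labels, batch_size)
    print("Restored test accuracy:", restored_accuracy)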
def main():
    """Takes the MLP class for a test drive."""
    parser = argparse.ArgumentParser(description='Train an MLP on the MNIST dataset')
    parser.add_argument('-mi', '--max_iter', required=False, default=50, type=int,
                        help='Number of iterations for stochastic gradient descent.')
    parser.add_argument('-HL_0', '--HL_0', type=int, required=False, default=784,
                        help='Set the size of the input layer.')
    parser.add_argument('-HL_1', '--HL_1', type=int, required=False, default=500,
                        help='Set the size of the second layer.')
    parser.add_argument('-HL_2', '--HL_2', type=int, required=False, default=500,
                        help='Set the size of the third layer.')
    parser.add_argument('-HL_3', '--HL_3', type=int, required=False, default=500,
                        help='Set the size of the fourth layer.')
    parser.add_argument('-HL_4', '--HL_4', type=int, required=False, default=10,
                        help='Set the size of the output layer.')
    parser.add_argument('-bs', '--batch_size', required=False, type=int, default=100,
                        help='Set the size of the random samples chosen in each '
                             'stochastic gradient computation.')
    parser.add_argument('-lr', '--learning_rate', required=False, type=float, default=0.03,
                        help='Set the learning rate for the stochastic gradient descent.')
    parser.add_argument('-rp', '--reg_param', required=False, type=float, default=0,
                        help='Set the weight parameter for the regularization penalty term.')
    parser.add_argument('-ot', '--output_type', required=False, default='softmax',
                        choices=['sigmoid', 'softmax'],
                        help='Set the type of the output layer activation function.')
    opts = vars(parser.parse_args())

    max_iter = opts['max_iter']
    HL_0 = opts['HL_0']
    HL_1 = opts['HL_1']
    HL_2 = opts['HL_2']
    HL_3 = opts['HL_3']
    HL_4 = opts['HL_4']
    reg_param = opts['reg_param']
    output_type = opts['output_type']
    batch_size = opts['batch_size']
    learning_rate = opts['learning_rate']

    print("Getting data...")
    X_train, y_train, X_test, y_test = get_mnist_data()
    print("Got data. Creating model...")

    model = MLP()
    model.add_layer(HL_0)
    model.add_layer(HL_1)
    model.add_layer(HL_2)
    model.add_layer(HL_3)
    model.add_layer(HL_4, output_type)
    model.fix()
    input("Created model. Press enter to view blueprint.")
    print(model)

    input("Press enter to fit model on training data.")
    # Testing the fit method
    model.fit(X_train=X_train, y_train=y_train, reg_param=reg_param,
              batch_size=batch_size, max_iter=max_iter, learning_rate=learning_rate)

    input("Press enter to transform test data.")
    print("Predicting...")
    # Testing the transform method
    y_pred_test = model.transform(X_test)

    input("Test data transformed. Press enter to view evaluation summary.")
    # Testing the evaluate method
    model.evaluate(X_train, y_train)
    print(classification_report(y_test, y_pred_test))
    input("Press enter to quit.")
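# The entry-point guard below is an assumption; only main() appears in this excerpt.
# With it in place the script can be run directly, e.g. (hypothetical filename,
# flags as defined by the argparse options above):
#   python train_mlp.py --max_iter 50 --batch_size 100 --learning_rate 0.03
if __name__ == '__main__':
    main()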
def test_network_lenet(self):
    """Test using the lenet5 architecture"""
    input_shape = (32, 32, 1)
    output_count = 10
    network = NeuralNetwork('lenet5 for MNIST', input_shape, output_count)

    # normal distribution parameters for random weights
    mean = 0.0
    stddev = 0.1

    # General convolution shapes and parameters common to all convolutional layers
    conv_stride_shape = (1, 1)
    conv_pad_shape = (0, 0)
    conv_pad_type = 'VALID'
    pool_stride_shape = (2, 2)
    pool_shape = (2, 2)
    pool_pad_type = 'VALID'
    activation = 'relu'

    # Kernel depths and sizes for each convolution layer
    depths = [6, 16]
    kernel_shapes = [(5, 5, depths[0]), (5, 5, depths[1])]
    conv_layer_count = len(depths)

    # Create convolutional layers
    conv = None
    for i in range(conv_layer_count):
        name = 'l{:d}'.format(i)
        if i > 0:
            input_shape = conv.output_shape
        conv = ConvolutionalLayer(name, input_shape, kernel_shapes[i], conv_stride_shape,
                                  conv_pad_shape, conv_pad_type, activation)
        conv.add_pooling('max', pool_shape, pool_stride_shape, pool_pad_type)
        network.add_layer(conv, mean, stddev)

    # Linear layer dimensions
    linear_input_sizes = [400, 120, 84]
    linear_output_sizes = [120, 84, 10]
    linear_activations = ['relu', 'relu', None]

    # Create linear layers
    for i, input_size in enumerate(linear_input_sizes):
        layer_index = i + conv_layer_count
        name = 'l{:d}'.format(layer_index)
        linear = LinearLayer(name, input_size, linear_output_sizes[i], linear_activations[i])
        linear.init_weights_and_biases(mean, stddev)
        network.add_layer(linear, mean, stddev)

    network.define_network()
    learning_rate = 0.001
    network.define_operations(learning_rate, 'adam')

    # Prepare data
    (train_inputs, train_labels, valid_inputs, valid_labels, test_inputs, test_labels) = \
        get_mnist_data(padding=(2, 2))

    epochs = 10
    batch_size = 128
    saver = tf.train.Saver()

    with tf.Session() as sess:
        sess.run(tf.initialize_all_variables())
        network.train_with_validate(sess, train_inputs, train_labels, valid_inputs,
                                    valid_labels, epochs, batch_size)
        test_accuracy = network.evaluate_in_batches(sess, test_inputs, test_labels, batch_size)
        print("Test accuracy:", test_accuracy)
        saver.save(sess, 'lenet')
        print("Model saved")