def train(
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        l2_reg_lambda: float  # Task 3 hyperparameter. Can be ignored before this.
):
    """
    Function that implements logistic regression through mini-batch
    gradient descent for the given hyperparameters
    """
    global X_train, X_val, X_test
    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    num_steps_per_val = num_batches_per_epoch // 5
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}
    model = BinaryModel(l2_reg_lambda)

    if X_train.shape[1] == 784:
        X_train = pre_process_images(X_train)
    if X_test.shape[1] == 784:
        X_test = pre_process_images(X_test)
    if X_val.shape[1] == 784:
        X_val = pre_process_images(X_val)

    global_step = 0
    for epoch in range(num_epochs):
        for step in range(num_batches_per_epoch):
            # Select our mini-batch of images / labels
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            y_hat = model.forward(X_batch)
            model.backward(X_batch, y_hat, Y_batch)
            model.w -= learning_rate * model.grad

            # Track training loss continuously
            _train_loss = cross_entropy_loss(Y_batch, y_hat)
            train_loss[global_step] = _train_loss
            # Track validation loss / accuracy every time we progress 20% through an epoch
            if global_step % num_steps_per_val == 0:
                _val_loss = cross_entropy_loss(Y_val, model.forward(X_val))
                val_loss[global_step] = _val_loss
                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)
            global_step += 1
    return model, train_loss, val_loss, train_accuracy, val_accuracy
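# The loop above relies on a `cross_entropy_loss` helper defined elsewhere in the
# assignment files (task2a). A minimal sketch for the binary case, assuming targets
# and outputs are (batch_size, 1) arrays with outputs in (0, 1); the name
# `binary_cross_entropy_sketch` and the epsilon clipping are illustrative, not the
# assignment's actual implementation:
import numpy as np

def binary_cross_entropy_sketch(targets: np.ndarray, outputs: np.ndarray) -> float:
    # Clip to avoid log(0) for saturated sigmoid outputs.
    eps = 1e-12
    outputs = np.clip(outputs, eps, 1 - eps)
    # Mean of -[y*log(y_hat) + (1-y)*log(1-y_hat)] over the batch.
    return float(-np.mean(targets * np.log(outputs)
                          + (1 - targets) * np.log(1 - outputs)))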
def train(
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        l2_reg_lambda: float  # Task 3 hyperparameter
):
    global X_train, X_val, X_test
    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    num_steps_per_val = num_batches_per_epoch // 5
    # Tracking variables for loss / accuracy
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}

    if X_train.shape[1] == 784:
        X_train = pre_process_images(X_train)
    if X_test.shape[1] == 784:
        X_test = pre_process_images(X_test)
    if X_val.shape[1] == 784:
        X_val = pre_process_images(X_val)

    # Initialize our model
    model = SoftmaxModel(l2_reg_lambda)

    global_step = 0
    for epoch in range(num_epochs):
        for step in range(num_batches_per_epoch):
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            y_hat = model.forward(X_batch)
            model.backward(X_batch, y_hat, Y_batch)
            model.w -= learning_rate * model.grad

            # Track training loss continuously
            _train_loss = cross_entropy_loss(Y_batch, y_hat)
            train_loss[global_step] = _train_loss
            # Track validation loss / accuracy every time we progress 20% through an epoch
            if global_step % num_steps_per_val == 0:
                _val_loss = cross_entropy_loss(Y_val, model.forward(X_val))
                val_loss[global_step] = _val_loss
                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)
            global_step += 1
    return model, train_loss, val_loss, train_accuracy, val_accuracy
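# For the softmax version, the loss generalizes to multi-class cross entropy.
# A hedged sketch, assuming one-hot targets of shape (batch_size, num_classes)
# and softmax outputs of the same shape; the helper name is hypothetical:
import numpy as np

def multiclass_cross_entropy_sketch(targets: np.ndarray, outputs: np.ndarray) -> float:
    # Mean over the batch of -sum_k y_k * log(y_hat_k); eps guards against log(0).
    eps = 1e-12
    return float(-np.mean(np.sum(targets * np.log(outputs + eps), axis=1)))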
import random
import pickle

import numpy as np
import matplotlib.pyplot as plt

import utils
from task2a import pre_process_images


def predictAndDisplay():
    """Test helper: predict a random handwritten digit from the dataset and display it."""
    X_train, Y_train, X_val, Y_val = utils.load_full_mnist()
    X_train = pre_process_images(X_train)
    # randint is inclusive on both ends, so subtract 1 to stay in range.
    index = random.randint(0, X_train.shape[0] - 1)
    image = np.array([X_train[index]])
    printable = X_train[index]
    with open('model3a.sav', 'rb') as f:
        model = pickle.load(f)
    a = model.forward(image)
    predicted = np.argmax(a)
    # Drop the appended bias term before reshaping to a 28x28 image.
    image_2d = printable[:-1].reshape(28, 28)
    label = "predicted: " + str(predicted)
    plt.imshow(image_2d)
    plt.title(label)
    plt.show()
        accuracy_train = calculate_accuracy(X_train, Y_train, self.model)
        accuracy_val = calculate_accuracy(X_val, Y_val, self.model)
        return loss, accuracy_train, accuracy_val


if __name__ == "__main__":
    # Hyperparameters. DO NOT CHANGE IF NOT SPECIFIED IN ASSIGNMENT TEXT.
    num_epochs = 50
    learning_rate = 0.01
    batch_size = 128
    l2_reg_lambda = 0
    shuffle_dataset = True

    # Load dataset
    X_train, Y_train, X_val, Y_val = utils.load_full_mnist()
    X_train = pre_process_images(X_train)
    X_val = pre_process_images(X_val)
    Y_train = one_hot_encode(Y_train, 10)
    Y_val = one_hot_encode(Y_val, 10)

    # ANY PARTS OF THE CODE BELOW THIS CAN BE CHANGED.

    # Initialize model
    model = SoftmaxModel(l2_reg_lambda)
    # Train model
    trainer = SoftmaxTrainer(
        model, learning_rate, batch_size, shuffle_dataset,
        X_train,
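# `calculate_accuracy` is defined in the assignment files; for the multi-class
# case it amounts to comparing the argmax of the model output against the argmax
# of the one-hot targets. A minimal sketch (the signature matches the calls
# above, but the body is an assumption):
import numpy as np

def calculate_accuracy_sketch(X: np.ndarray, targets: np.ndarray, model) -> float:
    outputs = model.forward(X)
    # Fraction of samples where the predicted class matches the labeled class.
    return float(np.mean(outputs.argmax(axis=1) == targets.argmax(axis=1)))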
import numpy as np
import utils
from task2a import one_hot_encode, pre_process_images, SoftmaxModel, gradient_approximation_test

if __name__ == "__main__":
    # Simple test on one-hot encoding
    Y = np.zeros((1, 1), dtype=int)
    Y[0, 0] = 3
    Y = one_hot_encode(Y, 10)
    assert Y[0, 3] == 1 and Y.sum() == 1, \
        f"Expected the vector to be [0,0,0,1,0,0,0,0,0,0], but got {Y}"

    X_train, Y_train, *_ = utils.load_full_mnist(0.1)
    mean = np.mean(X_train)
    std = np.std(X_train)
    X_train = pre_process_images(X_train, mean, std)
    Y_train = one_hot_encode(Y_train, 10)
    assert X_train.shape[1] == 785, \
        f"Expected X_train to have 785 elements per image. Shape was: {X_train.shape}"

    # Modify your network here
    neurons_per_layer = [64, 64, 10]
    use_improved_sigmoid = True
    use_improved_weight_init = True
    model = SoftmaxModel(neurons_per_layer,
                         use_improved_sigmoid,
                         use_improved_weight_init)

    # Gradient approximation check for 100 images
    X_train = X_train[:100]
    Y_train = Y_train[:100]
    for layer_idx, w in enumerate(model.ws):
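# The `use_improved_sigmoid` flag above presumably switches the hidden-layer
# activation to the scaled tanh recommended in LeCun et al., "Efficient BackProp":
# f(x) = 1.7159 * tanh(2x / 3). A hedged sketch of that activation and its
# derivative, assuming this is the variant the assignment intends:
import numpy as np

def improved_sigmoid_sketch(x: np.ndarray) -> np.ndarray:
    return 1.7159 * np.tanh(2.0 * x / 3.0)

def improved_sigmoid_derivative_sketch(x: np.ndarray) -> np.ndarray:
    # d/dx [a * tanh(b*x)] = a * b * (1 - tanh(b*x) ** 2)
    return 1.7159 * (2.0 / 3.0) * (1.0 - np.tanh(2.0 * x / 3.0) ** 2)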
f"Calculated gradient is incorrect. " \ f"Approximation: {gradient_approximation}, actual gradient: {model.grad[i, j]}\n" \ f"If this test fails there could be errors in your cross entropy loss function, " \ f"forward function or backward function" if __name__ == "__main__": # Simple test on one-hot encoding Y = np.zeros((1, 1), dtype=int) Y[0, 0] = 3 Y = one_hot_encode(Y, 10) assert Y[0, 3] == 1 and Y.sum() == 1, \ f"Expected the vector to be [0,0,0,1,0,0,0,0,0,0], but got {Y}" X_train, Y_train, *_ = utils.load_full_mnist(0.1) X_train = pre_process_images(X_train) Y_train = one_hot_encode(Y_train, 10) assert X_train.shape[1] == 785,\ f"Expected X_train to have 785 elements per image. Shape was: {X_train.shape}" # Simple test for forward pass. Note that this does not cover all errors! model = SoftmaxModel(0.0) logits = model.forward(X_train) np.testing.assert_almost_equal( logits.mean(), 1/10, err_msg="Since the weights are all 0's, the softmax activation should be 1/10") # Gradient approximation check for 100 images X_train = X_train[:100] Y_train = Y_train[:100] for i in range(2):
    return model, train_loss, val_loss, train_accuracy, val_accuracy


if __name__ == "__main__":
    # Measure execution time
    start = time.time()

    # Load dataset
    validation_percentage = 0.2
    X_train, Y_train, X_val, Y_val, X_test, Y_test = utils.load_full_mnist(
        validation_percentage)

    # Preprocess and adapt the data
    mean, standard_deviation = find_mean_and_deviation(X_train)
    X_train = pre_process_images(X_train, mean, standard_deviation)
    X_val = pre_process_images(X_val, mean, standard_deviation)
    X_test = pre_process_images(X_test, mean, standard_deviation)
    Y_train = one_hot_encode(Y_train, 10)
    Y_val = one_hot_encode(Y_val, 10)
    Y_test = one_hot_encode(Y_test, 10)

    # Hyperparameters
    num_epochs = 20
    learning_rate = .1
    batch_size = 32
    neurons_per_layer = [64, 10]
    momentum_gamma = .9  # Task 3 hyperparameter

    # Settings for task 3. Keep all to false for task 2.
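# `find_mean_and_deviation` is imported from task2a (see the next fragment).
# Given how the inline variants elsewhere in this code use np.mean / np.std,
# its likely behavior is a hedged sketch like:
import numpy as np

def find_mean_and_deviation_sketch(X: np.ndarray):
    # Statistics come from the training set only and are reused for val/test,
    # so no information leaks from the held-out splits into preprocessing.
    return float(np.mean(X)), float(np.std(X))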
from task2a import one_hot_encode, pre_process_images, SoftmaxModel, gradient_approximation_test, \
    find_mean_and_deviation

if __name__ == "__main__":
    # Simple test on one-hot encoding
    Y = np.zeros((1, 1), dtype=int)
    Y[0, 0] = 3
    Y = one_hot_encode(Y, 10)
    assert Y[0, 3] == 1 and Y.sum() == 1, \
        f"Expected the vector to be [0,0,0,1,0,0,0,0,0,0], but got {Y}"

    # Load and preprocess the data
    X_train, Y_train, *_ = utils.load_full_mnist(0.1)
    mean, standard_deviation = find_mean_and_deviation(X_train)
    X_train = pre_process_images(X_train, mean, standard_deviation)
    Y_train = one_hot_encode(Y_train, 10)
    assert X_train.shape[1] == 785, \
        f"Expected X_train to have 785 elements per image. Shape was: {X_train.shape}"

    # Modify your network here
    neurons_per_layer = [64, 64, 10]
    use_improved_sigmoid = True
    use_improved_weight_init = True
    model = SoftmaxModel(neurons_per_layer,
                         use_improved_sigmoid,
                         use_improved_weight_init)

    # Gradient approximation check for 100 images
    X_train = X_train[:100]
    Y_train = Y_train[:100]
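# The 785-element assertion above reflects the bias trick: each 784-pixel image
# is normalized and then extended with a constant 1 so the bias can live inside
# the weight matrix. A minimal sketch of `pre_process_images` under that
# assumption (the real task2a implementation may differ in details):
import numpy as np

def pre_process_images_sketch(X: np.ndarray, mean: float, std: float) -> np.ndarray:
    X = (X - mean) / std                 # normalize with training-set statistics
    bias = np.ones((X.shape[0], 1))      # bias trick: 784 -> 785 features
    return np.concatenate([X, bias], axis=1)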
if __name__ == "__main__": # Load dataset validation_percentage = 0.2 X_train, Y_train, X_val, Y_val, X_test, Y_test = utils.load_full_mnist( validation_percentage) # One hot encode data Y_train = one_hot_encode(Y_train, 10) Y_val = one_hot_encode(Y_val, 10) Y_test = one_hot_encode(Y_test, 10) # Preprocess data using mean and std of training set # Find mean and std of training set mu = np.mean(X_train) sigma = np.std(X_train) X_train = pre_process_images(X_train, mean=mu, std=sigma) X_val = pre_process_images(X_val, mean=mu, std=sigma) X_test = pre_process_images(X_test, mean=mu, std=sigma) # Hyperparameters num_epochs = 20 learning_rate = .1 batch_size = 32 neurons_per_layer = [64, 10] momentum_gamma = .9 # Task 3 hyperparameter # Settings for task 3. Keep all to false for task 2. use_shuffle = False use_improved_sigmoid = False use_improved_weight_init = False use_momentum = False
    num_epochs = 20
    learning_rate = .1
    batch_size = 32
    # Task 4d: alternative network architectures
    # neurons_per_layer = [60, 60, 10]
    neurons_per_layer = [64, 10]
    # neurons_per_layer = [16, 10]
    # neurons_per_layer = [128, 10]
    momentum_gamma = .9  # Task 3 hyperparameter

    # Calibration
    m = mean(X_train)
    std = stddev(X_train)
    X_train = pre_process_images(X_train, m, std)
    X_val = pre_process_images(X_val, m, std)
    X_test = pre_process_images(X_test, m, std)
    Y_train = one_hot_encode(Y_train, 10)
    Y_val = one_hot_encode(Y_val, 10)
    Y_test = one_hot_encode(Y_test, 10)

    # Settings for task 3. Keep all to false for task 2.
    """
    use_shuffle = [False, True, True, True, True]
    use_improved_sigmoid = [False, False, True, True, True]
    use_improved_weight_init = [False, False, False, True, True]
    use_momentum = [False, False, False, False, True]
    train_loss = [0, 0, 0, 0, 0]
    val_loss = [0, 0, 0, 0, 0]
    train_accuracy = [0, 0, 0, 0, 0]
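# `use_improved_weight_init` presumably switches from a plain uniform
# initialization to fan-in scaled normal initialization, w ~ N(0, 1 / sqrt(fan_in)),
# which keeps pre-activations at a reasonable scale in deeper networks. A hedged
# sketch; both branches are assumptions about what the assignment's model does:
import numpy as np

def init_weights_sketch(fan_in: int, fan_out: int, improved: bool) -> np.ndarray:
    if improved:
        return np.random.normal(0, 1 / np.sqrt(fan_in), (fan_in, fan_out))
    return np.random.uniform(-1, 1, (fan_in, fan_out))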
    X_train, Y_train, X_val, Y_val, X_test, Y_test = utils.load_binary_dataset(
        category1, category2, validation_percentage)

    # Hyperparameters
    num_epochs = 50
    learning_rate = 0.2
    batch_size = 128
    l2_reg_lambda = 0

    model, train_loss, val_loss, train_accuracy, val_accuracy = train(
        num_epochs=num_epochs,
        learning_rate=learning_rate,
        batch_size=batch_size,
        l2_reg_lambda=l2_reg_lambda)

    print("Final Train Cross Entropy Loss:",
          cross_entropy_loss(Y_train, model.forward(pre_process_images(X_train))))
    print("Final Test Cross Entropy Loss:",
          cross_entropy_loss(Y_test, model.forward(pre_process_images(X_test))))
    print("Final Validation Cross Entropy Loss:",
          cross_entropy_loss(Y_val, model.forward(pre_process_images(X_val))))
    print("Train accuracy:", calculate_accuracy(X_train, Y_train, model))
    print("Validation accuracy:", calculate_accuracy(X_val, Y_val, model))
    print("Test accuracy:", calculate_accuracy(X_test, Y_test, model))

    # Plot loss
    # plt.ylim([0., .4])
    utils.plot_loss(train_loss, "Training Loss")
    utils.plot_loss(val_loss, "Validation Loss")
    plt.legend()
    plt.savefig("binary_train_loss.png")
                    fmt=fmt_string_train)
    utils.plot_loss(val_accuracy_loaded, "Validation Accuracy " + displayname,
                    fmt=fmt_string_val)


if __name__ == "__main__":
    # Load dataset
    validation_percentage = 0.2
    X_train, Y_train, X_val, Y_val, X_test, Y_test = utils.load_full_mnist(
        validation_percentage)

    # Preprocessing of targets and images
    x_train_mean = np.mean(X_train)
    x_train_std = np.std(X_train)
    X_train = pre_process_images(X_train, x_train_mean, x_train_std)
    X_test = pre_process_images(X_test, x_train_mean, x_train_std)
    X_val = pre_process_images(X_val, x_train_mean, x_train_std)
    Y_train = one_hot_encode(Y_train, 10)
    Y_test = one_hot_encode(Y_test, 10)
    Y_val = one_hot_encode(Y_val, 10)

    # Hyperparameters
    num_epochs = 50
    learning_rate = .1
    batch_size = 32
    neurons_per_layer = [60, 60, 10]
    momentum_gamma = .9  # Task 3 hyperparameter

    # Settings for task 3. Keep all to false for task 2.
    use_shuffle = True
    num_epochs = 50
    learning_rate = .02
    batch_size = 32
    neurons_per_layer = [64, 10]
    momentum_gamma = .9  # Task 3 hyperparameter
    shuffle_data = True
    use_improved_sigmoid = True
    use_improved_weight_init = True
    use_momentum = True

    # Load dataset
    X_train, Y_train, X_val, Y_val = utils.load_full_mnist()
    mean = X_train.mean()
    sd = X_train.std()
    X_train = pre_process_images(X_train, mean, sd)
    X_val = pre_process_images(X_val, mean, sd)
    Y_train = one_hot_encode(Y_train, 10)
    Y_val = one_hot_encode(Y_val, 10)

    model = SoftmaxModel(neurons_per_layer,
                         use_improved_sigmoid,
                         use_improved_weight_init)
    trainer = SoftmaxTrainer(
        momentum_gamma, use_momentum,
        model, learning_rate, batch_size, shuffle_data,
        X_train, Y_train,
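# The trainer above takes `momentum_gamma` and `use_momentum`. A common
# formulation, and a hedged guess at what SoftmaxTrainer does internally, keeps
# a running velocity per weight matrix:
#     v <- momentum_gamma * v + grad
#     w <- w - learning_rate * v
# Sketch for a single weight matrix; all names are illustrative:
def momentum_step_sketch(w, grad, velocity, learning_rate, momentum_gamma):
    velocity = momentum_gamma * velocity + grad
    w = w - learning_rate * velocity
    return w, velocity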