# Load dataset
X_train, Y_train, X_val, Y_val = utils.load_full_mnist()
X_train = pre_process_images(X_train)
X_val = pre_process_images(X_val)
Y_train = one_hot_encode(Y_train, 10)
Y_val = one_hot_encode(Y_val, 10)

model = SoftmaxModel(
    neurons_per_layer, use_improved_sigmoid, use_improved_weight_init)
trainer = SoftmaxTrainer(
    momentum_gamma, use_momentum,
    model, learning_rate, batch_size, shuffle_data,
    X_train, Y_train,
    X_val, Y_val,
)
current_train_history, current_val_history = trainer.train(num_epochs)
train_history[model_name] = current_train_history
val_history[model_name] = current_val_history

plt.figure(figsize=(16, 10))
plt.subplot(1, 2, 1)
plt.ylim([0, .5])
for model_name in train_history.keys():
    utils.plot_loss(train_history[model_name]["loss"], model_name,
                    npoints_to_average=10)
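# A minimal sketch of the matching accuracy panel, following the axis
# conventions used in the later snippets. It assumes each history dict
# also carries an "accuracy" key and that utils.plot_loss accepts any
# metric series - both are assumptions, not confirmed by this script:
plt.subplot(1, 2, 2)
plt.ylim([0.85, 1.01])
plt.ylabel("Accuracy")
plt.xlabel("Training steps")
for model_name in val_history.keys():
    utils.plot_loss(val_history[model_name]["accuracy"], model_name)
plt.legend(loc="lower right")
plt.show()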
# Load dataset
X_train, Y_train, X_val, Y_val = utils.load_full_mnist()
X_train = pre_process_images(X_train)
X_val = pre_process_images(X_val)
Y_train = one_hot_encode(Y_train, 10)
Y_val = one_hot_encode(Y_val, 10)

# 32 neurons per hidden layer
model_a = SoftmaxModel(
    neurons_per_layer, use_improved_sigmoid, use_improved_weight_init)
trainer_a = SoftmaxTrainer(
    momentum_gamma, use_momentum,
    model_a, learning_rate, batch_size, shuffle_data,
    X_train, Y_train,
    X_val, Y_val,
)
train_history_a, val_history_a = trainer_a.train(num_epochs)

# 128 neurons per hidden layer
neurons_per_layer = [128, 10]
model_b = SoftmaxModel(
    neurons_per_layer, use_improved_sigmoid, use_improved_weight_init)
trainer_b = SoftmaxTrainer(
    momentum_gamma, use_momentum,
    model_b, learning_rate, batch_size, shuffle_data,
    X_train, Y_train,
    X_val, Y_val,
)
train_history_b, val_history_b = trainer_b.train(num_epochs)
# Load dataset
X_train, Y_train, X_val, Y_val = utils.load_full_mnist()
X_train = pre_process_images(X_train)
X_val = pre_process_images(X_val)
Y_train = one_hot_encode(Y_train, 10)
Y_val = one_hot_encode(Y_val, 10)

model = SoftmaxModel(
    neurons_per_layer, use_improved_sigmoid, use_improved_weight_init)
trainer = SoftmaxTrainer(
    momentum_gamma, use_momentum,
    model, learning_rate, batch_size, shuffle_data,
    X_train, Y_train,
    X_val, Y_val,
)
train_history, val_history = trainer.train(num_epochs)

# Example created in assignment text - Comparing with and without shuffling.
# FIRST CASE (improved weight initialization)
use_improved_weight_init = True
model_weights = SoftmaxModel(
    neurons_per_layer, use_improved_sigmoid, use_improved_weight_init)
trainer_weights = SoftmaxTrainer(
    momentum_gamma, use_momentum,
    model_weights, learning_rate, batch_size, shuffle_data,
    X_train, Y_train,
    X_val, Y_val,
)
train_history_weights, val_history_weights = trainer_weights.train(num_epochs)
plt.xlabel("Training steps") plt.legend(loc="lower right") # First nothing use_improved_weight_init = False use_improved_sigmoid = False use_momentum = False model = SoftmaxModel(neurons_per_layer, use_improved_sigmoid, use_improved_weight_init) trainer = SoftmaxTrainer( momentum_gamma, use_momentum, model, learning_rate, batch_size, shuffle_data, X_train, Y_train, X_val, Y_val, ) train_history_nothing, val_history_nothing = trainer.train(num_epochs) # Adding improved weights use_improved_weight_init = True use_improved_sigmoid = False use_momentum = False model = SoftmaxModel(neurons_per_layer, use_improved_sigmoid, use_improved_weight_init)
# Load dataset
X_train, Y_train, X_val, Y_val = utils.load_full_mnist()
X_train = pre_process_images(X_train)
X_val = pre_process_images(X_val)
Y_train = one_hot_encode(Y_train, 10)
Y_val = one_hot_encode(Y_val, 10)

model = SoftmaxModel(
    neurons_per_layer, use_improved_sigmoid, use_improved_weight_init)
trainer = SoftmaxTrainer(
    momentum_gamma, use_momentum,
    model, learning_rate, batch_size, shuffle_data,
    X_train, Y_train,
    X_val, Y_val,
)
train_history, val_history = trainer.train(num_epochs)

# Example created in assignment text - Comparing with and without shuffling.
# YOU CAN DELETE EVERYTHING BELOW!
use_improved_weight_init = True
model_improved_weights = SoftmaxModel(
    neurons_per_layer, use_improved_sigmoid, use_improved_weight_init)
# Load dataset
X_train, Y_train, X_val, Y_val = utils.load_full_mnist()
X_train = pre_process_images(X_train)
X_val = pre_process_images(X_val)
Y_train = one_hot_encode(Y_train, 10)
Y_val = one_hot_encode(Y_val, 10)

###### Naked model - no improvements ######
model_naked = SoftmaxModel(
    neurons_per_layer, use_improved_sigmoid, use_improved_weight_init)
trainer_naked = SoftmaxTrainer(
    momentum_gamma, use_momentum,
    model_naked, learning_rate, batch_size, shuffle_data,
    X_train, Y_train,
    X_val, Y_val,
)
train_history_naked, val_history_naked = trainer_naked.train(num_epochs)
print("Baseline model (no improvements)")
print("Train accuracy:", calculate_accuracy(X_train, Y_train, model_naked))
print("Validation accuracy:", calculate_accuracy(X_val, Y_val, model_naked))
print("Final Validation Cross Entropy Loss:",
      cross_entropy_loss(Y_val, model_naked.forward(X_val)))
print("Final Train Cross Entropy Loss:",
      cross_entropy_loss(Y_train, model_naked.forward(X_train)))
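# The same four metrics are printed for every model variant in these
# scripts; a small helper keeps that in one place. A minimal sketch -
# print_metrics is a hypothetical name, not part of the starter code:
def print_metrics(name, model, X_train, Y_train, X_val, Y_val):
    print(name)
    print("Train accuracy:", calculate_accuracy(X_train, Y_train, model))
    print("Validation accuracy:", calculate_accuracy(X_val, Y_val, model))
    print("Final Train Cross Entropy Loss:",
          cross_entropy_loss(Y_train, model.forward(X_train)))
    print("Final Validation Cross Entropy Loss:",
          cross_entropy_loss(Y_val, model.forward(X_val)))

# Usage: print_metrics("Baseline model", model_naked, X_train, Y_train, X_val, Y_val)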
mean = np.mean(X_train)
std = np.std(X_train)
X_train = pre_process_images(X_train, mean, std)
X_val = pre_process_images(X_val, mean, std)
Y_train = one_hot_encode(Y_train, 10)
Y_val = one_hot_encode(Y_val, 10)

model = SoftmaxModel(
    neurons_per_layer_small, use_improved_sigmoid, use_improved_weight_init)
trainer = SoftmaxTrainer(
    momentum_gamma, use_momentum,
    model, learning_rate, batch_size, shuffle_data, early_stopping,
    X_train, Y_train,
    X_val, Y_val,
)
train_history, val_history = trainer.train(num_epochs)

# Example created in assignment text - Comparing with and without shuffling.
# YOU CAN DELETE EVERYTHING BELOW!
shuffle_data = False
model_large = SoftmaxModel(
    neurons_per_layer_large, use_improved_sigmoid, use_improved_weight_init)
trainer_shuffle = SoftmaxTrainer(
    momentum_gamma, use_momentum,
    model_large, learning_rate, batch_size, shuffle_data, early_stopping,
    X_train, Y_train,
    X_val, Y_val,
)
# Load dataset
X_train, Y_train, X_val, Y_val = utils.load_full_mnist()
X_train = pre_process_images(X_train)
X_val = pre_process_images(X_val)
Y_train = one_hot_encode(Y_train, 10)
Y_val = one_hot_encode(Y_val, 10)

print("Training standard model:\n")
model = SoftmaxModel(
    neurons_per_layer, use_improved_sigmoid, use_improved_weight_init)
trainer = SoftmaxTrainer(
    momentum_gamma, use_momentum,
    model, learning_rate, batch_size, shuffle_data,
    X_train, Y_train,
    X_val, Y_val,
)
train_history, val_history = trainer.train(num_epochs)
print("Final Train Cross Entropy Loss:",
      cross_entropy_loss(Y_train, model.forward(X_train)))
print("Final Validation Cross Entropy Loss:",
      cross_entropy_loss(Y_val, model.forward(X_val)))
print("Train accuracy:", calculate_accuracy(X_train, Y_train, model))
print("Validation accuracy:", calculate_accuracy(X_val, Y_val, model))
print("\n\n")

# Example created in assignment text - Comparing with and without shuffling.
# YOU CAN DELETE EVERYTHING BELOW!
# Load dataset
X_train, Y_train, X_val, Y_val = utils.load_full_mnist()
X_train = pre_process_images(X_train)
X_val = pre_process_images(X_val)
Y_train = one_hot_encode(Y_train, 10)
Y_val = one_hot_encode(Y_val, 10)

model_previous = SoftmaxModel(
    neurons_per_layer, use_improved_sigmoid, use_improved_weight_init)
trainer = SoftmaxTrainer(
    momentum_gamma, use_momentum,
    model_previous, learning_rate, batch_size, shuffle_data,
    X_train, Y_train,
    X_val, Y_val,
)
train_history, val_history = trainer.train(num_epochs)

# Example created in assignment text - Comparing with and without shuffling.
# YOU CAN DELETE EVERYTHING BELOW!
# Configuration for the new model is edited here:
use_improved_weight_init = True
use_improved_sigmoid = True
use_momentum = True
learning_rate = .02
neurons_per_layer = [64, 64, 10]
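# A minimal sketch of how the new configuration would then be trained,
# following the same trainer pattern as model_previous above (model_new
# and the history names are illustrative, not from the original script):
model_new = SoftmaxModel(
    neurons_per_layer, use_improved_sigmoid, use_improved_weight_init)
trainer_new = SoftmaxTrainer(
    momentum_gamma, use_momentum,
    model_new, learning_rate, batch_size, shuffle_data,
    X_train, Y_train,
    X_val, Y_val,
)
train_history_new, val_history_new = trainer_new.train(num_epochs)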
X_train, Y_train, X_val, Y_val = utils.load_full_mnist()
X_train = pre_process_images(X_train)
X_val = pre_process_images(X_val)
Y_train = one_hot_encode(Y_train, 10)
Y_val = one_hot_encode(Y_val, 10)

# Task 4 a) and b)
# The model carried over from the previous Task 3:
model = SoftmaxModel(
    neurons_per_layer, use_improved_sigmoid, use_improved_weight_init)
trainer = SoftmaxTrainer(
    momentum_gamma, use_momentum,
    model, learning_rate, batch_size, shuffle_data,
    X_train, Y_train,
    X_val, Y_val,
)
print(f"Training the original model with {neurons_per_layer[0]} neurons in the hidden layer")
train_history, val_history = trainer.train(num_epochs)

# Task 4 a) - 32 neurons in the hidden layer
neurons_per_layer = [32, 10]
model_32neu = SoftmaxModel(
    neurons_per_layer, use_improved_sigmoid, use_improved_weight_init)
X_train, Y_train, X_val, Y_val = utils.load_full_mnist()
X_train = pre_process_images(X_train)
X_val = pre_process_images(X_val)
Y_train = one_hot_encode(Y_train, 10)
Y_val = one_hot_encode(Y_val, 10)

neurons_per_layer = [64, 10]
model = SoftmaxModel(
    neurons_per_layer, use_improved_sigmoid, use_improved_weight_init)
trainer = SoftmaxTrainer(
    momentum_gamma, use_momentum,
    model, learning_rate, batch_size, shuffle_data,
    X_train, Y_train,
    X_val, Y_val,
)
train_history_64, val_history_64 = trainer.train(num_epochs)
print("\n\n --------------------------")
print(neurons_per_layer)
print("Final Train Cross Entropy Loss:",
      cross_entropy_loss(Y_train, model.forward(X_train)))
print("Final Validation Cross Entropy Loss:",
      cross_entropy_loss(Y_val, model.forward(X_val)))
print("Train accuracy:", calculate_accuracy(X_train, Y_train, model))
print("Validation accuracy:", calculate_accuracy(X_val, Y_val, model))
print("\n\n --------------------------")
mean = np.mean(X_train)
std = np.std(X_train)
X_train = pre_process_images(X_train, mean, std)
X_val = pre_process_images(X_val, mean, std)
Y_train = one_hot_encode(Y_train, 10)
Y_val = one_hot_encode(Y_val, 10)

# 64 hidden units
model64 = SoftmaxModel(
    neurons_per_layer, use_improved_sigmoid, use_improved_weight_init)
trainer64 = SoftmaxTrainer(
    momentum_gamma, use_momentum,
    model64, learning_rate, batch_size, shuffle_data,
    X_train, Y_train,
    X_val, Y_val,
)
train_history64, val_history64 = trainer64.train(num_epochs)

# 32 hidden units
neurons_per_layer = [32, 10]
model32 = SoftmaxModel(
    neurons_per_layer, use_improved_sigmoid, use_improved_weight_init)
trainer32 = SoftmaxTrainer(
    momentum_gamma, use_momentum,
    model32, learning_rate, batch_size, shuffle_data,
    X_train, Y_train,
    X_val, Y_val,
)
train_history32, val_history32 = trainer32.train(num_epochs)
# Task 4e)
###################
shuffle_data = True
use_improved_sigmoid = True
use_improved_weight_init = True
use_momentum = False
learning_rate_momentum = 0.02
# Ten hidden layers of 64 units each
neurons_per_layer = [64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 10]
model_64_layers = SoftmaxModel(
    neurons_per_layer, use_improved_sigmoid, use_improved_weight_init)
trainer_64_layers = SoftmaxTrainer(
    momentum_gamma, use_momentum,
    model_64_layers, learning_rate, batch_size, shuffle_data,
    X_train, Y_train,
    X_val, Y_val,
)
train_history_64_layers, val_history_64_layers = trainer_64_layers.train(
    num_epochs)

plt.figure(figsize=(20, 12))
plt.subplot(1, 2, 1)
plt.ylim([0.00, 1.00])
utils.plot_loss(train_history_64_layers["loss"],
                "Training Loss 10 hidden layers", npoints_to_average=10)
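# A minimal sketch of how the rest of the figure might look, following the
# plotting conventions from the other snippets (the "accuracy" history key
# and the axis labels are assumptions, not confirmed by this script):
utils.plot_loss(val_history_64_layers["loss"],
                "Validation Loss 10 hidden layers")
plt.ylabel("Cross Entropy Loss")
plt.xlabel("Training steps")
plt.legend(loc="upper right")
plt.subplot(1, 2, 2)
utils.plot_loss(val_history_64_layers["accuracy"],
                "Validation Accuracy 10 hidden layers")
plt.ylabel("Accuracy")
plt.xlabel("Training steps")
plt.legend(loc="lower right")
plt.show()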
X_train, Y_train, X_val, Y_val = utils.load_full_mnist()
mean = np.mean(X_train)
std = np.std(X_train)
X_train = pre_process_images(X_train, mean, std)
X_val = pre_process_images(X_val, mean, std)
Y_train = one_hot_encode(Y_train, 10)
Y_val = one_hot_encode(Y_val, 10)

model = SoftmaxModel(
    neurons_per_layer, use_improved_sigmoid, use_improved_weight_init)
trainer = SoftmaxTrainer(
    momentum_gamma, use_momentum,
    model, learning_rate, batch_size, shuffle_data,
    X_train, Y_train,
    X_val, Y_val,
)
train_history, val_history = trainer.train(num_epochs)

# Example created in assignment text - Comparing with and without shuffling.
# YOU CAN DELETE EVERYTHING BELOW!
use_improved_weight_init = True
use_improved_sigmoid = True
use_momentum = True
learning_rate = 0.02
new_model = SoftmaxModel(
    neurons_per_layer, use_improved_sigmoid, use_improved_weight_init)
plt.ylim([0.85, 1.01])
plt.ylabel("Accuracy")
plt.xlabel("Training steps")
plt.legend(loc="lower right")

# Single hidden layer
neurons_per_layer = [64, 10]
model = SoftmaxModel(
    neurons_per_layer, use_improved_sigmoid, use_improved_weight_init)
trainer = SoftmaxTrainer(
    momentum_gamma, use_momentum,
    model, learning_rate, batch_size, shuffle_data,
    X_train, Y_train,
    X_val, Y_val,
)
train_history_single, val_history_single = trainer.train(num_epochs)
print("---------", neurons_per_layer, "----------")
print("Final Train Cross Entropy Loss:",
      cross_entropy_loss(Y_train, model.forward(X_train)))
print("Final Validation Cross Entropy Loss:",
      cross_entropy_loss(Y_val, model.forward(X_val)))
print("Train accuracy:", calculate_accuracy(X_train, Y_train, model))
print("Validation accuracy:", calculate_accuracy(X_val, Y_val, model))
X_train, Y_train, X_val, Y_val = utils.load_full_mnist()
mean = X_train.mean()
sd = X_train.std()
X_train = pre_process_images(X_train, mean, sd)
X_val = pre_process_images(X_val, mean, sd)
Y_train = one_hot_encode(Y_train, 10)
Y_val = one_hot_encode(Y_val, 10)

model = SoftmaxModel(
    neurons_per_layer, use_improved_sigmoid, use_improved_weight_init)
trainer = SoftmaxTrainer(
    momentum_gamma, use_momentum,
    model, learning_rate, batch_size, shuffle_data,
    X_train, Y_train,
    X_val, Y_val,
)
train_history, val_history = trainer.train(num_epochs)

# Two hidden layers
neurons_per_layer = [59, 59, 10]
model_two_hidden_layers = SoftmaxModel(
    neurons_per_layer, use_improved_sigmoid, use_improved_weight_init)
trainer_two_hidden_layers = SoftmaxTrainer(
    momentum_gamma, use_momentum,
    model_two_hidden_layers, learning_rate, batch_size, shuffle_data,
    X_train, Y_train,
    X_val, Y_val,
)
X_val = pre_process_images(X_val)
Y_train = one_hot_encode(Y_train, 10)
Y_val = one_hot_encode(Y_val, 10)

###### 1st model - network from Task 3 ######
use_improved_sigmoid = True
use_improved_weight_init = True
use_momentum = True
model = SoftmaxModel(
    neurons_per_layer, use_improved_sigmoid, use_improved_weight_init)
trainer = SoftmaxTrainer(
    momentum_gamma, use_momentum,
    model, learning_rate, batch_size, shuffle_data,
    X_train, Y_train,
    X_val, Y_val,
)
train_history, val_history = trainer.train(num_epochs)

###### 2nd model - network from Task 4d ######
neurons_per_layer = [60, 60, 10]
model1 = SoftmaxModel(
    neurons_per_layer, use_improved_sigmoid, use_improved_weight_init)
trainer1 = SoftmaxTrainer(
    momentum_gamma, use_momentum,
    model1, learning_rate, batch_size, shuffle_data,
    X_train, Y_train,
    X_val, Y_val,
)
X_train, Y_train, X_val, Y_val = utils.load_full_mnist()
mean = np.mean(X_train)
std = np.std(X_train)
X_train = pre_process_images(X_train, mean, std)
X_val = pre_process_images(X_val, mean, std)
Y_train = one_hot_encode(Y_train, 10)
Y_val = one_hot_encode(Y_val, 10)

model = SoftmaxModel(
    neurons_per_layer, use_improved_sigmoid, use_improved_weight_init)
trainer = SoftmaxTrainer(
    momentum_gamma, use_momentum,
    model, learning_rate, batch_size, shuffle_data, early_stop,
    X_train, Y_train,
    X_val, Y_val,
)
train_history, val_history = trainer.train(num_epochs)

## RUN WITH MOMENTUM
use_momentum = True
model_momentum = SoftmaxModel(
    neurons_per_layer, use_improved_sigmoid, use_improved_weight_init)
trainer_momentum = SoftmaxTrainer(
    momentum_gamma, use_momentum,
    model_momentum, learning_rate, batch_size, shuffle_data, early_stop,
    X_train, Y_train,
    X_val, Y_val,
)
train_history_momentum, val_history_momentum = trainer_momentum.train(num_epochs)
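# A minimal sketch of how the two runs might be compared, reusing the
# plotting helpers from the other scripts (figure size, axis limits, and
# labels are illustrative, not from the original):
plt.figure(figsize=(16, 10))
plt.ylim([0, .5])
utils.plot_loss(train_history["loss"], "Without momentum",
                npoints_to_average=10)
utils.plot_loss(train_history_momentum["loss"], "With momentum",
                npoints_to_average=10)
plt.ylabel("Cross Entropy Loss")
plt.xlabel("Training steps")
plt.legend(loc="upper right")
plt.show()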