def train(
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        l2_reg_lambda: float  # Task 3 hyperparameter
):
    global X_train, X_val, X_test
    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    num_steps_per_val = num_batches_per_epoch // 5
    # Tracking variables to track loss / accuracy
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}

    # Initialize our model
    model = SoftmaxModel(l2_reg_lambda, X_train.shape[0], 10)
    # Initialize weights
    model.w = np.zeros((785, 10))

    global_step = 0
    for epoch in range(num_epochs):
        for step in range(num_batches_per_epoch):
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            # Forward and backward pass
            output = model.forward(X_batch)
            model.backward(X_batch, output, Y_batch)
            # Gradient descent update
            model.w = model.w - learning_rate * model.grad

            # Track training loss continuously. Note that this evaluates the
            # loss over the full training set at every step, which is exact
            # but far more expensive than using the mini-batch loss.
            output_train = model.forward(X_train)
            train_loss[global_step] = cross_entropy_loss(Y_train, output_train)

            # Track validation loss / accuracy every time we progress 20% through the dataset
            if global_step % num_steps_per_val == 0:
                output_val = model.forward(X_val)
                val_loss[global_step] = cross_entropy_loss(Y_val, output_val)
                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)
            global_step += 1
    return model, train_loss, val_loss, train_accuracy, val_accuracy
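# The train loops in this section call a cross_entropy_loss helper that is not
# shown. A minimal sketch of what it plausibly looks like for one-hot targets
# follows; the epsilon guard and the mean reduction over the batch are
# assumptions, not the original implementation.
import numpy as np

def cross_entropy_loss(targets: np.ndarray, outputs: np.ndarray) -> float:
    """
    Args:
        targets: one-hot labels of shape [batch size, 10]
        outputs: softmax probabilities of shape [batch size, 10]
    Returns:
        Mean multi-class cross-entropy (float)
    """
    eps = 1e-12  # avoid log(0) for fully confident wrong predictions
    return float(-np.mean(np.sum(targets * np.log(outputs + eps), axis=1)))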
def train(
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        l2_reg_lambda: float  # Task 3 hyperparameter
):
    global X_train, X_val, X_test
    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    num_steps_per_val = num_batches_per_epoch // 5
    # Tracking variables to track loss / accuracy
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}

    # Initialize our model
    model = SoftmaxModel(l2_reg_lambda)

    global_step = 0
    for epoch in range(num_epochs):
        for step in range(num_batches_per_epoch):
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            # The mini-batch gradient descent algorithm for m batches and a single epoch.
            model.backward(X_batch, model.forward(X_batch), Y_batch)
            model.w = model.w - learning_rate * model.grad

            # Track training loss continuously. The second forward call means
            # the loss is measured after the weight update.
            _train_loss = cross_entropy_loss(Y_batch, model.forward(X_batch))
            train_loss[global_step] = _train_loss

            # Track validation loss / accuracy every time we progress 20% through the dataset
            if global_step % num_steps_per_val == 0:
                _val_loss = cross_entropy_loss(Y_val, model.forward(X_val))
                val_loss[global_step] = _val_loss
                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)
            global_step += 1
    return model, train_loss, val_loss, train_accuracy, val_accuracy
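# Every variant constructs a SoftmaxModel, but the class itself is not
# included. Below is a minimal sketch of the interface the loops assume: a
# weight matrix w of shape [785, 10], forward() returning softmax
# probabilities, and backward() storing the L2-regularized gradient in grad.
# The constructor here takes only l2_reg_lambda, matching most variants (the
# first variant passes extra size arguments, so signatures evidently differed
# between versions). This is an assumed reconstruction, not the original class.
import numpy as np

class SoftmaxModel:
    def __init__(self, l2_reg_lambda: float):
        self.l2_reg_lambda = l2_reg_lambda
        self.w = np.zeros((785, 10))  # 784 pixels + bias feature, 10 classes
        self.grad = None

    def forward(self, X: np.ndarray) -> np.ndarray:
        # Numerically stable softmax over the class dimension
        z = X @ self.w
        z = z - z.max(axis=1, keepdims=True)
        e = np.exp(z)
        return e / e.sum(axis=1, keepdims=True)

    def backward(self, X: np.ndarray, outputs: np.ndarray,
                 targets: np.ndarray) -> None:
        # Gradient of the mean cross-entropy plus the L2 penalty term
        batch_size = X.shape[0]
        self.grad = X.T @ (outputs - targets) / batch_size
        self.grad += 2 * self.l2_reg_lambda * self.w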
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    # Task 4c
    tot_preds = X.shape[0]  # total number of predictions
    correctly_predicted = np.sum(
        np.argmax(model.forward(X), 1) == np.argmax(targets, 1))
    accuracy = correctly_predicted / tot_preds
    return accuracy
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    output = model.forward(X)
    # One-hot encode the predicted classes, then count the positions where
    # prediction and target overlap
    predictions = one_hot_encode(np.array([np.argmax(output, axis=1)]).T, 10)
    correct_pred = np.count_nonzero(targets * predictions)
    total_pred = output.shape[0]
    accuracy = correct_pred / total_pred
    return accuracy
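# The variant above (and the script at the end of the section) also rely on a
# one_hot_encode helper. A sketch under the assumption that it takes a column
# vector of integer labels of shape [batch size, 1]:
import numpy as np

def one_hot_encode(Y: np.ndarray, num_classes: int) -> np.ndarray:
    """Map integer labels of shape [batch size, 1] to one-hot vectors of
    shape [batch size, num_classes]."""
    encoded = np.zeros((Y.shape[0], num_classes))
    encoded[np.arange(Y.shape[0]), Y[:, 0].astype(int)] = 1
    return encoded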
def train(
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        l2_reg_lambda: float  # Task 3 hyperparameter
):
    global X_train, X_val, X_test
    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    num_steps_per_val = num_batches_per_epoch // 5
    # Tracking variables to track loss / accuracy
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}

    # Initialize our model
    model = SoftmaxModel(l2_reg_lambda)

    global_step = 0
    for epoch in range(num_epochs):
        for step in range(num_batches_per_epoch):
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            # Forward pass, backward pass and gradient descent update
            outputs = model.forward(X_batch)
            model.backward(X_batch, outputs, Y_batch)
            model.w -= learning_rate * model.grad

            # Track training loss continuously
            train_loss[global_step] = cross_entropy_loss(Y_batch, outputs)

            # Track validation loss / accuracy every time we progress 20% through the dataset
            if global_step % num_steps_per_val == 0:
                val_loss[global_step] = cross_entropy_loss(
                    Y_val, model.forward(X_val))
                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)
            global_step += 1
    return model, train_loss, val_loss, train_accuracy, val_accuracy
def train(
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        l2_reg_lambda: float  # Task 3 hyperparameter
):
    global X_train, X_val, X_test
    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    num_steps_per_val = num_batches_per_epoch // 5
    # Tracking variables to track loss / accuracy
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}

    # Pre-process the images unless they already carry the bias feature
    if X_train.shape[1] == 784:
        X_train = pre_process_images(X_train)
    if X_test.shape[1] == 784:
        X_test = pre_process_images(X_test)
    if X_val.shape[1] == 784:
        X_val = pre_process_images(X_val)

    # Initialize our model
    model = SoftmaxModel(l2_reg_lambda)

    global_step = 0
    for epoch in range(num_epochs):
        for step in range(num_batches_per_epoch):
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            y_hat = model.forward(X_batch)
            model.backward(X_batch, y_hat, Y_batch)
            model.w -= learning_rate * model.grad

            # Track training loss continuously
            _train_loss = cross_entropy_loss(Y_batch, y_hat)
            train_loss[global_step] = _train_loss

            # Track validation loss / accuracy every time we progress 20% through the dataset
            if global_step % num_steps_per_val == 0:
                _val_loss = cross_entropy_loss(Y_val, model.forward(X_val))
                val_loss[global_step] = _val_loss
                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)
            global_step += 1
    return model, train_loss, val_loss, train_accuracy, val_accuracy
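# The guard on X.shape[1] == 784 above implies that pre_process_images appends
# a bias feature, turning 784 pixel columns into 785. A sketch under that
# assumption; the normalization constants are a guess, not the original code.
import numpy as np

def pre_process_images(X: np.ndarray) -> np.ndarray:
    X = (X - 127.5) / 127.5  # scale raw pixel values to roughly [-1, 1]
    bias = np.ones((X.shape[0], 1))
    return np.hstack([X, bias])  # [batch size, 785]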
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    number_of_predictions = X.shape[0]
    number_of_rights = 0
    y_hat = model.forward(X)
    for i in range(number_of_predictions):
        # A prediction is correct when the most probable class matches the
        # target class; rounding the probabilities instead would miscount
        # predictions whose largest probability is below 0.5.
        if np.argmax(y_hat[i]) == np.argmax(targets[i]):
            number_of_rights += 1
    accuracy = number_of_rights / number_of_predictions
    return accuracy
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    outputs = model.forward(X)
    N = targets.shape[0]
    correct_outputs = 0
    for i in range(N):
        target = np.where(targets[i] == 1)[0][0]  # index of the one-hot label
        output = np.argmax(outputs[i])            # most probable class
        if target == output:
            correct_outputs += 1
    return correct_outputs / N
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    output = model.forward(X)
    result_output = np.argmax(output, axis=1)
    target = np.argmax(targets, axis=1)
    correct = target == result_output
    accuracy = np.mean(correct)
    return accuracy
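# A quick sanity check for the vectorized variant above: against uniformly
# random predictions, accuracy should land near chance (about 0.1 for ten
# classes). The stub model below is purely illustrative.
import numpy as np

rng = np.random.default_rng(0)

class _RandomModel:
    def forward(self, X: np.ndarray) -> np.ndarray:
        scores = rng.random((X.shape[0], 10))
        return scores / scores.sum(axis=1, keepdims=True)

X_dummy = rng.random((1000, 785))
labels = rng.integers(0, 10, size=1000)
Y_dummy = np.zeros((1000, 10))
Y_dummy[np.arange(1000), labels] = 1

print(calculate_accuracy(X_dummy, Y_dummy, _RandomModel()))  # ~0.1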
def train(num_epochs: int, learning_rate: float,
          batch_size: int, l2_reg_lambda: float):
    global X_train, X_val, X_test
    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    num_steps_per_val = num_batches_per_epoch // 5
    # Tracking variables to track loss / accuracy
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}

    # Initialize our model
    model = SoftmaxModel(l2_reg_lambda)

    global_step = 0
    for epoch in range(num_epochs):
        for step in range(num_batches_per_epoch):
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            # Forward pass
            train_outputs = model.forward(X_batch)
            # Backward propagation
            model.backward(X_batch, train_outputs, Y_batch)
            model.w -= learning_rate * model.grad

            # Track training loss continuously
            _train_loss = cross_entropy_loss(Y_batch, train_outputs)
            train_loss[global_step] = _train_loss

            # Track validation loss / accuracy every time we progress 20% through the dataset
            if global_step % num_steps_per_val == 0:
                val_outputs = model.forward(X_val)
                _val_loss = cross_entropy_loss(Y_val, val_outputs)
                val_loss[global_step] = _val_loss
                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)
            global_step += 1
    return model, train_loss, val_loss, train_accuracy, val_accuracy
import numpy as np
import matplotlib.pyplot as plt

import utils

num_epochs = 50  # assumed value; the original snippet does not define num_epochs
learning_rate = 0.01
batch_size = 128
l2_reg_lambda = 0
shuffle_dataset = True

# Load dataset
X_train, Y_train, X_val, Y_val = utils.load_full_mnist()
X_train = pre_process_images(X_train)
X_val = pre_process_images(X_val)
Y_train = one_hot_encode(Y_train, 10)
Y_val = one_hot_encode(Y_val, 10)

# ANY PARTS OF THE CODE BELOW THIS CAN BE CHANGED.

# Initialize model
model = SoftmaxModel(l2_reg_lambda)
# Train model
trainer = SoftmaxTrainer(
    model, learning_rate, batch_size, shuffle_dataset,
    X_train, Y_train, X_val, Y_val,
)
train_history, val_history = trainer.train(num_epochs)

print("Final Train Cross Entropy Loss:",
      cross_entropy_loss(Y_train, model.forward(X_train)))
print("Final Validation Cross Entropy Loss:",
      cross_entropy_loss(Y_val, model.forward(X_val)))
print("Final Train accuracy:",
      calculate_accuracy(X_train, Y_train, model))
print("Final Validation accuracy:",
      calculate_accuracy(X_val, Y_val, model))

plt.ylim([0.2, .6])
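# The script above delegates the loop to a SoftmaxTrainer class that is not
# shown. A minimal sketch consistent with the train() functions earlier in
# the section, including the shuffle_dataset flag; the constructor order
# matches the call above, but the history format is an assumption.
import numpy as np

class SoftmaxTrainer:
    def __init__(self, model, learning_rate, batch_size, shuffle_dataset,
                 X_train, Y_train, X_val, Y_val):
        self.model = model
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        self.shuffle_dataset = shuffle_dataset
        self.X_train, self.Y_train = X_train, Y_train
        self.X_val, self.Y_val = X_val, Y_val

    def train(self, num_epochs: int):
        train_history = {"loss": {}}
        val_history = {"loss": {}}
        num_batches = self.X_train.shape[0] // self.batch_size
        global_step = 0
        for epoch in range(num_epochs):
            indices = np.arange(self.X_train.shape[0])
            if self.shuffle_dataset:
                np.random.shuffle(indices)  # new example order every epoch
            for step in range(num_batches):
                batch = indices[step * self.batch_size:
                                (step + 1) * self.batch_size]
                X_batch, Y_batch = self.X_train[batch], self.Y_train[batch]
                outputs = self.model.forward(X_batch)
                self.model.backward(X_batch, outputs, Y_batch)
                self.model.w -= self.learning_rate * self.model.grad
                train_history["loss"][global_step] = cross_entropy_loss(
                    Y_batch, outputs)
                global_step += 1
            # Validate once per epoch
            val_history["loss"][global_step] = cross_entropy_loss(
                self.Y_val, self.model.forward(self.X_val))
        return train_history, val_history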