def train(
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        l2_reg_lambda: float  # Task 3 hyperparameter. Can be ignored before this.
):
    """
    Function that implements logistic regression through mini-batch
    gradient descent for the given hyperparameters
    """
    global X_train, X_val, X_test
    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    num_steps_per_val = num_batches_per_epoch // 5
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}
    model = BinaryModel(l2_reg_lambda)

    # Pre-process raw 784-pixel images exactly once (adds the bias feature)
    if X_train.shape[1] == 784:
        X_train = pre_process_images(X_train)
    if X_test.shape[1] == 784:
        X_test = pre_process_images(X_test)
    if X_val.shape[1] == 784:
        X_val = pre_process_images(X_val)

    global_step = 0
    for epoch in range(num_epochs):
        for step in range(num_batches_per_epoch):
            # Select our mini-batch of images / labels
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            # Forward pass, backward pass, then one gradient descent step
            y_hat = model.forward(X_batch)
            model.backward(X_batch, y_hat, Y_batch)
            model.w -= learning_rate * model.grad

            # Track training loss continuously
            _train_loss = cross_entropy_loss(Y_batch, y_hat)
            train_loss[global_step] = _train_loss
            # Track validation loss / accuracy every time we progress 20% through the dataset
            if global_step % num_steps_per_val == 0:
                _val_loss = cross_entropy_loss(Y_val, model.forward(X_val))
                val_loss[global_step] = _val_loss

                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)

            global_step += 1
    return model, train_loss, val_loss, train_accuracy, val_accuracy
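# A hypothetical driver for the train function above; the loader name
# (utils.load_binary_dataset) and the hyperparameter values are assumptions
# based on typical starter code for this assignment, not part of the snippet:
if __name__ == "__main__":
    X_train, Y_train, X_val, Y_val, X_test, Y_test = utils.load_binary_dataset()
    model, train_loss, val_loss, train_accuracy, val_accuracy = train(
        num_epochs=50,
        learning_rate=0.2,
        batch_size=128,
        l2_reg_lambda=0,  # ignored before Task 3
    )
    print("Final validation accuracy:", calculate_accuracy(X_val, Y_val, model))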
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: BinaryModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 1]
        model: model of class BinaryModel
    Returns:
        Accuracy (float)
    """
    number_of_predictions = X.shape[0]
    number_of_rights = 0
    y_hat = model.forward(X)
    for index in range(0, number_of_predictions):
        if y_hat[index] >= 0.5:
            y_hat[index] = 1
        else:
            y_hat[index] = 0
        if y_hat[index] == targets[index]:
            number_of_rights += 1

    # Task 2c
    accuracy = number_of_rights / number_of_predictions
    return accuracy
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: BinaryModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 1]
        model: model of class BinaryModel
    Returns:
        Accuracy (float)
    """
    logits = model.forward(X)
    accuracy = np.count_nonzero(np.abs(targets - logits) < 0.5) / X.shape[0]
    return accuracy
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: BinaryModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 1]
        model: model of class BinaryModel
    Returns:
        Accuracy (float)
    """
    y = model.forward(X)
    # An output within 0.5 of its 0/1 target counts as correct. Note that
    # np.isclose also applies a small relative tolerance (rtol) on top of
    # atol, so this is approximately, not exactly, a 0.5 threshold.
    correct = np.isclose(targets, y, atol=0.5)
    return np.sum(correct) / y.shape[0]
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: BinaryModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 1]
        model: model of class BinaryModel
    Returns:
        Accuracy (float)
    """
    # Task 2c
    outputs = model.forward(X).round()
    accuracy = np.mean(outputs == targets)
    return accuracy
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: BinaryModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 1]
        model: model of class BinaryModel
    Returns:
        Accuracy (float)
    """
    # Creating a vector of hard predictions (1 or 0)
    predictions = (model.forward(X) >= 0.5)
    # Counting every time a prediction equals its target, then dividing by batch size
    accuracy = np.count_nonzero(predictions == targets) / X.shape[0]
    return accuracy
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: BinaryModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 1]
        model: model of class BinaryModel
    Returns:
        Accuracy (float)
    """
    # Task 2c
    outputs = model.forward(X)
    # Count outputs within 0.5 of their target
    num_correct = np.count_nonzero(np.abs(outputs - targets) < 0.5)
    accuracy = num_correct / len(outputs)
    return accuracy
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: BinaryModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 1]
        model: model of class BinaryModel
    Returns:
        Accuracy (float)
    """
    # Task 2c
    predictions = model.forward(X)  # Predictions [batch size, 1]
    # Each rounded prediction that misses its target contributes 1 to the
    # sum, so this is 1 minus the error rate
    accuracy = 1 - np.sum(
        np.absolute(np.round(predictions) - targets)) / targets.shape[0]
    return accuracy
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: BinaryModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 1]
        model: model of class BinaryModel
    Returns:
        Accuracy (float)
    """
    # Task 2c
    y_hat = model.forward(X)
    y_hat = np.round(y_hat, 0)
    accuracy = np.count_nonzero(y_hat == targets) / targets.shape[0]
    return accuracy
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: BinaryModel) -> float:
    """
    Computes the classification accuracy of the model on the given images.
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 1]
        model: model of class BinaryModel
    Returns:
        Accuracy (float)
    """
    # Task 2c
    outputs = model.forward(X)
    # An output within 0.5 of its target is a correct classification
    correct_classifications_count = (np.absolute(targets - outputs) < 0.5).sum()
    accuracy = correct_classifications_count / X.shape[0]
    return accuracy
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: BinaryModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 1]
        model: model of class BinaryModel
    Returns:
        Accuracy (float)
    """
    # Task 2c
    y_pred = model.forward(X)
    # Binarize the predictions with a 0.5 threshold
    y_pred_bin = [1 if pred >= 0.5 else 0 for pred in y_pred]
    accuracy = np.sum(y_pred_bin == targets.flatten()) / targets.shape[0]
    return accuracy
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: BinaryModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 1]
        model: model of class BinaryModel
    Returns:
        Accuracy (float)
    """
    # Task 2c
    tot_preds = X.shape[0]  # total number of predictions
    num_errors = np.sum(abs(
        targets - model.forward(X).round()))  # abs error between target and prediction
    accuracy = (tot_preds - num_errors) / tot_preds
    return accuracy
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: BinaryModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 1]
        model: model of class BinaryModel
    Returns:
        Accuracy (float)
    """
    # Task 2c
    predictions = model.forward(X)
    num_predictions = predictions.shape[0]
    correct_predictions = np.sum(predictions.round() == targets)
    accuracy = correct_predictions / num_predictions
    return accuracy
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: BinaryModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 1]
        model: model of class BinaryModel
    Returns:
        Accuracy (float)
    """
    # Perform predictions
    Yhat = model.forward(X)
    # Calculate accuracy by dividing the number of correct predictions
    # by the total number of predictions
    accuracy = (Yhat.round() == targets).mean()
    return accuracy
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: BinaryModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 1]
        model: model of class BinaryModel
    Returns:
        Accuracy (float)
    """
    # Task 2c
    lgts = model.forward(X)
    # Threshold the outputs at 0.5, then count predictions that equal their target
    lgts = [(1 if l >= 0.5 else 0) for l in lgts]
    accuracy = (1 / targets.shape[0]) * np.sum(
        [(1 if t == l else 0) for (t, l) in zip(targets, lgts)])
    return accuracy
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: BinaryModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 1]
        model: model of class BinaryModel
    Returns:
        Accuracy (float)
    """
    # Task 2c
    output = model.forward(X)
    rounded_output = np.around(output)
    difference = targets - rounded_output
    num_correct = len(np.where(difference == 0)[0])
    accuracy = num_correct / len(output)
    return accuracy
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: BinaryModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 1]
        model: model of class BinaryModel
    Returns:
        Accuracy (float)
    """
    # Task 2c
    output = model.forward(X)
    predictions = np.where(output >= 0.5, 1, 0)
    correct_pred = np.count_nonzero(predictions == targets)
    total_pred = output.shape[0]
    accuracy = correct_pred / total_pred
    return accuracy
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: BinaryModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 1]
        model: model of class BinaryModel
    Returns:
        Accuracy (float)
    """
    # Task 2c
    # First compute the predictions
    outputs = model.forward(X)
    # Convert each prediction to 0 or 1 using 0.5 as the threshold,
    # then compute the accuracy
    accuracy = np.sum(
        np.where(outputs > 0.5, 1, 0) == targets) / targets.shape[0]
    return accuracy
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: BinaryModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 1]
        model: model of class BinaryModel
    Returns:
        Accuracy (float)
    """
    # Task 2c
    outputs = model.forward(X)
    N = targets.shape[0]
    binary_threshold = 0.5
    correctOutputs = 0
    for i in range(N):
        if (targets[i] == 1 and outputs[i] >= binary_threshold) or (
                targets[i] == 0 and outputs[i] < binary_threshold):
            correctOutputs += 1
    return correctOutputs / N
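# All of the variants above implement the same decision rule: threshold the
# sigmoid output at 0.5 and compare the hard predictions against the targets.
# A minimal vectorized sketch of that shared rule, assuming model.forward
# returns probabilities of shape [batch size, 1] (a sketch, not the
# assignment's reference solution):
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: BinaryModel) -> float:
    """Fraction of predictions matching the targets after thresholding at 0.5."""
    outputs = model.forward(X)    # probabilities in [0, 1]
    predictions = outputs >= 0.5  # boolean hard predictions (True == 1)
    return float(np.mean(predictions == targets))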
def train(num_epochs: int, learning_rate: float, batch_size: int,
          l2_reg_lambda: float):
    """
    Function that implements logistic regression through mini-batch
    gradient descent for the given hyperparameters
    """
    global X_train, X_val, X_test
    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    num_steps_per_val = num_batches_per_epoch // 5
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}
    model = BinaryModel(l2_reg_lambda)

    global_step = 0
    last_val_loss = float("inf")
    best_val_loss = float("inf")
    best_weights = None
    for epoch in range(num_epochs):
        for step in range(num_batches_per_epoch):
            # Select our mini-batch of images / labels
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            # Forward pass
            train_outputs = model.forward(X_batch)
            # Backward propagation
            model.backward(X_batch, train_outputs, Y_batch)
            model.w -= learning_rate * model.grad

            # Track training loss continuously
            _train_loss = cross_entropy_loss(Y_batch, train_outputs)
            train_loss[global_step] = _train_loss
            # Track validation loss / accuracy every time we progress 20% through the dataset
            if global_step % num_steps_per_val == 0:
                val_outputs = model.forward(X_val)
                _val_loss = cross_entropy_loss(Y_val, val_outputs)
                val_loss[global_step] = _val_loss

                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)
            global_step += 1

        # Compute validation loss for early stopping
        val_outputs = model.forward(X_val)
        _val_loss = cross_entropy_loss(Y_val, val_outputs)
        if _val_loss <= best_val_loss:
            # Snapshot the weights; a copy is required because the in-place
            # update above would otherwise keep mutating the saved array
            best_weights = model.w.copy()
            best_val_loss = _val_loss
        if _val_loss > last_val_loss:
            # Validation loss got worse: restore the best weights and stop
            model.w = best_weights
            break
        last_val_loss = _val_loss
    return model, train_loss, val_loss, train_accuracy, val_accuracy
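# Why the .copy() snapshot above matters: NumPy's in-place `-=` mutates the
# buffer that a plain assignment would alias. A standalone illustration, not
# part of the assignment code:
import numpy as np

w = np.zeros(3)
best = w                    # alias: `best` shares w's buffer
w -= 0.1                    # in-place update also changes `best`
assert best[0] == -0.1      # the "saved" weights were silently overwritten

snapshot = w.copy()         # independent snapshot
w -= 0.1
assert snapshot[0] == -0.1  # unaffected by later updates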
# ANY PARTS OF THE CODE BELOW THIS CAN BE CHANGED.

# Initialize model
model = BinaryModel()

# Train model
trainer = LogisticTrainer(
    model,
    learning_rate,
    batch_size,
    shuffle_dataset,
    early_stopping,
    X_train,
    Y_train,
    X_val,
    Y_val,
)
train_history, val_history = trainer.train(num_epochs)

# Print and plot whatever information you want
print("Final Train Cross Entropy Loss:",
      cross_entropy_loss(Y_train, model.forward(X_train)))
print("Final Validation Cross Entropy Loss:",
      cross_entropy_loss(Y_val, model.forward(X_val)))
print("Train accuracy:", calculate_accuracy(X_train, Y_train, model))
print("Validation accuracy:", calculate_accuracy(X_val, Y_val, model))

# Plot loss for first model (task 2b)
plt.ylim([0., .2])
utils.plot_loss(train_history["loss"], "Training Loss", npoints_to_average=10)
utils.plot_loss(val_history["loss"], "Validation Loss")
plt.legend()
plt.xlabel("Number of Training Steps")
plt.ylabel("Cross Entropy Loss - Average")
plt.savefig("task2b_binary_train_loss.png")
plt.show()
def train(
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        l2_reg_lambda: float  # Task 3 hyperparameter. Can be ignored before this.
):
    """
    Function that implements logistic regression through mini-batch
    gradient descent for the given hyperparameters
    """
    global X_train, X_val, X_test
    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    num_steps_per_val = num_batches_per_epoch // 5
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}
    model = BinaryModel(l2_reg_lambda, X_train.shape[0])

    # Initialize weights and outputs
    model.w = np.zeros((785, 1))

    # For early stopping: a sliding window over the last num_increases
    # validation checks
    is_val_loss_increasing = [False] * num_increases

    global_step = 0
    for epoch in range(num_epochs):
        for step in range(num_batches_per_epoch):
            # Select our mini-batch of images / labels
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            # Forward and backward pass
            output = model.forward(X_batch)
            model.backward(X_batch, output, Y_batch)

            # Update weights
            model.w = model.w - learning_rate * model.grad

            # Track training loss continuously (here over the full training set)
            output_train = model.forward(X_train)
            _train_loss = cross_entropy_loss(Y_train, output_train)
            train_loss[global_step] = _train_loss
            # Track validation loss / accuracy every time we progress 20% through the dataset
            if global_step % num_steps_per_val == 0:
                output_val = model.forward(X_val)
                _val_loss = cross_entropy_loss(Y_val, output_val)
                val_loss[global_step] = _val_loss

                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)

                # Early stopping
                stopping = False
                if with_stopping and global_step > 0:
                    stopping = early_stopping(num_increases,
                                              is_val_loss_increasing, val_loss,
                                              global_step, num_steps_per_val)
                if with_stopping and stopping:
                    break
            global_step += 1
        if with_stopping and stopping:
            print('Epoch =', epoch)
            break
    return model, train_loss, val_loss, train_accuracy, val_accuracy
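# The early_stopping helper above is referenced but not shown. A sketch
# reconstructed purely from the call site, under the assumption that the
# window list flags consecutive validation-loss increases; the actual helper
# may differ:
def early_stopping(num_increases, is_val_loss_increasing, val_loss,
                   global_step, num_steps_per_val):
    """Return True once the validation loss rose num_increases checks in a row."""
    previous = val_loss[global_step - num_steps_per_val]
    # Slide the window: drop the oldest flag, record whether the loss just rose
    is_val_loss_increasing.pop(0)
    is_val_loss_increasing.append(val_loss[global_step] > previous)
    return all(is_val_loss_increasing)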
def train(
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        l2_reg_lambda: float  # Task 3 hyperparameter. Can be ignored before this.
):
    """
    Function that implements logistic regression through mini-batch
    gradient descent for the given hyperparameters
    """
    global X_train, X_val, X_test, early_stopping_step
    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    num_steps_per_val = num_batches_per_epoch // 5
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}
    model = BinaryModel(l2_reg_lambda)

    # Early stopping var init; use infinity so the first validation loss
    # always counts as an improvement (Python has no INT_MAX constant)
    last_loss = float("inf")
    already_failed = 0

    global_step = 0
    for epoch in range(num_epochs):
        for step in range(num_batches_per_epoch):
            # Select our mini-batch of images / labels
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            # The mini-batch gradient descent algorithm for m batches
            # and a single epoch.
            model.backward(X_batch, model.forward(X_batch), Y_batch)
            model.w = model.w - learning_rate * model.grad

            # Track training loss continuously
            _train_loss = cross_entropy_loss(Y_batch, model.forward(X_batch))
            train_loss[global_step] = _train_loss[0, 0]
            # Track validation loss / accuracy every time we progress 20% through the dataset
            if global_step % num_steps_per_val == 0:
                _val_loss = cross_entropy_loss(Y_val, model.forward(X_val))
                val_loss[global_step] = _val_loss[0, 0]

                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)

                # Early stopping criteria
                if _val_loss[0, 0] > last_loss and already_failed > 20:
                    # Record where early stopping would have kicked in
                    # (training continues so the full curves can still be plotted)
                    # print("Early stopping kicked in at epoch nr.:", epoch + 1)
                    # return model, train_loss, val_loss, train_accuracy, val_accuracy
                    if early_stopping_step == 0:
                        early_stopping_step = global_step
                elif _val_loss[0, 0] > last_loss:
                    # Means the loss failed to improve this round
                    already_failed += 1
                else:
                    # The loss improved this round, reset the counter
                    last_loss = _val_loss[0, 0]
                    already_failed = 0
            global_step += 1
    return model, train_loss, val_loss, train_accuracy, val_accuracy
def train(
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        l2_reg_lambda: float  # Task 3 hyperparameter. Can be ignored before this.
):
    """
    Function that implements logistic regression through mini-batch
    gradient descent for the given hyperparameters
    """
    global X_train, X_val, X_test
    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    num_steps_per_val = num_batches_per_epoch // 5
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}
    model = BinaryModel(l2_reg_lambda)

    global_step = 0
    for epoch in range(num_epochs):
        for step in range(num_batches_per_epoch):
            # Select our mini-batch of images / labels
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            outputs = model.forward(X_batch)
            model.backward(X_batch, outputs, Y_batch)
            model.w = model.w - learning_rate * model.grad

            # Track training loss continuously
            _train_loss = cross_entropy_loss(Y_batch, outputs)
            train_loss[global_step] = _train_loss
            # Track validation loss / accuracy every time we progress 20% through the dataset
            if global_step % num_steps_per_val == 0:
                _val_loss = cross_entropy_loss(Y_val, model.forward(X_val))
                val_loss[global_step] = _val_loss

                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)

                # Early stopping: count how many of the last ten validation
                # checks failed to improve the accuracy; once the counter
                # reaches 9 or greater, early stopping kicks in
                counter = 0
                for i in range(10):
                    current = global_step - i * num_steps_per_val
                    previous = global_step - (i + 1) * num_steps_per_val
                    # Only compare once the last ten validation points exist
                    if (global_step >= 10 * num_steps_per_val
                            and val_accuracy[current] <= val_accuracy[previous]):
                        counter += 1
                if counter > 8:
                    print("Stopping early at step: " + str(global_step) +
                          " in epoch " + str(epoch))
                    return model, train_loss, val_loss, train_accuracy, val_accuracy
            global_step += 1
    return model, train_loss, val_loss, train_accuracy, val_accuracy
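# The early-stopping variants above (best-weight snapshotting, a failure
# counter, a sliding window of loss increases, and a ten-point accuracy
# window) all approximate the same patience heuristic. A self-contained
# sketch of that heuristic; the names EarlyStopper and should_stop are
# illustrative, not from the assignment:
class EarlyStopper:
    """Stop once the validation loss has not improved for `patience` checks."""

    def __init__(self, patience: int = 10):
        self.patience = patience
        self.best_loss = float("inf")
        self.num_bad_checks = 0

    def should_stop(self, val_loss: float) -> bool:
        if val_loss < self.best_loss:
            # New best loss: remember it and reset the patience counter
            self.best_loss = val_loss
            self.num_bad_checks = 0
        else:
            self.num_bad_checks += 1
        return self.num_bad_checks >= self.patience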