def train(
        model: SoftmaxModel,
        datasets: typing.List[np.ndarray],
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        # Task 3 hyperparameters
        use_shuffle: bool,
        use_momentum: bool,
        momentum_gamma: float):
    X_train, Y_train, X_val, Y_val, X_test, Y_test = datasets

    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    num_steps_per_val = num_batches_per_epoch // 5
    # Tracking variables to track loss / accuracy
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}

    global_step = 0
    for epoch in range(num_epochs):
        # Task 3a: shuffle the training set before each epoch
        if use_shuffle:
            shuffle_in_unison(X_train, Y_train)
        for step in range(num_batches_per_epoch):
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            # Forward and backward pass, then a plain gradient descent step
            y_hat = model.forward(X_batch)
            model.backward(X_batch, y_hat, Y_batch)
            model.ws[0] -= learning_rate * model.grads[0]
            model.ws[1] -= learning_rate * model.grads[1]

            # Track train / validation loss / accuracy
            # every time we progress 20% through the dataset
            if (global_step % num_steps_per_val) == 0:
                _val_loss = cross_entropy_loss(Y_val, model.forward(X_val))
                val_loss[global_step] = _val_loss

                _train_loss = cross_entropy_loss(Y_batch, y_hat)
                train_loss[global_step] = _train_loss

                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)
            global_step += 1
    return model, train_loss, val_loss, train_accuracy, val_accuracy
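# `use_momentum` and `momentum_gamma` are accepted above but not applied in
# this variant. Below is a minimal sketch of a classical momentum update,
# assuming the same `model.ws` / `model.grads` list layout; the `velocities`
# buffer is an assumption, not part of the original code.
import numpy as np


def momentum_step(ws, grads, velocities, learning_rate, momentum_gamma):
    """One SGD-with-momentum step: v <- gamma * v + lr * grad; w <- w - v."""
    for i in range(len(ws)):
        velocities[i] = momentum_gamma * velocities[i] + learning_rate * grads[i]
        ws[i] -= velocities[i]


# Possible usage inside the batch loop, with the buffer initialized once
# before training as `velocities = [np.zeros_like(w) for w in model.ws]`:
#     if use_momentum:
#         momentum_step(model.ws, model.grads, velocities,
#                       learning_rate, momentum_gamma)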
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    # One-hot encode the argmax of the logits, then count the rows that
    # match the targets exactly (task 3c)
    logits = model.forward(X)
    outputs = np.zeros_like(logits)
    outputs[np.arange(len(logits)), logits.argmax(1)] = 1
    accuracy = np.mean((outputs == targets).all(1))
    return accuracy
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    # Compare the predicted class index against the target class index (task 3c)
    y_hat = model.forward(X)
    y_predicted_position = np.argmax(y_hat, axis=1)
    y_position = np.argmax(targets, axis=1)
    accuracy = np.count_nonzero(
        y_position == y_predicted_position) / X.shape[0]
    return accuracy
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    # Vector of predicted class indices
    predictions = np.argmax(model.forward(X), axis=1)
    # Count every time the prediction equals the target, then divide by the batch size
    accuracy = np.count_nonzero(
        predictions == np.argmax(targets, axis=1)) / X.shape[0]
    return accuracy
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    output = model.forward(X)
    # One-hot encode the predicted class; element-wise product with the
    # targets is nonzero exactly where the prediction is correct
    predictions = one_hot_encode(np.array([np.argmax(output, axis=1)]).T, 10)
    correct_pred = np.count_nonzero(targets * predictions)
    total_pred = output.shape[0]
    accuracy = correct_pred / total_pred
    return accuracy
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    # Perform predictions
    Yhat = model.forward(X)
    # Calculate accuracy as the fraction of predictions that match the targets
    accuracy = (Yhat.argmax(axis=1) == targets.argmax(axis=1)).mean()
    return accuracy
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    # Count the rows where the predicted class differs from the target class,
    # and subtract from the batch size to get the number correct (task 3c)
    outputs = model.forward(X)
    max_outputs = np.argmax(outputs, axis=1)
    max_targets = np.argmax(targets, axis=1)
    num_correct = outputs.shape[0] - np.count_nonzero(max_outputs - max_targets)
    accuracy = num_correct / outputs.shape[0]
    return accuracy
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    # First compute the predictions
    outputs = model.forward(X)
    # The predicted class is the index of the highest of the 10 outputs;
    # count the rows where it matches the target class and average
    accuracy = np.sum(
        outputs.argmax(1) == targets.argmax(1)) / targets.shape[0]
    return accuracy
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    number_of_predictions = X.shape[0]
    number_of_rights = 0
    y_hat = model.forward(X)
    for i in range(number_of_predictions):
        # A prediction counts as correct when the most probable class matches
        # the target class (rounding the probabilities instead would miss
        # rows where no class probability exceeds 0.5)
        if np.argmax(y_hat[i]) == np.argmax(targets[i]):
            number_of_rights += 1
    accuracy = number_of_rights / number_of_predictions
    return accuracy
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    # Fraction of rows where the predicted class matches the target (task 3c)
    predictions = model.forward(X)
    num_predictions = predictions.shape[0]
    correct_predictions = np.sum(
        np.argmax(predictions, axis=1) == np.argmax(targets, axis=1))
    return correct_predictions / num_predictions
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    # Count correct predictions sample by sample (task 3c)
    predictions = model.forward(X)
    num_correct = 0
    for n in range(X.shape[0]):
        prediction = np.argmax(predictions[n, :])
        target = np.argmax(targets[n, :])
        if prediction == target:
            num_correct += 1
    return num_correct / X.shape[0]
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    # One-hot encode each prediction row, then count the rows that match
    # the one-hot targets exactly (task 3c)
    y_pred = model.forward(X)
    y_pred_int = np.zeros_like(y_pred)
    correctly_guessed = 0
    for idx in range(y_pred.shape[0]):
        actual_pred = y_pred[idx, :]
        y_pred_int[idx, :] = [1 if pred == np.max(actual_pred) else 0
                              for pred in actual_pred]
        if (targets[idx, :] == y_pred_int[idx, :]).all():
            correctly_guessed += 1
    accuracy = correctly_guessed / X.shape[0]
    return accuracy
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    # Forward pass
    logits = model.forward(X)
    # Find the index of the max value in both arrays
    logits = logits.argmax(axis=1)
    targets = targets.argmax(axis=1)
    # Count the equal entries and average
    accuracy = np.count_nonzero(np.equal(targets, logits)) / X.shape[0]
    return accuracy
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    lgts = model.forward(X)
    lgts_max = np.argmax(lgts, axis=1)
    targets_max = np.argmax(targets, axis=1)
    # Fraction of samples where the predicted class matches the target class
    accuracy = np.sum(lgts_max == targets_max) / targets.shape[0]
    return accuracy
learning_rate = 0.01
batch_size = 128
l2_reg_lambda = 0
shuffle_dataset = True

# Load dataset
X_train, Y_train, X_val, Y_val = utils.load_full_mnist()
X_train = pre_process_images(X_train)
X_val = pre_process_images(X_val)
Y_train = one_hot_encode(Y_train, 10)
Y_val = one_hot_encode(Y_val, 10)

# ANY PARTS OF THE CODE BELOW THIS CAN BE CHANGED.

# Initialize model
model = SoftmaxModel(l2_reg_lambda)
# Train model
trainer = SoftmaxTrainer(
    model, learning_rate, batch_size, shuffle_dataset,
    X_train, Y_train, X_val, Y_val,
)
train_history, val_history = trainer.train(num_epochs)

print("Final Train Cross Entropy Loss:",
      cross_entropy_loss(Y_train, model.forward(X_train)))
)
train_history_reg01, val_history_reg01 = trainer.train(num_epochs)

# You can finish the rest of task 4 below this point.
# Plotting for different lambdas:
l2_lambdas = [1, .1, .01, .001]
val_acc = []
weights = []
for i in l2_lambdas:
    model = SoftmaxModel(l2_reg_lambda=i)
    # Train model
    trainer = SoftmaxTrainer(
        model, learning_rate, batch_size, shuffle_dataset,
        X_train, Y_train, X_val, Y_val,
    )
    train_history, val_history = trainer.train(num_epochs)
    val_acc.append(val_history["accuracy"])
    weights.append(model.w)
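# The "plotting for different lambdas" comment above collects `val_acc`, but
# the plot itself is not shown. A minimal sketch, assuming matplotlib (used
# elsewhere in these scripts) and that each entry of `val_acc` is a
# step -> accuracy dict, as produced by the tracking code in `train`:
import matplotlib.pyplot as plt

for lam, acc in zip(l2_lambdas, val_acc):
    steps = sorted(acc.keys())
    plt.plot(steps, [acc[s] for s in steps], label=f"lambda = {lam}")
plt.xlabel("Global training step")
plt.ylabel("Validation accuracy")
plt.legend()
plt.show()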
learning_rate = 0.01
batch_size = 128
l2_reg_lambda = 0
shuffle_dataset = True

# Load dataset
X_train, Y_train, X_val, Y_val = utils.load_full_mnist()
X_train = pre_process_images(X_train)
X_val = pre_process_images(X_val)
Y_train = one_hot_encode(Y_train, 10)
Y_val = one_hot_encode(Y_val, 10)

# ANY PARTS OF THE CODE BELOW THIS CAN BE CHANGED.

# Initialize model
model = SoftmaxModel(l2_reg_lambda)
# Train model
trainer = SoftmaxTrainer(
    model, learning_rate, batch_size, shuffle_dataset,
    X_train, Y_train, X_val, Y_val,
)
train_history, val_history = trainer.train(num_epochs)

print("Final Train Cross Entropy Loss:",
      cross_entropy_loss(Y_train, model.forward(X_train)))
print("Final Validation Cross Entropy Loss:",
      cross_entropy_loss(Y_val, model.forward(X_val)))
print("Final Train accuracy:", calculate_accuracy(X_train, Y_train, model))
print("Final Validation accuracy:", calculate_accuracy(X_val, Y_val, model))

plt.ylim([0.2, .6])
learning_rate = 0.01
batch_size = 128
l2_reg_lambda = 0
shuffle_dataset = True

# Load dataset
X_train, Y_train, X_val, Y_val = utils.load_full_mnist()
X_train = pre_process_images(X_train)
X_val = pre_process_images(X_val)
Y_train = one_hot_encode(Y_train, 10)
Y_val = one_hot_encode(Y_val, 10)

# ANY PARTS OF THE CODE BELOW THIS CAN BE CHANGED.

# Initialize the model
model_a = SoftmaxModel(X_train.shape[1], Y_train.shape[1], l2_reg_lambda)
# Train model
trainer = SoftmaxTrainer(
    model_a, learning_rate, batch_size, shuffle_dataset,
    X_train, Y_train, X_val, Y_val,
)
train_history, val_history = trainer.train(num_epochs)

print("Final Train Cross Entropy Loss:",
      cross_entropy_loss(Y_train, model_a.forward(X_train)))
X_train = pre_process_images(X_train)
X_val = pre_process_images(X_val)
Y_train = one_hot_encode(Y_train, 10)
Y_val = one_hot_encode(Y_val, 10)

# ANY PARTS OF THE CODE BELOW THIS CAN BE CHANGED.

# Train a model with L2 regularization (task 4b)
l2_lambdas = [1, .1, .01, .001, 0]
norm_list = []
train_history_list = []
val_history_list = []
for l in l2_lambdas:
    # Initialize model
    model = SoftmaxModel(l)
    # Train model
    trainer = SoftmaxTrainer(
        model, learning_rate, batch_size, shuffle_dataset,
        X_train, Y_train, X_val, Y_val,
    )
    train_history, val_history = trainer.train(num_epochs)
    train_history_list.append(train_history)
    val_history_list.append(val_history)
    # Track the L2 norm of the trained weights for each lambda (task 4b);
    # assumes `model.w` holds the weight matrix, as in the related snippet
    norm_list.append(np.linalg.norm(model.w))
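# A minimal sketch of the matching norm-vs-lambda plot for task 4 (an
# addition, not part of the original snippet), assuming matplotlib as used
# elsewhere in these scripts and the `norm_list` filled in the loop above:
import matplotlib.pyplot as plt

plt.plot(l2_lambdas, norm_list, marker="o")
plt.xlabel("L2 regularization lambda")
plt.ylabel("L2 norm of trained weights")
plt.show()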
Y_val = one_hot_encode(Y_val, 10)
Y_test = one_hot_encode(Y_test, 10)

# Hyperparameters
num_epochs = 20
learning_rate = .1
batch_size = 32
neurons_per_layer = [64, 10]
momentum_gamma = .9  # Task 3 hyperparameter

# Settings for task 3. Keep all False for task 2.
use_shuffle = False
use_improved_sigmoid = False
use_improved_weight_init = False
use_momentum = False

model = SoftmaxModel(neurons_per_layer,
                     use_improved_sigmoid,
                     use_improved_weight_init)
model, train_loss, val_loss, train_accuracy, val_accuracy = train(
    model,
    [X_train, Y_train, X_val, Y_val, X_test, Y_test],
    num_epochs=num_epochs,
    learning_rate=learning_rate,
    batch_size=batch_size,
    use_shuffle=use_shuffle,
    use_momentum=use_momentum,
    momentum_gamma=momentum_gamma)

print("Final Train Cross Entropy Loss:",
      cross_entropy_loss(Y_train, model.forward(X_train)))
print("Final Validation Cross Entropy Loss:",
      cross_entropy_loss(Y_val, model.forward(X_val)))
print("Final Test Cross Entropy Loss:",
      cross_entropy_loss(Y_test, model.forward(X_test)))