model.w[i] = orig # Actual gradient logits = model.forward(X) model.backward(X, logits, Y) difference = gradient_approximation - model.grad[i, 0] assert abs(difference) <= epsilon**2,\ f"Calculated gradient is incorrect. " \ f"Approximation: {gradient_approximation}, actual gradient: {model.grad[i,0]}\n" \ f"If this test fails there could be errors in your cross entropy loss function, " \ f"forward function or backward function" if __name__ == "__main__": category1, category2 = 2, 3 X_train, Y_train, *_ = utils.load_binary_dataset(category1, category2) X_train = pre_process_images(X_train) assert X_train.max() <= 1.0, f"The images (X_train) should be normalized to the range [-1, 1]" assert X_train.min() < 0 and X_train.min() >= -1, f"The images (X_train) should be normalized to the range [-1, 1]" assert X_train.shape[1] == 785,\ f"Expected X_train to have 785 elements per image. Shape was: {X_train.shape}" # Simple test for forward pass. Note that this does not cover all errors! model = BinaryModel() logits = model.forward(X_train) np.testing.assert_almost_equal( logits.mean(), .5, err_msg="Since the weights are all 0's, the sigmoid activation should be 0.5") # Gradient approximation check for 100 images X_train = X_train[:100]
        # NOTE(review): tail of a trainer/validation method — the enclosing
        # class and method header are above this chunk (not visible here);
        # indentation reconstructed, confirm nesting.
        accuracy_val = calculate_accuracy(
            X_val, Y_val, self.model)
        # Returns (loss, train accuracy, validation accuracy) — loss and
        # accuracy_train are computed earlier in the (unseen) method body.
        return loss, accuracy_train, accuracy_val


if __name__ == "__main__":
    # hyperparameters DO NOT CHANGE IF NOT SPECIFIED IN ASSIGNMENT TEXT
    num_epochs = 50
    learning_rate = 0.05
    batch_size = 128
    shuffle_dataset = False
    early_stopping = True

    # Load dataset (two-class subset: digits 2 vs 3) and preprocess
    # both the training and validation splits the same way.
    category1, category2 = 2, 3
    X_train, Y_train, X_val, Y_val = utils.load_binary_dataset(
        category1, category2)
    X_train = pre_process_images(X_train)
    X_val = pre_process_images(X_val)

    # ANY PARTS OF THE CODE BELOW THIS CAN BE CHANGED.
    # Initialize model
    model = BinaryModel()
    # Train model
    trainer = LogisticTrainer(
        model, learning_rate, batch_size, shuffle_dataset, early_stopping,
        X_train, Y_train, X_val, Y_val,
    )
    train_history, val_history = trainer.train(num_epochs)
                # NOTE(review): tail of train() — the function header and the
                # epoch/batch loop structure are above this chunk (not visible
                # here). Indentation below is reconstructed; in particular the
                # nesting of `global_step += 1` and `return` relative to the
                # loops is a best guess — confirm against the full file.
                # `_val_loss = 0` looks like a placeholder (validation loss is
                # never actually computed here) — presumably a TODO in the
                # assignment template.
                _val_loss = 0
                val_loss[global_step] = _val_loss
                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)
            global_step += 1
    return model, train_loss, val_loss, train_accuracy, val_accuracy


# Load dataset (two-class subset: digits 2 vs 3) with a held-out
# validation split.
category1, category2 = 2, 3
validation_percentage = 0.1
X_train, Y_train, X_val, Y_val, X_test, Y_test = utils.load_binary_dataset(
    category1, category2, validation_percentage)

# hyperparameters
num_epochs = 50
learning_rate = 0.2
batch_size = 128
l2_reg_lambda = 0  # 0 disables L2 regularization

model, train_loss, val_loss, train_accuracy, val_accuracy = train(
    num_epochs=num_epochs,
    learning_rate=learning_rate,
    batch_size=batch_size,
    l2_reg_lambda=l2_reg_lambda)

print("Final Train Cross Entropy Loss:",
      cross_entropy_loss(Y_train, model.forward(pre_process_images(X_train))))
# NOTE(review): the source chunk is truncated mid-statement below — the
# argument list of this print() continues past the end of this chunk.
print("Final Test Entropy Loss:",