def test_gradient_check(self):
    """Run gradient_check on f(x) = sum(x ** 2) for 0-D, 1-D and 2-D points.

    Every call is asserted to return an empty list (presumably the list of
    detected gradient mismatches -- confirm against gradient_check).
    """

    def quad(x):
        # Cost sum(x ** 2) together with its analytic gradient 2 * x.
        return np.sum(x ** 2), x * 2

    # Points are built lazily so each random draw happens immediately before
    # its own check, in the order scalar -> 1-D -> 2-D.
    point_factories = (
        lambda: np.array(123.456),  # scalar test
        lambda: np.random.randn(3),  # 1-D test
        lambda: np.random.randn(4, 5),  # 2-D test
    )
    for make_point in point_factories:
        self.assertEqual([], gradient_check(quad, make_point()))
def test_gradient_check(self):
    """Verify gradient_check reports nothing for the quadratic sum(x ** 2).

    The check is repeated for a 0-D, a 1-D and a 2-D evaluation point; each
    result is asserted to equal the empty list.
    """

    def quad(x):
        # Returns the pair (cost, gradient) for f(x) = sum(x ** 2).
        cost = np.sum(x**2)
        gradient = x * 2
        return cost, gradient

    # scalar test
    scalar_point = np.array(123.456)
    self.assertEqual([], gradient_check(quad, scalar_point))

    # 1-D test
    vector_point = np.random.randn(3)
    self.assertEqual([], gradient_check(quad, vector_point))

    # 2-D test
    matrix_point = np.random.randn(4, 5)
    self.assertEqual([], gradient_check(quad, matrix_point))
def test_gradient_check_sigmoid(self):
    """Gradient-check expit paired with sigmoid_gradient at x = 0.

    NOTE(review): sigmoid_gradient is fed expit(x), so it presumably takes
    the sigmoid *output* rather than the raw input -- confirm.
    """

    def sigmoid_check(x):
        # expit is deliberately evaluated twice, exactly as the value and
        # as the argument handed to sigmoid_gradient.
        value = expit(x)
        slope = sigmoid_gradient(expit(x))
        return value, slope

    origin = np.array(0.0)
    self.assertEqual([], gradient_check(sigmoid_check, origin))
def test_supervised_gradient_descent(self):
    """Fit a two-parameter linear regression and compare with the closed form.

    The bound cost/gradient function is gradient-checked first, then
    gradient_descent is run from the same starting point and its result is
    compared element-wise (places=0) with the normal-equation solution.
    """

    def linear_regression_cost_gradient(parameters, input, output):
        # Squared error of the linear prediction and its gradient with
        # respect to `parameters`.
        residual = np.dot(parameters, input) - output
        return residual ** 2, 2.0 * residual * input

    inputs = np.random.normal(0.0, size=(10, 2))
    outputs = np.random.normal(0.0, size=10)
    initial_parameters = np.random.uniform(-1.0, 1.0, size=2)

    # Create cost and gradient function for supervised SGD and check its gradient
    cost_gradient = bind_cost_gradient(
        linear_regression_cost_gradient, inputs, outputs, sampler=batch_sampler
    )
    self.assertEqual([], gradient_check(cost_gradient, initial_parameters))

    # Run gradient descent on the function and see if it minimizes cost function
    # (the third argument is presumably the iteration count -- confirm).
    fitted, _ = gradient_descent(cost_gradient, initial_parameters, 10)

    # Compute exact solution of linear regression by closed form:
    # solve (X^T X) w = X^T y.
    gram = np.dot(inputs.T, inputs)
    moment = np.dot(inputs.T, outputs)
    closed_form = np.linalg.solve(gram, moment)

    for expected_weight, fitted_weight in zip(closed_form, fitted):
        self.assertAlmostEqual(expected_weight, fitted_weight, places=0)
def assertMultinomialLogisticRegression(self, sampler):
    """Gradient-check and train multinomial logistic regression with `sampler`.

    Uses 3 random samples of dimension 5 with labels drawn from 4 classes,
    then asserts that every training label is recovered by the argmax of the
    softmax scores.

    Args:
        sampler: forwarded unchanged to bind_cost_gradient as `sampler=`.
    """
    data_size, input_size, output_size = 3, 5, 4

    inputs = np.random.uniform(-10.0, 10.0, size=(data_size, input_size))
    outputs = np.random.randint(0, output_size, size=data_size)
    initial_parameters = np.random.normal(size=(input_size, output_size))

    # Create cost and gradient function for gradient descent and check its gradient
    cost_gradient = bind_cost_gradient(
        multinomial_logistic_regression_cost_gradient,
        inputs,
        outputs,
        sampler=sampler,
    )
    self.assertEqual([], gradient_check(cost_gradient, initial_parameters))

    # Train multinomial logistic regression and see if it predicts correct labels
    trained, _ = gradient_descent(cost_gradient, initial_parameters, 100)
    # One column of class scores per sample, assuming gradient_descent keeps
    # the (input_size, output_size) parameter shape -- confirm.
    scores = np.dot(trained.T, inputs.T)
    predicted_labels = np.argmax(softmax(scores), axis=0)

    for expected_label, predicted_label in zip(outputs, predicted_labels):
        self.assertEqual(expected_label, predicted_label)
def test_supervised_gradient_descent(self):
    """Check that gradient descent on linear regression lands near the exact fit.

    Ten random 2-D samples are generated; the bound cost/gradient is
    gradient-checked, optimised with gradient_descent, and the resulting
    parameters are compared (places=0) against the normal-equation solution.
    """

    def linear_regression_cost_gradient(parameters, input, output):
        # error = prediction - target; cost = error^2; d(cost)/d(parameters) = 2 * error * input
        error = np.dot(parameters, input) - output
        cost = error ** 2
        gradient = 2.0 * error * input
        return cost, gradient

    inputs = np.random.normal(0.0, size=(10, 2))
    outputs = np.random.normal(0.0, size=10)
    initial_parameters = np.random.uniform(-1.0, 1.0, size=2)

    # Create cost and gradient function for supervised SGD and check its gradient
    cost_gradient = bind_cost_gradient(
        linear_regression_cost_gradient, inputs, outputs, sampler=batch_sampler
    )
    check_report = gradient_check(cost_gradient, initial_parameters)
    self.assertEqual([], check_report)

    # Run gradient descent on the function and see if it minimizes cost function
    descent_outcome = gradient_descent(cost_gradient, initial_parameters, 10)
    actual, _history = descent_outcome

    # Compute exact solution of linear regression by closed form
    design_transposed = inputs.T
    expected = np.linalg.solve(
        np.dot(design_transposed, inputs), np.dot(design_transposed, outputs)
    )

    for exact_value, descent_value in zip(expected, actual):
        self.assertAlmostEqual(exact_value, descent_value, places=0)
def gradient_check(self, inputs, outputs):
    """Gradient-check the flattened NPLM cost/gradient at a random point.

    Args:
        inputs: passed through to bind_cost_gradient.
        outputs: passed through to bind_cost_gradient.

    Returns:
        Whatever the module-level gradient_check returns for the bound
        cost/gradient function evaluated at normally distributed parameters.
    """
    # Create cost and gradient function for gradient check
    shapes = [self.W_shape, self.U_shape, self.H_shape, self.C_shape]
    flatten_nplm_cost_gradient = flatten_cost_gradient(nplm_cost_gradient, shapes)
    cost_gradient = bind_cost_gradient(flatten_nplm_cost_gradient, inputs, outputs)

    # Total number of scalars across all parameter blocks.  The builtin sum
    # with np.prod replaces np.sum(<generator>) (deprecated) and np.product
    # (removed in NumPy 2.0).
    parameters_size = sum(int(np.prod(shape)) for shape in shapes)
    initial_parameters = np.random.normal(size=parameters_size)

    # Gradient check!  Inside the method body the bare name resolves to the
    # module-level gradient_check function, not to this method.
    return gradient_check(cost_gradient, initial_parameters)
def test_neural_network(self):
    """Train a 2-2-2 network on XOR; check gradient, cost history and labels.

    Steps: gradient-check the flattened cost/gradient at the initial
    parameters, run gradient_descent, assert the recorded costs never
    increase, then rebuild W1/W2 from the flat parameter vector and assert
    each XOR sample is classified correctly.
    """
    np.random.seed(0)

    input_size = 2
    hidden_size = 2
    output_size = 2

    # Classic XOR test data
    inputs = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
    outputs = np.array([0, 1, 1, 0])

    # Create cost and gradient function for gradient descent
    shapes = [(hidden_size, input_size), (output_size, hidden_size)]
    flatten_neural_network_cost_gradient = flatten_cost_gradient(
        neural_network_cost_gradient, shapes)
    cost_gradient = bind_cost_gradient(
        flatten_neural_network_cost_gradient, inputs, outputs,
        sampler=batch_sampler)

    # Check gradient with initial parameters.
    # The builtin sum with np.prod replaces np.sum(<generator>) (deprecated)
    # and np.product (removed in NumPy 2.0); the resulting size is unchanged.
    parameters_size = sum(int(np.prod(shape)) for shape in shapes)
    initial_parameters = np.random.normal(size=parameters_size)
    result = gradient_check(cost_gradient, initial_parameters)
    self.assertEqual([], result)

    # Train neural network (this is slow even such a simple task!)
    final_parameters, cost_history = gradient_descent(
        cost_gradient, initial_parameters, 1000)

    # Check if cost monotonically decrease (no guarantee in theory, but works in practice)
    previous_cost = None
    for cost in cost_history:
        if previous_cost is not None:
            self.assertLessEqual(cost, previous_cost)
        previous_cost = cost

    # TODO: extract duplicated code for prediction to reusable component
    # The flat vector stores W1 first, followed by W2.
    split_index = hidden_size * input_size
    W1, W2 = np.split(final_parameters, [split_index])
    W1 = W1.reshape((hidden_size, input_size))
    W2 = W2.reshape((output_size, hidden_size))

    for sample, expected_label in zip(inputs, outputs):
        column = sample.reshape(-1, 1)
        hidden_layer = expit(W1.dot(column))
        inside_softmax = W2.dot(hidden_layer)
        prediction = softmax(inside_softmax.reshape(-1)).reshape(-1, 1)
        label = np.argmax(prediction)

        # Check if output is correctly predicted
        self.assertEqual(expected_label, label)
def gradient_check(self, inputs, outputs):
    """Run the module-level gradient_check on the bound, flattened NPLM cost.

    The four parameter shapes stored on the instance (W, U, H, C) define the
    layout of the flat parameter vector; the check is evaluated at a point
    drawn from a standard normal distribution.

    Args:
        inputs: forwarded to bind_cost_gradient.
        outputs: forwarded to bind_cost_gradient.

    Returns:
        The value returned by the module-level gradient_check.
    """
    # Create cost and gradient function for gradient check
    shapes = [self.W_shape, self.U_shape, self.H_shape, self.C_shape]
    flatten_nplm_cost_gradient = flatten_cost_gradient(
        nplm_cost_gradient, shapes)
    cost_gradient = bind_cost_gradient(flatten_nplm_cost_gradient, inputs,
                                       outputs)

    # Gradient check!
    # np.sum() over a generator is deprecated and np.product() no longer
    # exists in NumPy 2.0, so the element count is accumulated with the
    # builtin sum over np.prod instead.
    parameters_size = sum(int(np.prod(shape)) for shape in shapes)
    initial_parameters = np.random.normal(size=parameters_size)
    # The bare name below is the module-level function, not this method.
    result = gradient_check(cost_gradient, initial_parameters)
    return result
def test_logistic_regression(self):
    """Gradient-check and train logistic regression on one random sample.

    A single 10-dimensional sample with a random binary label is used; after
    gradient_descent the thresholded sigmoid output is asserted to equal the
    label.
    """
    features = np.random.uniform(-10.0, 10.0, size=10)
    label = np.random.randint(0, 2)

    def logistic_regression_wrapper(parameters):
        # Fixes the sample and label so only the parameters remain free.
        return logistic_regression_cost_gradient(parameters, features, label)

    initial_parameters = np.random.normal(scale=1e-5, size=10)
    self.assertEqual(
        [], gradient_check(logistic_regression_wrapper, initial_parameters))

    # Train logistic regression and see if it predicts correct label
    trained, _ = gradient_descent(
        logistic_regression_wrapper, initial_parameters, 100)
    probability = expit(np.dot(features, trained))
    predicted = probability > 0.5
    self.assertEqual(label, predicted)
def test_neural_network(self):
    """End-to-end XOR test for the two-layer network cost/gradient.

    With the random seed fixed to 0, the test gradient-checks the flattened
    cost/gradient, trains with gradient_descent, asserts that the cost
    history is non-increasing, and finally asserts that each of the four XOR
    samples is assigned its expected label.
    """
    np.random.seed(0)

    input_size = 2
    hidden_size = 2
    output_size = 2

    # Classic XOR test data
    inputs = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
    outputs = np.array([0, 1, 1, 0])

    # Create cost and gradient function for gradient descent
    W1_shape = (hidden_size, input_size)
    W2_shape = (output_size, hidden_size)
    shapes = [W1_shape, W2_shape]
    flatten_neural_network_cost_gradient = flatten_cost_gradient(neural_network_cost_gradient, shapes)
    cost_gradient = bind_cost_gradient(flatten_neural_network_cost_gradient, inputs, outputs, sampler=batch_sampler)

    # Check gradient with initial parameters.
    # np.sum(<generator>) is deprecated and np.product was removed in
    # NumPy 2.0; builtin sum over np.prod yields the same element count.
    parameters_size = sum(int(np.prod(shape)) for shape in shapes)
    initial_parameters = np.random.normal(size=parameters_size)
    result = gradient_check(cost_gradient, initial_parameters)
    self.assertEqual([], result)

    # Train neural network (this is slow even such a simple task!)
    final_parameters, cost_history = gradient_descent(cost_gradient, initial_parameters, 1000)

    # Check if cost monotonically decrease (no guarantee in theory, but works in practice)
    previous_cost = None
    for cost in cost_history:
        if previous_cost is not None:
            self.assertLessEqual(cost, previous_cost)
        previous_cost = cost

    # TODO: extract duplicated code for prediction to reusable component
    # Undo the flattening: the first hidden_size * input_size entries are W1.
    split_index = hidden_size * input_size
    W1, W2 = np.split(final_parameters, [split_index])
    W1 = W1.reshape(W1_shape)
    W2 = W2.reshape(W2_shape)

    for features, target in zip(inputs, outputs):
        features = features.reshape(-1, 1)
        hidden_layer = expit(W1.dot(features))
        inside_softmax = W2.dot(hidden_layer)
        prediction = softmax(inside_softmax.reshape(-1)).reshape(-1, 1)
        label = np.argmax(prediction)

        # Check if output is correctly predicted
        self.assertEqual(target, label)
def test_logistic_regression(self):
    """Single-sample logistic regression: gradient check, then fit the label.

    The sample is 10 uniform values in [-10, 10) and the label is a random
    0/1 integer. The test passes when gradient_check returns [] and the
    trained model's sigmoid output, thresholded at 0.5, equals the label.
    """
    sample = np.random.uniform(-10.0, 10.0, size=10)
    target = np.random.randint(0, 2)

    def logistic_regression_wrapper(parameters):
        # Closure over the fixed (sample, target) pair.
        return logistic_regression_cost_gradient(parameters, sample, target)

    initial_parameters = np.random.normal(scale=1e-5, size=10)
    check_report = gradient_check(logistic_regression_wrapper,
                                  initial_parameters)
    self.assertEqual([], check_report)

    # Train logistic regression and see if it predicts correct label
    descent_outcome = gradient_descent(logistic_regression_wrapper,
                                       initial_parameters, 100)
    final_parameters = descent_outcome[0]
    activation = np.dot(sample, final_parameters)
    is_positive = expit(activation) > 0.5
    self.assertEqual(target, is_positive)
def assertMultinomialLogisticRegression(self, sampler):
    """Shared assertion: multinomial logistic regression fits its training data.

    Builds a random 3-sample, 5-feature, 4-class problem, gradient-checks the
    cost/gradient bound with the given sampler, trains it with
    gradient_descent, and asserts that each predicted class equals the
    training label.

    Args:
        sampler: sampling strategy handed to bind_cost_gradient.
    """
    data_size = 3
    input_size = 5
    output_size = 4

    sample_shape = (data_size, input_size)
    parameter_shape = (input_size, output_size)

    inputs = np.random.uniform(-10.0, 10.0, size=sample_shape)
    outputs = np.random.randint(0, output_size, size=data_size)
    initial_parameters = np.random.normal(size=parameter_shape)

    # Create cost and gradient function for gradient descent and check its gradient
    cost_gradient = bind_cost_gradient(
        multinomial_logistic_regression_cost_gradient, inputs, outputs,
        sampler=sampler)
    check_report = gradient_check(cost_gradient, initial_parameters)
    self.assertEqual([], check_report)

    # Train multinomial logistic regression and see if it predicts correct labels
    final_parameters, _history = gradient_descent(
        cost_gradient, initial_parameters, 100)
    class_probabilities = softmax(np.dot(final_parameters.T, inputs.T))
    # argmax over axis 0 picks one class index per column of the score matrix.
    predictions = np.argmax(class_probabilities, axis=0)

    for index, expected_label in enumerate(outputs):
        self.assertEqual(expected_label, predictions[index])
def assertLogisticRegression(self, sampler):
    """Gradient-check and train binary logistic regression with `sampler`.

    Uses 3 random samples of dimension 5 with random 0/1 labels and asserts
    that every training label is reproduced by thresholding the sigmoid
    output at 0.5.

    Args:
        sampler: forwarded unchanged to bind_cost_gradient as `sampler=`.
    """
    data_size, input_size = 3, 5

    inputs = np.random.uniform(-10.0, 10.0, size=(data_size, input_size))
    outputs = np.random.randint(0, 2, size=data_size)
    initial_parameters = np.random.normal(scale=1e-5, size=input_size)

    # Create cost and gradient function for gradient descent and check its gradient
    cost_gradient = bind_cost_gradient(
        logistic_regression_cost_gradient, inputs, outputs, sampler=sampler
    )
    self.assertEqual([], gradient_check(cost_gradient, initial_parameters))

    # Train logistic regression and see if it predicts correct labels
    trained, _ = gradient_descent(cost_gradient, initial_parameters, 100)
    probabilities = expit(np.dot(inputs, trained))
    predicted_labels = probabilities > 0.5

    # Binary classification of 3 data points with 5 dimension is always linearly separable
    for expected_label, predicted_label in zip(outputs, predicted_labels):
        self.assertEqual(expected_label, predicted_label)
def test_multinomial_logistic_regression(self):
    """Gradient-check and train multinomial logistic regression on one sample.

    A single 10-dimensional sample with a random label in [0, 5) is used.
    After training, the softmax output thresholded at 0.5 must be set for
    the true label and unset for every other class.
    """
    input_size = 10
    output_size = 5
    # Named `sample`/`label` so the builtin `input` is not shadowed.
    sample = np.random.normal(size=(input_size,))
    label = np.random.randint(0, output_size)

    def multinomial_logistic_regression_wrapper(parameters):
        # Fixes the sample and label so only the parameters remain free.
        return multinomial_logistic_regression_cost_gradient(parameters, sample, label)

    initial_parameters = np.random.normal(size=(input_size, output_size))
    result = gradient_check(multinomial_logistic_regression_wrapper, initial_parameters)
    self.assertEqual([], result)

    # Train multinomial logistic regression and see if it predicts correct label
    final_parameters, cost_history = gradient_descent(
        multinomial_logistic_regression_wrapper, initial_parameters, 100)
    prediction = softmax(np.dot(final_parameters.T, sample)) > 0.5

    # enumerate replaces the range(len(...)) index loop; the assertions made
    # for each class are unchanged.
    for class_index, is_predicted in enumerate(prediction):
        if label == class_index:
            self.assertEqual(1, is_predicted)
        else:
            self.assertEqual(0, is_predicted)
def assertLogisticRegression(self, sampler):
    """Shared assertion: binary logistic regression fits its training data.

    Builds a random problem of 3 samples with 5 features each, gradient-checks
    the cost/gradient bound with the given sampler, trains it with
    gradient_descent, and asserts each thresholded prediction equals its label.

    Args:
        sampler: sampling strategy handed to bind_cost_gradient.
    """
    data_size = 3
    input_size = 5

    sample_shape = (data_size, input_size)
    inputs = np.random.uniform(-10.0, 10.0, size=sample_shape)
    outputs = np.random.randint(0, 2, size=data_size)
    initial_parameters = np.random.normal(scale=1e-5, size=input_size)

    # Create cost and gradient function for gradient descent and check its gradient
    cost_gradient = bind_cost_gradient(logistic_regression_cost_gradient,
                                       inputs, outputs, sampler=sampler)
    check_report = gradient_check(cost_gradient, initial_parameters)
    self.assertEqual([], check_report)

    # Train logistic regression and see if it predicts correct labels
    final_parameters, _history = gradient_descent(
        cost_gradient, initial_parameters, 100)
    logits = np.dot(inputs, final_parameters)
    predictions = expit(logits) > 0.5

    # Binary classification of 3 data points with 5 dimension is always linearly separable
    for index, expected_label in enumerate(outputs):
        self.assertEqual(expected_label, predictions[index])
def test_multinomial_logistic_regression(self):
    """Single-sample multinomial logistic regression: check gradient, then fit.

    The sample has 10 normally distributed features and a random label out of
    5 classes. The test passes when gradient_check returns [] and, after
    training, only the true class has a softmax output above 0.5.
    """
    input_size = 10
    output_size = 5
    # Local names avoid shadowing the builtin `input`.
    features = np.random.normal(size=(input_size, ))
    target = np.random.randint(0, output_size)

    def multinomial_logistic_regression_wrapper(parameters):
        # Closure over the fixed (features, target) pair.
        return multinomial_logistic_regression_cost_gradient(
            parameters, features, target)

    initial_parameters = np.random.normal(size=(input_size, output_size))
    result = gradient_check(multinomial_logistic_regression_wrapper,
                            initial_parameters)
    self.assertEqual([], result)

    # Train multinomial logistic regression and see if it predicts correct label
    final_parameters, cost_history = gradient_descent(
        multinomial_logistic_regression_wrapper, initial_parameters, 100)
    prediction = softmax(np.dot(final_parameters.T, features)) > 0.5

    # Iterate with enumerate instead of indexing through range(len(...)).
    # Exactly the true class is expected to be flagged (1), all others 0.
    for class_index, flagged in enumerate(prediction):
        expected_flag = 1 if target == class_index else 0
        self.assertEqual(expected_flag, flagged)