def assertMultinomialLogisticRegression(self, sampler):
    data_size = 3
    input_size = 5
    output_size = 4
    inputs = np.random.uniform(-10.0, 10.0, size=(data_size, input_size))
    outputs = np.random.randint(0, output_size, size=data_size)
    initial_parameters = np.random.normal(size=(input_size, output_size))

    # Create cost and gradient function for gradient descent and check its gradient
    cost_gradient = bind_cost_gradient(multinomial_logistic_regression_cost_gradient,
                                       inputs, outputs, sampler=sampler)
    result = gradient_check(cost_gradient, initial_parameters)
    self.assertEqual([], result)

    # Train multinomial logistic regression and see if it predicts correct labels
    final_parameters, cost_history = gradient_descent(cost_gradient, initial_parameters, 100)
    predictions = np.argmax(softmax(np.dot(final_parameters.T, inputs.T)), axis=0)
    for output, prediction in zip(outputs, predictions):
        self.assertEqual(output, prediction)

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('input_file', type=argparse.FileType())
    parser.add_argument('output_file', type=argparse.FileType('w'))
    parser.add_argument('vector_size', type=int)
    parser.add_argument('context_size', type=int)
    parser.add_argument('vocabulary_size', type=int)
    args = parser.parse_args()

    sentences = list(lower(tokenize(args.input_file)))
    dictionary = build_dictionary(sentences, args.vocabulary_size)
    indices = to_indices(sentences, dictionary)
    inputs, outputs = create_context(indices, args.context_size)
    cost_gradient = bind_cost_gradient(skip_gram_cost_gradient, inputs, outputs,
                                       sampler=get_stochastic_sampler(100))
    initial_parameters = np.random.normal(size=(2, len(dictionary) + 1, args.vector_size))
    parameters, cost_history = gradient_descent(cost_gradient, initial_parameters, 10000)

    input_vectors, output_vectors = parameters
    word_vectors = input_vectors + output_vectors
    sorted_pairs = sorted(dictionary.items(), key=operator.itemgetter(1))
    words = [word for word, index in sorted_pairs]
    for word in words:
        vector = word_vectors[dictionary[word]]
        vector_string = ' '.join(str(element) for element in vector)
        print(word, vector_string, file=args.output_file)

def test_supervised_gradient_descent(self):
    def linear_regression_cost_gradient(parameters, input, output):
        prediction = np.dot(parameters, input)
        cost = (prediction - output) ** 2
        gradient = 2.0 * (prediction - output) * input
        return cost, gradient

    inputs = np.random.normal(0.0, size=(10, 2))
    outputs = np.random.normal(0.0, size=10)
    initial_parameters = np.random.uniform(-1.0, 1.0, size=2)

    # Create cost and gradient function for supervised SGD and check its gradient
    cost_gradient = bind_cost_gradient(linear_regression_cost_gradient,
                                       inputs, outputs, sampler=batch_sampler)
    result = gradient_check(cost_gradient, initial_parameters)
    self.assertEqual([], result)

    # Run gradient descent on the function and see if it minimizes the cost
    actual, cost_history = gradient_descent(cost_gradient, initial_parameters, 10)

    # Compute the exact (closed-form) solution of linear regression
    expected = np.linalg.solve(np.dot(inputs.T, inputs), np.dot(inputs.T, outputs))
    for e, a in zip(expected, actual):
        self.assertAlmostEqual(e, a, places=0)

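# Note: the "expected" value in the test above is the ordinary least-squares solution of
# the normal equations X^T X w = X^T y. As an illustrative alternative (not part of the
# original test), np.linalg.lstsq yields the same solution and is more robust when
# X^T X is ill-conditioned:
#
#     expected, residuals, rank, singular_values = np.linalg.lstsq(inputs, outputs, rcond=None)
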
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('input_file', type=argparse.FileType())
    parser.add_argument('output_file', type=argparse.FileType('w'))
    parser.add_argument('vector_size', type=int)
    parser.add_argument('context_size', type=int)
    args = parser.parse_args()

    dictionary = {}
    data = preprocess(args.input_file, dictionary)
    inputs, outputs = load_data(data, args.context_size)
    cost_gradient = bind_cost_gradient(skip_gram_cost_gradient, inputs, outputs,
                                       sampler=get_stochastic_sampler(100))
    initial_parameters = np.random.normal(size=(2, len(dictionary), args.vector_size))
    parameters, cost_history = gradient_descent(cost_gradient, initial_parameters, 10)

    input_vectors, output_vectors = parameters
    word_vectors = input_vectors + output_vectors
    words = [word for word, index in sorted(dictionary.items(), key=operator.itemgetter(1))]
    for word, vector in zip(words, word_vectors):
        print(word, ' '.join(str(element) for element in vector), file=args.output_file)

def train(self, sentences, iterations=1000):
    # Preprocess sentences to create indices of context and next words
    self.dictionary = build_dictionary(sentences, self.vocabulary_size)
    indices = to_indices(sentences, self.dictionary)
    self.reverse_dictionary = {index: word for word, index in self.dictionary.items()}
    inputs, outputs = self.create_context(indices)

    # Create cost and gradient function for gradient descent
    shapes = [self.W_shape, self.U_shape, self.H_shape, self.C_shape]
    flatten_nplm_cost_gradient = flatten_cost_gradient(nplm_cost_gradient, shapes)
    cost_gradient = bind_cost_gradient(flatten_nplm_cost_gradient, inputs, outputs,
                                       sampler=get_stochastic_sampler(10))

    # Train neural network
    parameters_size = sum(np.prod(shape) for shape in shapes)
    initial_parameters = np.random.normal(size=parameters_size)
    self.parameters, cost_history = gradient_descent(cost_gradient, initial_parameters, iterations)
    return cost_history

def test_neural_network(self):
    np.random.seed(0)
    input_size = 2
    hidden_size = 2
    output_size = 2

    # Classic XOR test data
    inputs = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
    outputs = np.array([0, 1, 1, 0])

    # Create cost and gradient function for gradient descent
    shapes = [(hidden_size, input_size), (output_size, hidden_size)]
    flatten_neural_network_cost_gradient = flatten_cost_gradient(neural_network_cost_gradient, shapes)
    cost_gradient = bind_cost_gradient(flatten_neural_network_cost_gradient,
                                       inputs, outputs, sampler=batch_sampler)

    # Check gradient with initial parameters
    parameters_size = sum(np.prod(shape) for shape in shapes)
    initial_parameters = np.random.normal(size=parameters_size)
    result = gradient_check(cost_gradient, initial_parameters)
    self.assertEqual([], result)

    # Train neural network (this is slow even for such a simple task!)
    final_parameters, cost_history = gradient_descent(cost_gradient, initial_parameters, 1000)

    # Check if the cost decreases monotonically (no guarantee in theory, but works in practice)
    previous_cost = None
    for cost in cost_history:
        if previous_cost is not None:
            self.assertLessEqual(cost, previous_cost)
        previous_cost = cost

    # TODO: extract duplicated code for prediction to reusable component
    split_index = hidden_size * input_size
    W1, W2 = np.split(final_parameters, [split_index])
    W1 = W1.reshape((hidden_size, input_size))
    W2 = W2.reshape((output_size, hidden_size))
    for input, output in zip(inputs, outputs):
        input = input.reshape(-1, 1)
        hidden_layer = expit(W1.dot(input))
        inside_softmax = W2.dot(hidden_layer)
        prediction = softmax(inside_softmax.reshape(-1)).reshape(-1, 1)
        label = np.argmax(prediction)
        # Check if output is correctly predicted
        self.assertEqual(output, label)

def test_logistic_regression(self):
    input = np.random.uniform(-10.0, 10.0, size=10)
    output = np.random.randint(0, 2)

    def logistic_regression_wrapper(parameters):
        return logistic_regression_cost_gradient(parameters, input, output)

    initial_parameters = np.random.normal(scale=1e-5, size=10)
    result = gradient_check(logistic_regression_wrapper, initial_parameters)
    self.assertEqual([], result)

    # Train logistic regression and see if it predicts the correct label
    final_parameters, cost_history = gradient_descent(logistic_regression_wrapper, initial_parameters, 100)
    prediction = expit(np.dot(input, final_parameters)) > 0.5
    self.assertEqual(output, prediction)

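# For reference, a minimal sketch of what logistic_regression_cost_gradient is assumed to
# compute for a single (input, output) pair: sigmoid cross-entropy cost and its gradient
# with respect to the parameters. This is an illustration only (assuming np and expit are
# imported as elsewhere in this file), not the implementation under test.
def sketch_logistic_regression_cost_gradient(parameters, input, output):
    probability = expit(np.dot(input, parameters))  # P(output = 1 | input)
    cost = -(output * np.log(probability) + (1 - output) * np.log(1.0 - probability))
    gradient = (probability - output) * input       # d(cost) / d(parameters)
    return cost, gradient
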
def assertLogisticRegression(self, sampler):
    data_size = 3
    input_size = 5
    inputs = np.random.uniform(-10.0, 10.0, size=(data_size, input_size))
    outputs = np.random.randint(0, 2, size=data_size)
    initial_parameters = np.random.normal(scale=1e-5, size=input_size)

    # Create cost and gradient function for gradient descent and check its gradient
    cost_gradient = bind_cost_gradient(logistic_regression_cost_gradient,
                                       inputs, outputs, sampler=sampler)
    result = gradient_check(cost_gradient, initial_parameters)
    self.assertEqual([], result)

    # Train logistic regression and see if it predicts correct labels
    final_parameters, cost_history = gradient_descent(cost_gradient, initial_parameters, 100)
    predictions = expit(np.dot(inputs, final_parameters)) > 0.5
    # Binary classification of 3 data points in 5 dimensions is always linearly separable
    for output, prediction in zip(outputs, predictions):
        self.assertEqual(output, prediction)

def test_multinomial_logistic_regression(self):
    input_size = 10
    output_size = 5
    input = np.random.normal(size=(input_size,))
    output = np.random.randint(0, output_size)

    def multinomial_logistic_regression_wrapper(parameters):
        return multinomial_logistic_regression_cost_gradient(parameters, input, output)

    initial_parameters = np.random.normal(size=(input_size, output_size))
    result = gradient_check(multinomial_logistic_regression_wrapper, initial_parameters)
    self.assertEqual([], result)

    # Train multinomial logistic regression and see if it predicts the correct label
    final_parameters, cost_history = gradient_descent(multinomial_logistic_regression_wrapper, initial_parameters, 100)
    prediction = softmax(np.dot(final_parameters.T, input)) > 0.5
    for i in range(len(prediction)):
        if output == i:
            self.assertEqual(1, prediction[i])
        else:
            self.assertEqual(0, prediction[i])

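# For reference, a minimal sketch of what multinomial_logistic_regression_cost_gradient is
# assumed to compute: softmax cross-entropy cost for a single example and its gradient with
# respect to the (input_size, output_size) parameter matrix. Illustration only (assuming np
# and softmax are available as elsewhere in this file), not the implementation under test.
def sketch_multinomial_logistic_regression_cost_gradient(parameters, input, output):
    probabilities = softmax(np.dot(parameters.T, input))  # shape: (output_size,)
    cost = -np.log(probabilities[output])
    delta = probabilities.copy()
    delta[output] -= 1.0                                   # softmax cross-entropy error
    gradient = np.outer(input, delta)                      # same shape as parameters
    return cost, gradient
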