def __init__(self, training_file=TRAIN_TXT):
    """Load the training data and build the recursive neural tensor network.

    Stores the data transformation, the number of training samples, a
    seeded RNG, and the network itself as instance attributes.
    """
    self.dt = DataTransformation(training_file)
    # Consume the tree iterator once just to count the training samples.
    self.num_training_samples = sum(1 for _ in self.dt.tree_iterator())
    # Fixed seed keeps weight initialisation reproducible across runs.
    self.rng = numpy.random.RandomState(1234)
    self.rntn = RecursiveNeuralTensorNetwork(self.dt, self.rng)
def test_learning(self, test_file=TEST_TXT):
    """Evaluate the trained network on *test_file*.

    Loads the pickled parameters, accumulates the forward-pass cost over
    every test tree, and prints the total evaluated cost.
    """
    print('Testing the neural network...')
    self._load_parameters()
    self.test_dt = DataTransformation(test_file)
    total_cost = 0.0
    for parse_tree in self.test_dt.tree_iterator():
        word_indices = self.test_dt.get_word_indices(parse_tree)
        total_cost += self.rntn.forward_pass(word_indices)
    print(total_cost.eval({}))
def main():
    """Run the end-to-end pipeline.

    Parses and transforms the raw data, aggregates the per-user tables,
    trains an SVD model, builds and saves the similarity matrix, and
    ingests the resulting csv into postgres.  Any failure is logged with
    its traceback and swallowed so the caller always exits cleanly.
    """
    try:
        data_transformation = DataTransformation(constants.data_dir)
        logging.info('Data is being processed and getting transformed')
        data_transformation.parse_data()
        user_assessment_scores_tab = data_transformation.transform_user_assessment_scores()
        user_course_views_tab = data_transformation.transform_user_course_views()
        user_interests_tab = data_transformation.transform_user_interests()
        agg_df = data_transformation.aggregate_data(
            user_course_views_tab, user_interests_tab,
            user_assessment_scores_tab)
        logging.info('Applying SVD')
        model = Model()
        (users, _, _) = model.train(agg_df)
        logging.info('Calculating similarity matrix and saving the csv')
        similarity = Similarity(constants.file_name)
        similarity.create_similarity_matrix(users, agg_df)
        logging.info('Ingesting data into postgres')
        ingestion = DataIngestion()
        ingestion.create_table()
        ingestion.insert_values(constants.file_name)
        logging.info('Process completed')
    except Exception:
        # BUG FIX: logging.error(e) dropped the traceback, making failures
        # in this many-step pipeline nearly impossible to localise;
        # logging.exception records the full stack trace.
        logging.exception('Pipeline failed')
class Learning(object):
    """Trains and evaluates a recursive neural tensor network (RNTN).

    Wraps a DataTransformation over a training file and provides batch,
    online, and mini-batch gradient-descent training, parameter
    (un)pickling, and evaluation against a held-out test file.
    """

    def __init__(self, training_file=TRAIN_TXT):
        """Loads training data and creates the recursive neural network."""
        self.dt = DataTransformation(training_file)
        self.num_training_samples = len(list(self.dt.tree_iterator()))
        # Fixed seed -> reproducible weight initialisation.
        self.rng = numpy.random.RandomState(1234)
        self.rntn = RecursiveNeuralTensorNetwork(self.dt, self.rng)

    def batch_learn(self, training_iterations=100, learning_rate=0.01):
        """Trains with full-batch learning: one weight update per epoch."""
        print('Training the neural network with batch learning...')
        self.mini_batch_learn(training_iterations=training_iterations,
                              mini_batch_size=self.num_training_samples,
                              learning_rate=learning_rate)

    def online_learn(self, training_iterations=100, learning_rate=0.01):
        """Trains online: mini-batch size of 1, i.e. the weights are
        updated after each input forward pass."""
        print('Training the neural network with online learning...')
        self.mini_batch_learn(training_iterations=training_iterations,
                              mini_batch_size=1,
                              learning_rate=learning_rate)

    def mini_batch_learn(self, training_iterations=100, mini_batch_size=10,
                         learning_rate=0.01):
        """Trains the network using mini-batch gradient descent.

        Per epoch: each training tree is fed forward and its cost
        accumulated; every `mini_batch_size` samples one SGD step is
        applied and the (post-update) batch cost printed.  Parameters
        are pickled once per epoch.
        """
        for epoch in range(training_iterations):
            cost = 0.0
            batch_num = 1
            samples_in_batch = 0
            # BUG FIX: the sample loop previously reused the epoch loop
            # variable `i`, shadowing it; distinct names avoid the clash.
            for tree in self.dt.tree_iterator():
                indices = self.dt.get_word_indices(tree)
                cost += self.rntn.forward_pass(indices)
                samples_in_batch += 1
                if samples_in_batch == mini_batch_size:
                    self._apply_gradient_update(cost, learning_rate)
                    # cost is re-evaluated after the update (matches the
                    # original print-after-update ordering).
                    print('Batch %i cost: %f' % (batch_num, cost.eval({})))
                    batch_num += 1
                    cost = 0.0
                    samples_in_batch = 0
            # BUG FIX: a trailing partial batch (sample count not divisible
            # by mini_batch_size) used to be silently discarded, so those
            # samples never contributed a weight update.
            if samples_in_batch:
                self._apply_gradient_update(cost, learning_rate)
                print('Batch %i cost: %f' % (batch_num, cost.eval({})))
            self._pickle_parameters()

    def _apply_gradient_update(self, cost, learning_rate):
        """Applies one SGD step: theta <- theta - lr * dCost/dTheta.

        Gradient expressions are built symbolically first; each update is
        then evaluated and written back sequentially, preserving the
        original in-order update semantics.
        """
        gparams = [T.grad(cost, theta) for theta in self.rntn.theta]
        for param, gparam in zip(self.rntn.theta, gparams):
            param.set_value((param - learning_rate * gparam).eval({}))

    def _pickle_parameters(self):
        """Pickles the current values of the network parameters.

        BUG FIX: the file is opened in binary mode, which pickle requires
        on Python 3 and for any protocol above 0.
        """
        params = [theta.get_value() for theta in self.rntn.theta]
        with open(PARAMS_PICKLED, 'wb') as f:
            pickle.dump(params, f)

    def _load_parameters(self):
        """Restores the network parameters from the pickled file."""
        with open(PARAMS_PICKLED, 'rb') as f:  # binary mode, see above
            params = pickle.load(f)
        for i, value in enumerate(params):
            self.rntn.theta[i].set_value(value)

    def test_learning(self, test_file=TEST_TXT):
        """Tests the network against a test file and prints the total
        error value accumulated over all test inputs."""
        print('Testing the neural network...')
        self._load_parameters()
        self.test_dt = DataTransformation(test_file)
        cost = 0.0
        for tree in self.test_dt.tree_iterator():
            indices = self.test_dt.get_word_indices(tree)
            cost += self.rntn.forward_pass(indices)
        print(cost.eval({}))
def __init__(self, data, labels, metrics):
    """Wrap *data* in a DataTransformation using *metrics*, then run
    clustering and record the timing and quality scores (NMI/ARI/FMS)."""
    self.data = DataTransformation(data, metrics)
    clustering_result = self.clustering(data, labels)
    self._time, self.nmi, self.ari, self.fms = clustering_result
class Learning(object):
    """Trains and evaluates a recursive neural tensor network (RNTN).

    Wraps a DataTransformation over a training file and provides batch,
    online, and mini-batch gradient-descent training, parameter
    (un)pickling, and evaluation against a held-out test file.
    """

    def __init__(self, training_file=TRAIN_TXT):
        """Loads training data and creates the recursive neural network."""
        self.dt = DataTransformation(training_file)
        self.num_training_samples = len(list(self.dt.tree_iterator()))
        # Fixed seed -> reproducible weight initialisation.
        self.rng = numpy.random.RandomState(1234)
        self.rntn = RecursiveNeuralTensorNetwork(self.dt, self.rng)

    def batch_learn(self, training_iterations=100, learning_rate=0.01):
        """Trains with full-batch learning: one weight update per epoch."""
        print('Training the neural network with batch learning...')
        self.mini_batch_learn(training_iterations=training_iterations,
                              mini_batch_size=self.num_training_samples,
                              learning_rate=learning_rate)

    def online_learn(self, training_iterations=100, learning_rate=0.01):
        """Trains online: mini-batch size of 1, i.e. the weights are
        updated after each input forward pass."""
        print('Training the neural network with online learning...')
        self.mini_batch_learn(training_iterations=training_iterations,
                              mini_batch_size=1,
                              learning_rate=learning_rate)

    def mini_batch_learn(self, training_iterations=100, mini_batch_size=10,
                         learning_rate=0.01):
        """Trains the network using mini-batch gradient descent.

        Per epoch: each training tree is fed forward and its cost
        accumulated; every `mini_batch_size` samples one SGD step is
        applied and the (post-update) batch cost printed.  Parameters
        are pickled once per epoch.
        """
        for epoch in range(training_iterations):
            cost = 0.0
            batch_num = 1
            samples_in_batch = 0
            # BUG FIX: the sample loop previously reused the epoch loop
            # variable `i`, shadowing it; distinct names avoid the clash.
            for tree in self.dt.tree_iterator():
                indices = self.dt.get_word_indices(tree)
                cost += self.rntn.forward_pass(indices)
                samples_in_batch += 1
                if samples_in_batch == mini_batch_size:
                    self._apply_gradient_update(cost, learning_rate)
                    # cost is re-evaluated after the update (matches the
                    # original print-after-update ordering).
                    print('Batch %i cost: %f' % (batch_num, cost.eval({})))
                    batch_num += 1
                    cost = 0.0
                    samples_in_batch = 0
            # BUG FIX: a trailing partial batch (sample count not divisible
            # by mini_batch_size) used to be silently discarded, so those
            # samples never contributed a weight update.
            if samples_in_batch:
                self._apply_gradient_update(cost, learning_rate)
                print('Batch %i cost: %f' % (batch_num, cost.eval({})))
            self._pickle_parameters()

    def _apply_gradient_update(self, cost, learning_rate):
        """Applies one SGD step: theta <- theta - lr * dCost/dTheta.

        Gradient expressions are built symbolically first; each update is
        then evaluated and written back sequentially, preserving the
        original in-order update semantics.
        """
        gparams = [T.grad(cost, theta) for theta in self.rntn.theta]
        for param, gparam in zip(self.rntn.theta, gparams):
            param.set_value((param - learning_rate * gparam).eval({}))

    def _pickle_parameters(self):
        """Pickles the current values of the network parameters.

        BUG FIX: the file is opened in binary mode, which pickle requires
        on Python 3 and for any protocol above 0.
        """
        params = [theta.get_value() for theta in self.rntn.theta]
        with open(PARAMS_PICKLED, 'wb') as f:
            pickle.dump(params, f)

    def _load_parameters(self):
        """Restores the network parameters from the pickled file."""
        with open(PARAMS_PICKLED, 'rb') as f:  # binary mode, see above
            params = pickle.load(f)
        for i, value in enumerate(params):
            self.rntn.theta[i].set_value(value)

    def test_learning(self, test_file=TEST_TXT):
        """Tests the network against a test file and prints the total
        error value accumulated over all test inputs."""
        print('Testing the neural network...')
        self._load_parameters()
        self.test_dt = DataTransformation(test_file)
        cost = 0.0
        for tree in self.test_dt.tree_iterator():
            indices = self.test_dt.get_word_indices(tree)
            cost += self.rntn.forward_pass(indices)
        print(cost.eval({}))
def __init__(self, data, labels, encoder):
    """Fit *encoder* on *data* and store the encoded representation.

    NOTE(review): ``self.data`` is assigned twice — the DataTransformation
    created on the first line is immediately overwritten by the encoder's
    transform output, so it only matters if its constructor has side
    effects; confirm whether it should be kept under a different name.
    NOTE(review): ``labels`` is accepted but never used in this method.
    """
    # Hard-coded 'spearman' metric; result is clobbered below.
    self.data = DataTransformation(data, 'spearman')
    self.encoder = encoder
    self.encoder.fit(data)
    # Final value of self.data: the encoder-transformed input.
    self.data = self.encoder.transform(data)