def get_all_vectors(file_with_lists, dictionary_file=None):
    """Build a label index and a matrix of normalized n-gram vectors.

    Every line of ``file_with_lists`` is treated as one entry.

    Args:
        file_with_lists: Path to a text file with one entry per line.
        dictionary_file: Optional path to a text file that is read line by
            line. Its contents are loaded but not used by this function;
            a missing or unreadable path still raises.

    Returns:
        A ``(labels, vector_matrix)`` tuple. ``labels`` maps each stripped
        line to its row index (when a line occurs more than once, the last
        index wins). ``vector_matrix`` is a list holding
        ``normalize_vector(get_n_grams_vec(line))`` for every line, in file
        order.
    """
    # NOTE(review): ``dictionary`` is loaded but never consumed below --
    # presumably it was meant to be passed on to the vectorizer; confirm.
    dictionary = []
    if dictionary_file is not None:
        # Context manager so the handle is closed deterministically.
        with open(dictionary_file) as dict_f:
            dictionary = [line.strip() for line in dict_f]

    with open(file_with_lists) as f:
        lines = f.readlines()

    labels = {}
    vector_matrix = []
    for index, line in enumerate(lines):
        labels[line.strip()] = index
        # The raw line (including any trailing newline) is what gets
        # vectorized; only the label key is stripped.
        vector_matrix.append(normalize_vector(get_n_grams_vec(line)))
    return labels, vector_matrix
def test_n_grams_map_to_vec(self):
    """Check the vector produced for a sample phrase with ``n_grams=1``.

    ``assertItemsEqual`` treats both sequences as unordered collections, so
    this verifies which values occur and how often, not their positions.
    """
    sample_text = "to jest przykładowy tekst"
    expected_values = [
        1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0, 1.0, 1.0,
        2.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 1.0, 2.0, 4.0,
        0.0, 0.0, 1.0, 0.0, 1.0, 1.0,
    ]
    self.assertItemsEqual(
        expected_values,
        get_n_grams_vec(sample_text, n_grams=1),
    )
def test_n_grams_map_to_vec(self):
    """The sample phrase, vectorized with ``n_grams=1``, yields the expected values.

    The comparison is order-insensitive (``assertItemsEqual``): the result
    must contain the same values with the same multiplicities.
    """
    # Bind the assertion first so attribute lookup precedes the call under
    # test, exactly as in a single inline expression.
    assert_same_items = self.assertItemsEqual
    wanted = [
        1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0, 0.0,
        1.0, 1.0, 2.0, 0.0, 0.0, 0.0, 2.0, 1.0,
        0.0, 1.0, 2.0, 4.0, 0.0, 0.0, 1.0, 0.0,
        1.0, 1.0,
    ]
    produced = get_n_grams_vec("to jest przykładowy tekst", n_grams=1)
    assert_same_items(wanted, produced)