class TestClassifier(unittest.TestCase): """ Tests the Classifier class. """ def setUp(self): self.c = Classifier(CleanTextUtil("french")) def tearDown(self): rm_data_dir() def test_add_text(self): """ Tests add_text. Add a text to the classifier: 1- Verify if the number of text equals 1. 2- Verify if the text added is equals to words wanted. """ flux1_text = ( u"Comment Google classe les pages Internet " u"Bientôt une sphère pour remplacer souris et écrans tactiles ? " u"Le clip kitsch du couple présidentiel chinois" ) flux1_text_wanted = [ "bient", "chinois", "class", "clip", "comment", "coupl", "cran", "googl", "internet", "kitsch", "le", "pag", "pr", "re", "remplac", "sidentiel", "sour", "sph", "tactil" ] self.c.add_text(flux1_text) self.assertEquals(int(self.c.classifier_state_db.get("text_nb")), 1) # 1 words = [word for word, _ in kc_util.gen_db(self.c.dictionary_db.cursor())] self.assertEquals(words, flux1_text_wanted) # 2 def test_set_idf(self): """ Tests set_idf. Add two texts: 1- Verify idf equals 0.0 Add idf: 2- Verify idf not equals 0.0 """ self.c.add_text("foo") self.c.add_text("bar") # important for idf _, word_info = kc_util.gen_db(self.c.dictionary_db.cursor()).next() self.assertEquals(word_info.idf, 0.0) # 1 self.c.set_idf() _, word_info = kc_util.gen_db(self.c.dictionary_db.cursor()).next() self.assertNotEquals(word_info.idf, 0.0) # 2 def test_set_idf_tfidf_norm(self): """ Tests set_idf_tfidf_norm. Add two texts: 1- Verify idf equals 0.0 2- Verify idf norm equals '0.0' Update idf: 2- Verify idf not equals 0.0 3- Verify idf norm not equals '0.0' """ text, vector_1 = "foo", "foo_1" self.c.add_text(text) self.c.add_text("bar") # important for idf _, word_info = kc_util.gen_db(self.c.dictionary_db.cursor()).next() self.assertEquals(word_info.idf, 0.0) # 1 self.c.add_vector(vector_1, text) norm = self.c.vectors_norm_db.get(vector_1) self.assertEquals(norm, '0.0') # 2 self.c.set_idf() _, word_info = kc_util.gen_db(self.c.dictionary_db.cursor()).next() self.assertNotEquals(word_info.idf, 0.0) # 3 self.c.set_tfidf_norm() norm = self.c.vectors_norm_db.get(vector_1) self.assertNotEquals(norm, '0.0') # 4 def test_add_vector(self): """ Tests add_vector. Add a text. 1- Check if there is not a vector. Add a vector: 2- Check if there is a vector. """ text, vector_1 = "foo", "foo_1" self.c.add_text(text) vector = self.c.get_vector(vector_1) self.assertIsNone(vector) # 1 self.c.add_vector(vector_1, text) vector = self.c.get_vector(vector_1) self.assertIsInstance(vector, Vector) # 2 def test_rm_vector(self): """ Tests rm_vector. Add a text. Add a vector: 1- Check if the vector exists. Remove the vector: 2- Check if the vector doesn't exist anymore. """ text, vector_1 = "foo", "foo_1" self.c.add_text(text) self.c.add_vector(vector_1, text) vector = self.c.get_vector(vector_1) self.assertIsNotNone(vector) # 1 self.c.rm_vector(vector_1) vector = self.c.get_vector(vector_1) self.assertIsNone(vector) # 2 def test_get_vector(self): """ Tests get_vector. Add a text. Add a vector. 1- Check if the vector exists with "get_vector". Get an unknown vector. 2- Check if the vector is None. """ text, vector_1 = "foo", "foo_1" self.c.add_text(text) self.c.add_vector(vector_1, text) vector = self.c.get_vector(vector_1) self.assertIsNotNone(vector) # 1 vector = self.c.get_vector("unknown vector") self.assertIsNone(vector) # 2 def test_get_vectors(self): """ Tests get_vectors. Add two texts. Add two vectors. With get_vectors: 1- Verify names equals by comparing names. 2- Verify object equals by comparing tags. """ text_1, vector_1, tag_1 = "foo", "foo_1", "SPORT" text_2, vector_2, tag_2 = "bar", "bar_1", "BUSINESS" self.c.add_text(text_1) self.c.add_text(text_2) self.c.add_vector(vector_1, text_1, tag_1) self.c.add_vector(vector_2, text_2, tag_2) names = [name for name, _ in self.c.get_vectors()] self.assertEquals(set(names), set(self.c.get_vectors_name())) # 1 tags = [vector.tag for _, vector in self.c.get_vectors()] self.assertEquals(set([tag_1, tag_2]), set(tags)) # 2 def test_get_vectors_name(self): """ Tests get_vectors_name. Add two texts. Add two vectors. Get vectors names: 1- Verify names equals. """ text_1, vector_1 = "foo", "foo_1" text_2, vector_2 = "bar", "bar_1" self.c.add_text(text_1) self.c.add_text(text_2) self.c.add_vector(vector_1, text_1) self.c.add_vector(vector_2, text_2) names = self.c.get_vectors_name() self.assertEquals(set([vector_1,vector_2]), set(names)) # 1 def test_idf(self): #TODO """ Test . """ pass def test_tf_idf(self): """ Test . """ pass def test_vector_tfidf_norm(self): """ Test . """ pass def test_scalar_product(self): """ Test . """ pass def test_cosine_sim(self): """ Test . """ pass def test_kNN(self): """ Test . """ pass def test_get_category(self): """ Test . """ pass def test_print_words_structure(self): """ Test . """ pass def test_print_vector(self): """ Test . """ pass def test_print_vectors(self): """ Test . """ pass