class WordExtractTestCase(unittest.TestCase): # Only use setUp() and tearDown() if necessary def setUp(self): # Using mmseg to test extract word. self.we = WeightEngine(segment_function) self.term = u"发财" self.text = u"发财啊,发财啊,操蛋啊,发财啊,不知道什么情况啊。" def tearDown(self): pass def test_add_record(self): self.we.load_record("record.dat") self.we.add_record(u"操蛋", 15) self.assertEqual(len(self.we._dict), 4) self.we.add_record(u"操蛋", 15) self.assertEqual(len(self.we._dict), 4) self.assertEqual(self.we._dict[u"操蛋"], 30) self.assertEqual(self.we.N, 15) def test_load_record(self): self.we.load_record("record.dat") self.assertEqual(len(self.we._dict), 3) self.assertEqual(self.we._dict[u"理想"], 15) self.assertEqual(self.we._dict[u"文化"], 1) self.assertEqual(self.we._dict[u"德行"], 12) self.assertNotIn(u"操蛋", self.we._dict) self.assertNotIn(u"毛线", self.we._dict) def test_save_record(self): self.we.load_record("record.dat") self.we.save_record("record_save.dat") # def test_weight_learning(self): # self.we.weight_learning(read_tiny_mock()) # self.we.show_dict(sort=True) # self.we.show_dict() def test_df(self): self.we.load_record("record.dat") self.assertEqual(self.we.df(u"转发"), 1) self.assertEqual(self.we.df(u"理想"), 16) def test_idf(self): self.we.load_record("record.dat") self.assertEqual(self.we.df(u"转发"), 1) self.assertEqual(self.we.df(u"理想"), 16) self.we.idf("美丽") self.we.idf("转发") self.we.idf("阿拉") def test_prob_idf(self): self.we.load_record("record.dat") self.we.prob_idf("美丽") self.we.prob_idf("转发") self.we.prob_idf("阿拉") def text_word(self): term = u"发财" text = u"发财啊,发财啊,操蛋啊,发财啊,不知道什么情况啊。" self.assertIn(term, self.we.text_word(text)) def test_tf(self): term = u"发财" text = u"发财啊,发财啊,操蛋啊,发财啊,不知道什么情况啊。" self.we.tf(term, self.we.text_word(text)) def test_log_tf(self): term = u"发财" text = u"发财啊,发财啊,操蛋啊,发财啊,不知道什么情况啊。" self.we.log_tf(term, self.we.text_word(text)) def test_a_tf(self): term = u"发财" text = u"发财啊,发财啊,操蛋啊,发财啊,不知道什么情况啊。" self.we.a_tf(term, self.we.text_word(text)) def test_b_tf(self): term = u"发财" text = u"发财啊,发财啊,操蛋啊,发财啊,不知道什么情况啊。" self.we.b_tf(term, self.we.text_word(text)) term = u"dd" self.we.b_tf(term, self.we.text_word(text)) def test_L_tf(self): term = u"发财" text = u"发财啊,发财啊,操蛋啊,发财啊,不知道什么情况啊。" self.we.L_tf(term, self.we.text_word(text)) term = u"dd" self.we.L_tf(term, self.we.text_word(text))