예제 #1
0
class WordExtractTestCase(unittest.TestCase):
    """Tests for WeightEngine record handling and term-weighting methods.

    Several tests call ``load_record("record.dat")``; the assertions in
    ``test_load_record`` spell out what that fixture is expected to contain.
    NOTE(review): the relative path is presumably resolved against the
    current working directory -- confirm against ``load_record``.
    """

    def setUp(self):
        """Create a fresh engine and the sample term/text shared by tests."""
        # Per the original note, segment_function is the mmseg-based
        # segmenter used for word extraction.
        self.we = WeightEngine(segment_function)
        self.term = u"发财"
        self.text = u"发财啊,发财啊,操蛋啊,发财啊,不知道什么情况啊。"

    def test_add_record(self):
        """Adding a term twice keeps one entry and sums its counts."""
        self.we.load_record("record.dat")
        self.we.add_record(u"操蛋", 15)
        self.assertEqual(len(self.we._dict), 4)
        # A second add of the same term must not create a new entry.
        self.we.add_record(u"操蛋", 15)
        self.assertEqual(len(self.we._dict), 4)
        self.assertEqual(self.we._dict[u"操蛋"], 30)
        self.assertEqual(self.we.N, 15)

    def test_load_record(self):
        """The fixture yields exactly three terms with the expected counts."""
        self.we.load_record("record.dat")
        self.assertEqual(len(self.we._dict), 3)
        self.assertEqual(self.we._dict[u"理想"], 15)
        self.assertEqual(self.we._dict[u"文化"], 1)
        self.assertEqual(self.we._dict[u"德行"], 12)
        self.assertNotIn(u"操蛋", self.we._dict)
        self.assertNotIn(u"毛线", self.we._dict)

    def test_save_record(self):
        """Smoke test: a loaded record can be saved without raising."""
        self.we.load_record("record.dat")
        # NOTE(review): nothing is asserted about the output, and
        # "record_save.dat" is not cleaned up after the test.
        self.we.save_record("record_save.dat")

    # Disabled test, kept for reference:
    # def test_weight_learning(self):
    #     self.we.weight_learning(read_tiny_mock())
    #     self.we.show_dict(sort=True)
    #     self.we.show_dict()

    def test_df(self):
        """df() returns the expected values for two terms after loading."""
        self.we.load_record("record.dat")
        self.assertEqual(self.we.df(u"转发"), 1)
        self.assertEqual(self.we.df(u"理想"), 16)

    def test_idf(self):
        """Smoke test: idf() runs for several terms after loading."""
        self.we.load_record("record.dat")
        self.assertEqual(self.we.df(u"转发"), 1)
        self.assertEqual(self.we.df(u"理想"), 16)
        # Unicode literals, consistent with the rest of the suite (plain
        # literals would be byte strings on Python 2).
        self.we.idf(u"美丽")
        self.we.idf(u"转发")
        self.we.idf(u"阿拉")

    def test_prob_idf(self):
        """Smoke test: prob_idf() runs for several terms after loading."""
        self.we.load_record("record.dat")
        self.we.prob_idf(u"美丽")
        self.we.prob_idf(u"转发")
        self.we.prob_idf(u"阿拉")

    def test_text_word(self):
        """text_word() output for the sample text contains the sample term."""
        # Previously named ``text_word``; without the ``test`` prefix the
        # unittest loader never collected it.
        self.assertIn(self.term, self.we.text_word(self.text))

    def test_tf(self):
        """Smoke test: tf() runs on the text_word() output."""
        self.we.tf(self.term, self.we.text_word(self.text))

    def test_log_tf(self):
        """Smoke test: log_tf() runs on the text_word() output."""
        self.we.log_tf(self.term, self.we.text_word(self.text))

    def test_a_tf(self):
        """Smoke test: a_tf() runs on the text_word() output."""
        self.we.a_tf(self.term, self.we.text_word(self.text))

    def test_b_tf(self):
        """Smoke test: b_tf() runs for the sample term and for u"dd"."""
        self.we.b_tf(self.term, self.we.text_word(self.text))
        # u"dd" does not occur anywhere in the sample text.
        self.we.b_tf(u"dd", self.we.text_word(self.text))

    def test_L_tf(self):
        """Smoke test: L_tf() runs for the sample term and for u"dd"."""
        self.we.L_tf(self.term, self.we.text_word(self.text))
        # u"dd" does not occur anywhere in the sample text.
        self.we.L_tf(u"dd", self.we.text_word(self.text))