Exemplo n.º 1
0
def main(mode='test'):
    """Open the classifier database and run either the test or training step.

    mode -- 'test' (the default) passes the classifier to ``test``; any
            other value passes it to ``train`` together with the blog URL.
    """
    classifier = Classifier()
    classifier.create_db('bunyk.db')

    # Anything other than the explicit 'test' mode means "train".
    if mode != 'test':
        train(classifier, 'http://bunyk.wordpress.com')
        return

    test(classifier)
Exemplo n.º 2
0
class TestClassifier(unittest.TestCase):
    def setUp(self):
        from classifier import Classifier
        self.cl = Classifier()
        self.cl.create_db(':memory:')

    def test_fprob(self):
        for x in range(9):
            self.cl.train('viagra', 'spam')
        self.cl.train('penis', 'spam')
        self.assertEquals(self.cl.fprob('viagra', 'spam'), 0.9)

    def test_weighted_prob_unknown(self):
        self.assertEquals(0.5,
            self.cl.weightedprob('viagra', 'spam')
        )

    def test_weighted_prob_biased(self):
        self.cl.train('viagra', 'spam')
        self.assertEquals(0.75,
            self.cl.weightedprob('viagra', 'spam')
        )

    def test_train_adds_category(self):
        self.cl.train('1 2', 1)
        self.assertEquals(list(self.cl.categories()), [1])

    def test_train(self):
        self.cl.train('1 2', 1)
        self.cl.train('2 3', 1)
        self.cl.train('3 4', 2)

        self.assertEquals(list(self.cl.categories()), [1, 2])

    def test_check_plain(self):
        tmp_prob = self.cl.weightedprob
        self.cl.weightedprob = self.cl.fprob

        for i in range(9):
            self.cl.train('penis', 'spam')
        self.cl.train('penis', 'ok')

        self.assertEquals(self.cl.check('penis')[0], ('spam', 0.9))

        self.cl.weightedprob = tmp_prob