def main(mode='test'):
    """Build a classifier on 'bunyk.db' and dispatch on *mode*.

    When *mode* equals ``'test'`` the classifier is handed to ``test``;
    for any other value it is handed to ``train`` together with the
    blog URL.
    """
    classifier = Classifier()
    classifier.create_db('bunyk.db')

    # Guard clause: the test path is the default, everything else trains.
    if mode == 'test':
        test(classifier)
        return

    train(classifier, 'http://bunyk.wordpress.com')
class TestClassifier(unittest.TestCase):
    """Unit tests for ``Classifier``; each test gets a fresh instance."""

    def setUp(self):
        """Create a new classifier whose database is ``':memory:'``."""
        # Imported inside the fixture, as in the original, so the module is
        # only loaded when a test actually runs.
        from classifier import Classifier

        self.cl = Classifier()
        # NOTE(review): ':memory:' is presumably an in-memory SQLite
        # database, so no state leaks between tests -- confirm in create_db.
        self.cl.create_db(':memory:')

    def test_fprob(self):
        """``fprob`` is 0.9 when 9 of the 10 'spam' items are 'viagra'."""
        for _ in range(9):
            self.cl.train('viagra', 'spam')
        self.cl.train('penis', 'spam')
        # Float result: compare approximately rather than bit-for-bit.
        self.assertAlmostEqual(self.cl.fprob('viagra', 'spam'), 0.9)

    def test_weighted_prob_unknown(self):
        """With no training data ``weightedprob`` yields 0.5."""
        self.assertAlmostEqual(
            self.cl.weightedprob('viagra', 'spam'), 0.5
        )

    def test_weighted_prob_biased(self):
        """A single matching training item moves ``weightedprob`` to 0.75."""
        self.cl.train('viagra', 'spam')
        self.assertAlmostEqual(
            self.cl.weightedprob('viagra', 'spam'), 0.75
        )

    def test_train_adds_category(self):
        """Training with a new category makes it appear in ``categories``."""
        self.cl.train('1 2', 1)
        self.assertEqual(list(self.cl.categories()), [1])

    def test_train(self):
        """Repeated categories are listed once, alongside new ones."""
        self.cl.train('1 2', 1)
        self.cl.train('2 3', 1)
        self.cl.train('3 4', 2)
        self.assertEqual(list(self.cl.categories()), [1, 2])

    def test_check_plain(self):
        """``check`` ranks 'spam' first at 0.9 when using plain ``fprob``."""
        original_weightedprob = self.cl.weightedprob
        # Swap the weighted probability for the plain one for this test.
        self.cl.weightedprob = self.cl.fprob
        try:
            for _ in range(9):
                self.cl.train('penis', 'spam')
            self.cl.train('penis', 'ok')

            category, probability = self.cl.check('penis')[0]
            self.assertEqual(category, 'spam')
            self.assertAlmostEqual(probability, 0.9)
        finally:
            # Restore the attribute even when an assertion above fails.
            self.cl.weightedprob = original_weightedprob