Exemplo n.º 1
0
class TestPositiveNaiveBayesClassifier(unittest.TestCase):
    """Exercise ``PositiveNaiveBayesClassifier`` on a small sports corpus.

    Every test gets a freshly trained classifier (see ``setUp``) built from
    a handful of positive (sports) sentences and a handful of unlabeled
    sentences. Assertions use the ``unittest.TestCase`` methods directly so
    the suite does not depend on external assertion helpers and produces
    informative failure messages.
    """

    def setUp(self):
        """Train a new classifier from the positive and unlabeled sentences."""
        # NOTE(review): the sentences are training data and are kept verbatim
        # (including "catched"); editing them would change what the
        # classifier is trained on.
        sports_sentences = [
            "The team dominated the game",
            "They lost the ball",
            "The game was intense",
            "The goalkeeper catched the ball",
            "The other team controlled the ball",
        ]

        various_sentences = [
            "The President did not comment",
            "I lost the keys",
            "The team won the game",
            "Sara has two kids",
            "The ball went off the court",
            "They had the ball for the whole game",
            "The show is over",
        ]

        self.classifier = PositiveNaiveBayesClassifier(
            positive_set=sports_sentences,
            unlabeled_set=various_sentences,
        )

    def test_classifier(self):
        """The wrapped ``classifier`` attribute is the NLTK implementation."""
        self.assertIsInstance(
            self.classifier.classifier,
            nltk.classify.PositiveNaiveBayesClassifier,
        )

    def test_classify(self):
        """A sports sentence classifies truthy; an unrelated one falsy."""
        self.assertTrue(self.classifier.classify("My team lost the game."))
        self.assertFalse(self.classifier.classify("The cat is on the table."))

    def test_update(self):
        """``update`` grows each data set by the number of items supplied."""
        orig_pos_length = len(self.classifier.positive_set)
        orig_unlabeled_length = len(self.classifier.unlabeled_set)

        self.classifier.update(
            new_positive_data=["He threw the ball to the base."],
            new_unlabeled_data=["I passed a tree today."],
        )

        # One sentence was added to each set, so each length grows by one.
        self.assertEqual(len(self.classifier.positive_set), orig_pos_length + 1)
        self.assertEqual(
            len(self.classifier.unlabeled_set), orig_unlabeled_length + 1
        )

    def test_accuracy(self):
        """``accuracy`` returns a float for a labeled test set."""
        test_set = [
            ("My team lost the game", True),
            ("The ball was in the court.", True),
            ("We should have won the game.", True),
            ("And now for something completely different", False),
            ("I can't believe it's not butter.", False),
        ]
        accuracy = self.classifier.accuracy(test_set)
        # Only the return type is checked here, not the score itself.
        self.assertIsInstance(accuracy, float)

    def test_repr(self):
        """``repr`` reports the sizes of the positive and unlabeled sets."""
        expected = "<PositiveNaiveBayesClassifier trained on {0} labeled and {1} unlabeled instances>".format(
            len(self.classifier.positive_set), len(self.classifier.unlabeled_set)
        )
        self.assertEqual(repr(self.classifier), expected)