Exemplo n.º 1
0
 def test_init_no_training(self):
     """Check that a classifier built without documents starts out empty.

     After the emptiness checks, the fresh instance is trained on
     ``self.training_docs`` and the ``test_labels`` / ``test_vocabulary``
     checks of this test case are run again.
     """
     empty = set()
     untrained = MultinomialNB()
     assert untrained.vocabulary == empty
     assert untrained.labels == empty

     # Train after construction instead of passing documents to __init__.
     untrained.train(*self.training_docs)

     # NOTE(review): these helpers are invoked on ``self``, so they presumably
     # inspect the fixture classifier rather than ``untrained`` -- confirm.
     self.test_labels()
     self.test_vocabulary()
Exemplo n.º 2
0
 def setup(self):
     """Prepare the shared fixture used by the tests.

     Stores the whitespace-tokenised training corpus in
     ``self.training_docs``, builds ``self.classifier`` from it and then
     calls ``self.make_snapshot()``.
     """
     # NOTE(review): presumably picked up by the test runner as a per-test
     # fixture because of its name -- confirm the runner in use supports it.
     raw_corpus = (
         ('Chinese Bejing Chinese', 'yes'),
         ('Chinese Chinese Shanghai', 'yes'),
         ('Chinese Macao', 'yes'),
         ('Tokyo Japan Chinese', 'no'),
     )
     # Each document becomes a (list of tokens, label) pair.
     self.training_docs = [(text.split(), label) for text, label in raw_corpus]
     self.classifier = MultinomialNB(*self.training_docs)
     self.make_snapshot()
Exemplo n.º 3
0
    def test_top_features(self):
        """Exercise the ``top_features`` setting of ``MultinomialNB``.

        With ``top_features`` set to 2 before training, each label's
        ``_most_common`` store is expected to hold exactly two token counts,
        and appending a token that is not in that store is expected to leave
        the ``prob_all`` output unchanged.
        """
        training = [(['happy', 'joy', 'smile'], 'positive'),
                    (['happy', 'joy', 'frown'], 'positive'),
                    (['sad', 'frown', 'tired'], 'negative'),
                    (['sad', 'tired', 'bored'], 'negative')]
        nb = MultinomialNB()
        nb.top_features = 2
        nb.train(*training)

        # Expected contents of the per-label store after training.
        expected_stores = (
            ('positive', {'happy': 2, 'joy': 2}),
            ('negative', {'sad': 2, 'tired': 2}),
        )
        for label, counts in expected_stores:
            assert nb._most_common[label].store == counts

        # A repeated 'smile' must not alter the full probability result.
        baseline = nb.prob_all(['happy', 'smile'])
        extended = nb.prob_all(['happy', 'smile', 'smile'])
        assert baseline == extended, nb._most_common

        # An added 'frown' must not alter the 'negative' probability.
        baseline = nb.prob_all(['sad', 'tired'])['negative']
        extended = nb.prob_all(['sad', 'tired', 'frown'])['negative']
        assert baseline == extended, nb._most_common
Exemplo n.º 4
0
 def test_ngrams_multinomialnb(self):
     """Integration check: train ``MultinomialNB`` on extracted n-gram features.

     The extractor's ``min_n`` / ``max_n`` are set to 1 and 3, features are
     extracted from ``self.document`` and passed to ``train``. There are no
     assertions; the test passes if nothing raises.
     """
     nb = MultinomialNB()
     self.extractor.min_n = 1
     self.extractor.max_n = 3
     ngram_features = self.extractor.extract(self.document)
     nb.train([ngram_features, 'positive'])
Exemplo n.º 5
0
 def test_train_many_document(self):
     """Check the vocabulary after constructing from five identical documents.

     The same (tokens, label) pair is repeated five times and passed to the
     constructor; the resulting vocabulary must equal the set of its tokens.
     """
     single_doc = (['one', 'document', 'already', 'tokenized'], 'label')
     # List repetition keeps five references to the same tuple.
     documents = [single_doc] * 5
     trained = MultinomialNB(*documents)
     assert trained.vocabulary == {'one', 'document', 'already', 'tokenized'}