Пример #1
0
def getBayesAccuracy(splitRatio=0.9):
    """Train a Naive Bayes spam classifier and return its accuracy.

    Every text returned by getSpamContent() is labelled SPAM and every text
    returned by getHamContent() is labelled NOSPAM; each text is converted
    to a feature set with featuresForText(). The labelled examples are
    shuffled, split with splitByRatio(), and the classifier is trained on
    the first part and scored on the second.

    As a side effect, the classifier's 10 most informative features are
    written to stdout.

    Args:
        splitRatio: Forwarded unchanged to splitByRatio(); presumably the
            fraction of examples placed in the training part -- confirm
            against splitByRatio().

    Returns:
        The value of nltk_classify.accuracy() for the trained classifier
        on the held-out (dev) part.
    """
    # Spam examples first, then ham -- same construction order as before.
    featureset = [(featuresForText(text), SPAM) for text in getSpamContent()]
    featureset.extend(
        (featuresForText(text), NOSPAM) for text in getHamContent()
    )

    # The list is grouped by label at this point; shuffle (assumed to work
    # in place, since its return value is unused) before splitting so both
    # parts can contain both labels.
    shuffle(featureset)
    trainset, devset = splitByRatio(featureset, splitRatio)

    classifier = NaiveBayesClassifier.train(trainset)
    # show_most_informative_features() prints its own report and returns
    # None, so wrapping it in `print` only added a stray "None" line.
    classifier.show_most_informative_features(10)
    return nltk_classify.accuracy(classifier, devset)
Пример #2
0
 def testGetHamContent(self):
     """Check that getHamContent() yields 1171 items and a truthy first item."""
     hamMessages = getHamContent()

     # Verify the size before indexing so an empty result fails on the
     # length assertion rather than on the subscript.
     messageCount = len(hamMessages)
     self.assertEqual(messageCount, 1171)

     firstMessage = hamMessages[0]
     self.assertTrue(firstMessage)