Exemplo n.º 1
0
def getBayesAccuracy(splitRatio=0.9):
    """Train a Naive Bayes spam classifier and return its held-out accuracy.

    Every text from ``getSpamContent()`` is labelled ``SPAM`` and every text
    from ``getHamContent()`` is labelled ``NOSPAM``. The labelled feature
    sets are shuffled and split with ``splitByRatio``; the first part trains
    the classifier and the second part is used to measure accuracy.

    As a side effect, the classifier's 10 most informative features are
    written to standard output.

    Args:
        splitRatio: Forwarded to ``splitByRatio`` along with the shuffled
            feature sets. Presumably the fraction of examples used for
            training -- confirm against ``splitByRatio``.

    Returns:
        The result of ``nltk_classify.accuracy(classifier, devset)``.
    """
    # Spam examples first, then ham; the order is irrelevant after shuffling.
    featureset = [(featuresForText(text), SPAM) for text in getSpamContent()]
    featureset.extend(
        (featuresForText(text), NOSPAM) for text in getHamContent()
    )

    # Shuffled in place (the return value is not used) so that both classes
    # end up in the training and the development parts.
    shuffle(featureset)
    trainset, devset = splitByRatio(featureset, splitRatio)

    classifier = NaiveBayesClassifier.train(trainset)

    # NLTK's show_most_informative_features() prints its table itself and
    # returns None, so it is called directly: wrapping it in ``print`` only
    # added a stray "None" line (and is a syntax error on Python 3).
    classifier.show_most_informative_features(10)

    return nltk_classify.accuracy(classifier, devset)
Exemplo n.º 2
0
 def testGetSpamContent(self):
     """Check the size of the spam corpus and that one entry is non-empty.

     Asserts that ``getSpamContent()`` yields exactly 1732 items and that
     the item at index 234 is truthy.
     """
     spam_texts = getSpamContent()

     # Verify the size first, so a short corpus fails here rather than on
     # the index lookup below.
     actual_count = len(spam_texts)
     self.assertEqual(actual_count, 1732)

     sample_text = spam_texts[234]
     self.assertTrue(sample_text)