def getBayesAccuracy(splitRatio=0.9):
    """Train a Naive Bayes spam classifier and return its accuracy.

    Every text from getSpamContent() is paired with the SPAM label and every
    text from getHamContent() with the NOSPAM label, using featuresForText()
    to build the feature representation. The labelled examples are shuffled,
    divided with splitByRatio(), and the first part is used for training
    while the second part is used for evaluation.

    Args:
        splitRatio: Value forwarded to splitByRatio() to control the
            train/dev division (presumably the training fraction -- confirm
            against splitByRatio).

    Returns:
        The value of nltk_classify.accuracy() for the trained classifier on
        the held-out dev set.
    """
    featureset = [(featuresForText(text), SPAM) for text in getSpamContent()]
    featureset.extend(
        (featuresForText(text), NOSPAM) for text in getHamContent()
    )

    # Shuffle so the split does not put all spam in one part and all ham in
    # the other (the list is built spam-first).
    shuffle(featureset)
    trainset, devset = splitByRatio(featureset, splitRatio)

    classifier = NaiveBayesClassifier.train(trainset)

    # show_most_informative_features() writes its table to stdout itself and
    # returns None, so it must not be wrapped in print (that would emit a
    # stray "None" line after the table).
    classifier.show_most_informative_features(10)

    return nltk_classify.accuracy(classifier, devset)
def testGetHamContent(self):
    """Check that getHamContent() yields 1171 items and a truthy first item."""
    hamMessages = getHamContent()
    messageCount = len(hamMessages)
    # The count is checked first, so an empty result fails here rather than
    # raising IndexError on the lookup below.
    self.assertEqual(messageCount, 1171)
    firstMessage = hamMessages[0]
    self.assertTrue(firstMessage)