Exemplo n.º 1
0
def trainNaiveBayes(spamdir, hamdir):
    """Train a Naive Bayes classifier from a spam and a ham directory.

    Every path yielded by ``listdir_fullpath(spamdir)`` is converted with
    ``featuresForMail`` and labelled ``True``; every path yielded by
    ``listdir_fullpath(hamdir)`` is converted the same way and labelled
    ``False``. Spam samples come first, followed by ham samples.

    Returns whatever ``NaiveBayesClassifier.train`` returns for the
    resulting list of ``(features, label)`` pairs.
    """
    # Build the labelled samples directly as (features, label) pairs.
    labelled = [(featuresForMail(path), True)
                for path in listdir_fullpath(spamdir)]
    labelled += [(featuresForMail(path), False)
                 for path in listdir_fullpath(hamdir)]

    return NaiveBayesClassifier.train(labelled)
Exemplo n.º 2
0
from features import featuresForMail
from sys import argv

# do cross validation on training/testing data

if __name__ == '__main__':
    # Expected invocation: <training directory> <testing directory>.
    # Both directories are read through their 'ham' and 'spam' subdirectories.
    if len(argv) == 3:
        traindir = argv[1]
        testdir = argv[2]

        # trainNaiveBayes is declared as (spamdir, hamdir) and labels spam
        # as True and ham as False, so the spam directory must come first.
        # Both training sets come from the training directory.
        classi = trainNaiveBayes(traindir + '/spam', traindir + '/ham')

        total = 0
        good = 0

        # Evaluate on the testing directory (not the data we trained on).
        # Ham is expected to classify as False, spam as True.
        for subdir, expected in (('/ham', False), ('/spam', True)):
            for f in listdir_fullpath(testdir + subdir):
                total += 1
                if classi.classify(featuresForMail(f)) == expected:
                    good += 1

        # Avoid a ZeroDivisionError traceback when there is nothing to test.
        if total == 0:
            raise SystemExit("no test mails found under " + testdir)

        # Truncate (rather than round) the percentage to two decimals.
        percent = float(int(float(good) / float(total) * 10000)) / 100
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("accuracy: " + str(percent) + "%")

    else:
        print("usage: python applyCrossval.py <training directory> <testing directory>")