def main():
    """Build the data files, train the Naive Bayes classifier and evaluate it.

    Steps, in order:
      1. Create and load the training file ("mails-train.txt").
      2. Create and load the test file ("mails-test.txt").
      3. Extract the classes and train the multinomial Naive Bayes model.
      4. Report the accuracy on the test set.
      5. Fetch one document from the test set via Data.GetDocument, time a
         single classification of it, and classify it.

    Progress and results are written to standard output.
    """
    # Every print uses the single-argument call form, which produces the same
    # output under the Python 2 print statement and the Python 3 function.
    print("Creating Train File...")
    Data.CreateDataFile("mails-train", "mails-train.txt")
    print("Initializing Train File...")
    trainingSet = Data.LoadFile("mails-train.txt")

    print("Creating Test File...")
    Data.CreateDataFile("mails-test", "mails-test.txt")
    print("Initializing Test File...")
    testSet = Data.LoadFile("mails-test.txt")

    print("Extracting Classes..")
    classes = Train.ExtractClasses(trainingSet)
    print("Training NBC...")
    vocabulary, prior, condprob = Train.TrainMultinomialNaiveBayes(
        classes, trainingSet)

    print("Testing Accuracy...")
    percentage = Test.Accuracy(classes, vocabulary, prior, condprob, testSet)
    # The double space after "is" reproduces the original output exactly.
    print("The percentage of correct predictions is  %s percent."
          % (100 * percentage,))

    print("Get Random Document...")
    testDocument = Data.GetDocument(testSet)
    # Time and classify the same payload. The timing call used to receive the
    # whole testDocument wrapper while the classification below received
    # testDocument['document'], so the measurement did not cover the input
    # that is actually classified.
    # NOTE(review): assumes testDocument['document'] is the form the
    # classifier expects, as in the direct call below - confirm in Data.
    document = testDocument['document']

    print("Do A Time Measurement of the Application of the NBC...")
    elapsed = Test.TimeMeasure(
        classes, vocabulary, prior, condprob, document)
    print("The time is took to do a single application of the NBC on a "
          "document is %s seconds." % (elapsed,))

    print("Applying NBC on Document...")
    topClass, score = Classify.ApplyMultinomialNaiveBayes(
        classes, vocabulary, prior, condprob, document)
def TimeMeasure(classes, vocabulary, prior, condprob, document):
    """Time a single run of the multinomial Naive Bayes classifier.

    Reads the clock right before and right after one call to
    Classify.ApplyMultinomialNaiveBayes on the given document. The
    classification result itself is discarded.

    Returns:
        The difference between the two time.time() readings, in seconds.
    """
    began = time.time()
    Classify.ApplyMultinomialNaiveBayes(
        classes, vocabulary, prior, condprob, document)
    elapsed = time.time() - began
    return elapsed
def testSingleFile(self):
    """Classify one text file picked through a file-open dialog.

    Opens a dialog with askopenfile, reads the chosen file, splits its
    contents on single spaces, normalizes the words with Data.Normalize and
    classifies the result using the model stored on the instance
    (self.classes, self.vocabulary, self.prior, self.condprob).

    Progress and the predicted class are printed to standard output.
    Returns None; does nothing when the dialog is cancelled.
    """
    f = askopenfile(mode='r', defaultextension=".txt")
    if f is None:
        # askopenfile returns None when the dialog is closed with "cancel".
        return

    # Release the handle as soon as the contents are in memory, so it is
    # closed even when reading, normalizing or classifying raises.
    with f:
        lines = f.read()

    # Splitting on a literal single space; consecutive spaces yield empty
    # strings, exactly as the previous re.split(' ', ...) did.
    bagOfWords = lines.split(' ')
    singleFile = Data.Normalize(bagOfWords)
    print("Loaded.")

    print("Calculating...")
    topClass, _score = Classify.ApplyMultinomialNaiveBayes(
        self.classes, self.vocabulary, self.prior, self.condprob, singleFile)
    print("This document belongs to %s" % (topClass,))
    print("Done.")
def Accuracy(classes, vocabulary, prior, condprob, dataset):
    """Return the fraction of documents in *dataset* classified correctly.

    Runs Classify.ApplyMultinomialNaiveBayes on every document of a dataset
    whose true classes are known, and counts how often the predicted class
    equals the true class.

    Args:
        classes, vocabulary, prior, condprob: the trained model, passed
            through unchanged to the classifier.
        dataset: mapping from true class to a mapping of documents; each
            dataset[c][d] is handed to the classifier.

    Returns:
        A float between 0 and 1: correct predictions divided by the number
        of tested documents. Returns 0.0 when the dataset holds no documents.
    """
    correct = 0
    total = 0
    for trueClass in dataset:
        documents = dataset[trueClass]
        for docKey in documents:
            topClass, _score = Classify.ApplyMultinomialNaiveBayes(
                classes, vocabulary, prior, condprob, documents[docKey])
            if topClass == trueClass:
                correct += 1
            total += 1

    print("The amount of total tested documents is %s" % (total,))
    print("The amount of correct predictions are %s" % (correct,))

    if total == 0:
        # Nothing was tested; avoid dividing by zero.
        return 0.0
    # Force true division: with two ints Python 2 would floor the result to
    # 0 or 1 instead of returning the fraction.
    return float(correct) / total