def TestSpamFilter(driver):
    """Run the spam filter/training round-trip test on every watch folder.

    For each (store, folder) pair yielded by
    ``driver.GetWatchFolderGenerator()`` a SPAM test message is created in
    the watch folder and then moved between folders.  After every step the
    classifier's ``nspam``/``nham`` counters, the per-word counts (through
    ``check_words``) and the message's trained state (through the ``train``
    module) are compared against what that step is expected to produce.
    The training done by the test is undone before the final checks, even
    when one of the intermediate steps fails.

    Every failed expectation is reported through ``TestFailed``.

    Args:
        driver: test driver supplying ``manager``, ``folder_spam``,
            ``folder_unsure`` and the folder/message helper methods
            called below.
    """
    bayes = driver.manager.classifier_data.bayes
    nspam = bayes.nspam
    nham = bayes.nham
    # NOTE(review): copy.copy() makes a shallow copy, so unless the
    # classifier customizes copying, original_bayes.wordinfo/probcache may
    # be the very same objects as bayes.wordinfo/probcache and the final
    # comparisons below could not detect a change - confirm.
    original_bayes = copy.copy(driver.manager.classifier_data.bayes)
    # For each watch folder, create a spam message and walk it through the
    # filter/train/untrain cycle.
    for msf_watch, folder_watch in driver.GetWatchFolderGenerator():
        # The parenthesized single-argument form prints the same text on
        # Python 2 and is valid syntax on Python 3.
        print("Performing Spam test on watch folder '%s'..." % msf_watch.GetFQName())
        # Create a spam message in the Inbox - it should get immediately
        # filtered.  Only the word list is needed afterwards.
        _msg, words = driver.CreateTestMessageInFolder(SPAM, folder_watch)
        # Give the filters a chance to run.
        WaitForFilters()
        # It should no longer be in the Inbox.
        driver.CheckMessageFilteredFrom(folder_watch)
        # It should be in the "sure is spam" folder.
        spam_msg = driver.FindTestMessage(driver.folder_spam)
        if spam_msg is None:
            TestFailed("The test message vanished from the Inbox, but didn't appear in Spam")
        # Check that none of the above caused training.
        if nspam != bayes.nspam:
            TestFailed("Something caused a new spam message to appear")
        if nham != bayes.nham:
            TestFailed("Something caused a new ham message to appear")
        check_words(words, bayes, 0, 0)

        # Now move the message back to the inbox - it should get trained.
        store_msg = driver.manager.message_store.GetMessage(spam_msg)
        import train
        if train.been_trained_as_ham(store_msg, driver.manager.classifier_data):
            TestFailed("This new spam message should not have been trained as ham yet")
        if train.been_trained_as_spam(store_msg, driver.manager.classifier_data):
            TestFailed("This new spam message should not have been trained as spam yet")
        spam_msg.Move(folder_watch)
        WaitForFilters()
        spam_msg = driver.FindTestMessage(folder_watch)
        if spam_msg is None:
            TestFailed("The message appears to have been filtered out of the watch folder")
        store_msg = driver.manager.message_store.GetMessage(spam_msg)
        # From here on the message is expected to have been trained; the
        # finally clause untrains it unless the explicit untrain step at
        # the end of the try body has been reached.
        need_untrain = True
        try:
            if nspam != bayes.nspam:
                TestFailed("There were not the same number of spam messages after a re-train")
            if nham+1 != bayes.nham:
                TestFailed("There was not one more ham messages after a re-train")
            if train.been_trained_as_spam(store_msg, driver.manager.classifier_data):
                TestFailed("This new spam message should not have been trained as spam yet")
            if not train.been_trained_as_ham(store_msg, driver.manager.classifier_data):
                TestFailed("This new spam message should have been trained as ham now")
            # Word infos should have one extra ham.
            check_words(words, bayes, 0, 1)

            # Now move it back to the Spam folder.  This should see the
            # message un-trained as ham, and re-trained as spam.
            spam_msg.Move(driver.folder_spam)
            WaitForFilters()
            spam_msg = driver.FindTestMessage(driver.folder_spam)
            if spam_msg is None:
                TestFailed("Could not find the message in the Spam folder")
            store_msg = driver.manager.message_store.GetMessage(spam_msg)
            if nspam +1 != bayes.nspam:
                TestFailed("There should be one more spam now")
            if nham != bayes.nham:
                TestFailed("There should be the same number of hams again")
            if not train.been_trained_as_spam(store_msg, driver.manager.classifier_data):
                TestFailed("This new spam message should have been trained as spam by now")
            if train.been_trained_as_ham(store_msg, driver.manager.classifier_data):
                TestFailed("This new spam message should have been un-trained as ham")
            # Word infos should have one extra spam, no extra ham.
            check_words(words, bayes, 1, 0)

            # Move the message to another folder, and make sure we still
            # identify it correctly as having been trained.  The "unsure"
            # folder is used because we know about it, and we know that no
            # special watching of this folder exists (hence no
            # WaitForFilters() call after the move).
            spam_msg.Move(driver.folder_unsure)
            spam_msg = driver.FindTestMessage(driver.folder_unsure)
            if spam_msg is None:
                TestFailed("Could not find the message in the Unsure folder")
            store_msg = driver.manager.message_store.GetMessage(spam_msg)
            if not train.been_trained_as_spam(store_msg, driver.manager.classifier_data):
                TestFailed("Message was not identified as Spam after moving")
            # Word infos should still be 'spam'.
            check_words(words, bayes, 1, 0)

            # Now undo the damage we did.
            was_spam = train.untrain_message(store_msg, driver.manager.classifier_data)
            if not was_spam:
                TestFailed("Untraining this message did not indicate it was spam")
            if train.been_trained_as_spam(store_msg, driver.manager.classifier_data) or \
               train.been_trained_as_ham(store_msg, driver.manager.classifier_data):
                TestFailed("Untraining this message kept it has ham/spam")
            need_untrain = False
        finally:
            if need_untrain:
                train.untrain_message(store_msg, driver.manager.classifier_data)

        # Check all the counts are back where we started.
        if nspam != bayes.nspam:
            TestFailed("Spam count didn't get back to the same")
        if nham != bayes.nham:
            TestFailed("Ham count didn't get back to the same")
        check_words(words, bayes, 0, 0)

        if bayes.wordinfo != original_bayes.wordinfo:
            TestFailed("The bayes object's 'wordinfo' did not compare the same at the end of all this!")
        if bayes.probcache != original_bayes.probcache:
            TestFailed("The bayes object's 'probcache' did not compare the same at the end of all this!")

        spam_msg.Delete()
    print("Created a Spam message, and saw it get filtered and trained.")
def TestSpamFilter(driver):
    """Check spam filtering and move-triggered training for each watch folder.

    For every pair yielded by ``driver.GetWatchFolderGenerator()`` a SPAM
    test message is created in the watch folder and then moved between
    folders.  After each step the classifier's ``nspam``/``nham`` counters,
    the per-word counts (via ``check_words``) and the message's trained
    state (via ``train.been_trained_as_spam`` / ``been_trained_as_ham``)
    are verified:

    1. after creation the message must be found in ``driver.folder_spam``
       with no training recorded;
    2. moved back to the watch folder: one extra ham, trained as ham;
    3. moved to ``driver.folder_spam`` again: one extra spam, no extra
       ham, trained as spam;
    4. moved to ``driver.folder_unsure``: still reported as trained spam;
    5. untrained explicitly: counters and word counts back at their
       starting values.

    Every failed expectation is reported through ``TestFailed``.

    Args:
        driver: test driver supplying ``manager``, ``folder_spam``,
            ``folder_unsure`` and the folder/message helper methods
            called below.
    """
    bayes = driver.manager.classifier_data.bayes
    spam_at_start = bayes.nspam
    ham_at_start = bayes.nham
    # NOTE(review): copy.copy() makes a shallow copy, so unless the
    # classifier customizes copying, the snapshot's wordinfo/probcache may
    # be the very same objects as the live ones and the final comparisons
    # could not detect a change - confirm.
    bayes_at_start = copy.copy(driver.manager.classifier_data.bayes)

    def fail_if(failed, message):
        # Report `message` through TestFailed when `failed` is true.
        if failed:
            TestFailed(message)

    def expect_counts(extra_spam, extra_ham, spam_message, ham_message):
        # Compare the live counters with the starting values plus deltas;
        # the spam counter is always checked before the ham counter.
        fail_if(spam_at_start + extra_spam != bayes.nspam, spam_message)
        fail_if(ham_at_start + extra_ham != bayes.nham, ham_message)

    for msf_watch, watch_folder in driver.GetWatchFolderGenerator():
        print("Performing Spam test on watch folder '%s'..." % msf_watch.GetFQName())

        # Step 1: create the message and let the filters act on it.
        _created, msg_words = driver.CreateTestMessageInFolder(SPAM, watch_folder)
        WaitForFilters()
        driver.CheckMessageFilteredFrom(watch_folder)
        test_msg = driver.FindTestMessage(driver.folder_spam)
        fail_if(test_msg is None,
                "The test message vanished from the Inbox, but didn't appear in Spam")
        # Nothing so far is expected to have trained the classifier.
        expect_counts(0, 0,
                      "Something caused a new spam message to appear",
                      "Something caused a new ham message to appear")
        check_words(msg_words, bayes, 0, 0)

        stored = driver.manager.message_store.GetMessage(test_msg)
        driver.manager.classifier_data.message_db.load_msg(stored)
        import train
        fail_if(train.been_trained_as_ham(stored),
                "This new spam message should not have been trained as ham yet")
        fail_if(train.been_trained_as_spam(stored),
                "This new spam message should not have been trained as spam yet")

        # Step 2: move it back to the watch folder.
        test_msg.Move(watch_folder)
        WaitForFilters()
        test_msg = driver.FindTestMessage(watch_folder)
        fail_if(test_msg is None,
                "The message appears to have been filtered out of the watch folder")
        stored = driver.manager.message_store.GetMessage(test_msg)
        driver.manager.classifier_data.message_db.load_msg(stored)

        # The finally clause untrains the message unless the explicit
        # untrain step at the end of the try body has been reached.
        untrain_pending = True
        try:
            expect_counts(0, 1,
                          "There were not the same number of spam messages after a re-train",
                          "There was not one more ham messages after a re-train")
            fail_if(train.been_trained_as_spam(stored),
                    "This new spam message should not have been trained as spam yet")
            fail_if(not train.been_trained_as_ham(stored),
                    "This new spam message should have been trained as ham now")
            check_words(msg_words, bayes, 0, 1)

            # Step 3: move it to the spam folder again.
            test_msg.Move(driver.folder_spam)
            WaitForFilters()
            test_msg = driver.FindTestMessage(driver.folder_spam)
            fail_if(test_msg is None,
                    "Could not find the message in the Spam folder")
            stored = driver.manager.message_store.GetMessage(test_msg)
            driver.manager.classifier_data.message_db.load_msg(stored)
            expect_counts(1, 0,
                          "There should be one more spam now",
                          "There should be the same number of hams again")
            fail_if(not train.been_trained_as_spam(stored),
                    "This new spam message should have been trained as spam by now")
            fail_if(train.been_trained_as_ham(stored),
                    "This new spam message should have been un-trained as ham")
            check_words(msg_words, bayes, 1, 0)

            # Step 4: move it to the unsure folder (no filter wait here)
            # and make sure the trained state is still reported.
            test_msg.Move(driver.folder_unsure)
            test_msg = driver.FindTestMessage(driver.folder_unsure)
            fail_if(test_msg is None,
                    "Could not find the message in the Unsure folder")
            stored = driver.manager.message_store.GetMessage(test_msg)
            driver.manager.classifier_data.message_db.load_msg(stored)
            fail_if(not train.been_trained_as_spam(stored),
                    "Message was not identified as Spam after moving")
            check_words(msg_words, bayes, 1, 0)

            # Step 5: undo the training explicitly and re-load the record
            # before looking at the trained state again.
            was_spam = train.untrain_message(stored, driver.manager.classifier_data)
            driver.manager.classifier_data.message_db.load_msg(stored)
            fail_if(not was_spam,
                    "Untraining this message did not indicate it was spam")
            fail_if(train.been_trained_as_spam(stored) or train.been_trained_as_ham(stored),
                    "Untraining this message kept it has ham/spam")
            untrain_pending = False
        finally:
            if untrain_pending:
                train.untrain_message(stored, driver.manager.classifier_data)

        # Everything must be back where it started.
        expect_counts(0, 0,
                      "Spam count didn't get back to the same",
                      "Ham count didn't get back to the same")
        check_words(msg_words, bayes, 0, 0)
        fail_if(bayes.wordinfo != bayes_at_start.wordinfo,
                "The bayes object's 'wordinfo' did not compare the same at the end of all this!")
        fail_if(bayes.probcache != bayes_at_start.probcache,
                "The bayes object's 'probcache' did not compare the same at the end of all this!")

        test_msg.Delete()
    print("Created a Spam message, and saw it get filtered and trained.")