def test_allPredictionsWrong(self):
    """Quality must be 0.0 when the prediction file is built with invert_classes."""
    # Arrange: generate a reference classification and write the truth
    # file together with a prediction file derived via invert_classes.
    reference = create_classification()
    create_truth_and_prediction_file(reference, invert_classes)

    # Act: evaluate the corpus directory.
    quality = compute_quality_for_corpus(CORPUS_DIR)

    # Assert: no prediction matches the truth, so the score is zero.
    self.assertEqual(quality, 0.0)
def test_allPredictionsWrong(self):
    """Quality must be 0.0 when the prediction file is built with invert_classes."""
    # Arrange: generate a reference classification and write the truth
    # file together with a prediction file derived via invert_classes.
    reference = create_classification()
    create_truth_and_prediction_file(reference, invert_classes)

    # Act: run the SUT while open() is replaced, which insists on an
    # explicit encoding being used.
    with replaced_open():
        quality = self.compute_quality_for_corpus(CORPUS_DIR)

    # Assert: no prediction matches the truth, so the score is zero.
    self.assertEqual(quality, 0.0)
def create_identical_truth_and_prediction_file():
    """
    Write one classification to both the truth and the prediction file.

    The same dictionary is saved under TRUTH_FILENAME and
    PREDICTION_FILANAME inside CORPUS_DIR. The corpus directory is
    assumed to exist already.
    """
    # One artificial classification shared by both files.
    shared_classes = create_classification()

    # Resolve both target paths up front, then write them in order
    # (truth first, prediction second).
    targets = [
        os.path.join(CORPUS_DIR, filename)
        for filename in (TRUTH_FILENAME, PREDICTION_FILANAME)
    ]
    for target in targets:
        save_classification_to_file(shared_classes, target)
def test_allPredictionsHam_for10SpamsAnd10Hams(self):
    """All-ham predictions on 10 spams and 10 hams must score 10 / 20."""
    # Prepare the SUT: an artificial truth classification with 20 items,
    # 10 of them spam, and a prediction file derived via hams_only.
    truth_dict = create_classification(n_items=20, n_spams=10)
    create_truth_and_prediction_file(truth_dict, hams_only)

    # With 10 spams and 10 hams all predicted as ham, the confusion
    # matrix has TN = 10 and FN = 10 and no positives at all.
    true_negatives = 10
    false_negatives = 10
    expected_q = true_negatives / (true_negatives + false_negatives)

    # Exercise the SUT
    q = compute_quality_for_corpus(CORPUS_DIR)

    # The quality is a float ratio, so compare with a tolerance instead
    # of exact equality; the SUT may order its arithmetic differently.
    self.assertAlmostEqual(q, expected_q)
def test_allPredictionsHam_for10SpamsAnd20Hams(self):
    """All-ham predictions on 10 spams and 20 hams must score 20 / 30."""
    # Prepare the SUT: an artificial truth classification with 30 items,
    # 10 of them spam, and a prediction file derived via hams_only.
    truth_dict = create_classification(n_items=30, n_spams=10)
    create_truth_and_prediction_file(truth_dict, hams_only)

    # With 10 spams and 20 hams all predicted as ham, the confusion
    # matrix has TN = 20 and FN = 10 and no positives at all.
    true_negatives = 20
    false_negatives = 10
    expected_q = true_negatives / (true_negatives + false_negatives)

    # Exercise the SUT while open() is replaced, which insists on an
    # explicit encoding being used.
    with replaced_open():
        q = self.compute_quality_for_corpus(CORPUS_DIR)

    # 20 / 30 has no exact binary representation, so an exact comparison
    # would depend on how the SUT orders its arithmetic; compare with a
    # tolerance instead.
    self.assertAlmostEqual(q, expected_q)
def create_inverse_truth_and_prediction_file():
    """
    Write a truth file and a prediction file holding inverted classes.

    An artificial classification is saved under TRUTH_FILENAME, and the
    result of invert_classes applied to it is saved under
    PREDICTION_FILANAME, both inside CORPUS_DIR. The corpus directory is
    assumed to exist already.
    """
    # Build the reference classification and its inverted counterpart.
    actual = create_classification()
    flipped = invert_classes(actual)

    # Pair each dictionary with its destination, then write truth first
    # and prediction second.
    outputs = (
        (actual, os.path.join(CORPUS_DIR, TRUTH_FILENAME)),
        (flipped, os.path.join(CORPUS_DIR, PREDICTION_FILANAME)),
    )
    for classes, destination in outputs:
        save_classification_to_file(classes, destination)
def test_1FP2FN_for10SpamsAnd20Hams(self):
    """One FP and two FNs on 10 spams and 20 hams must score 27 / 39."""
    # Prepare the SUT: an artificial truth classification with 30 items,
    # 10 of them spam, and a prediction file derived via n_FP_n_FN.
    truth_dict = create_classification(n_items=30, n_spams=10)
    create_truth_and_prediction_file(truth_dict, n_FP_n_FN)

    # With 10 spams and 20 hams predicted correctly except for 1 FP and
    # 2 FNs, the confusion matrix is FP = 1, FN = 2, TN = 19, TP = 8.
    true_positives = 8
    true_negatives = 19
    false_positives = 1
    false_negatives = 2

    # Modified accuracy: false positives count ten times in the
    # denominator.
    correct = true_positives + true_negatives
    expected_q = correct / (correct + 10 * false_positives + false_negatives)

    # Exercise the SUT while open() is replaced, which insists on an
    # explicit encoding being used.
    with replaced_open():
        q = self.compute_quality_for_corpus(CORPUS_DIR)

    # 27 / 39 has no exact binary representation, so an exact comparison
    # would depend on how the SUT orders its arithmetic; compare with a
    # tolerance instead.
    self.assertAlmostEqual(q, expected_q)