/
test_trainingcorpus.py
executable file
·115 lines (99 loc) · 4.19 KB
/
test_trainingcorpus.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Tests for the TrainingCorpus class.
TrainingCorpus assumes that the !truth.txt file exists
in the corpus directory and uses the contained information
to provide additional functionality.
"""
import unittest
from trainingcorpus import TrainingCorpus
import os
import random
from collections import Counter
from test_readClassificationFromFile import save_classification_to_file
from test_corpus import (
create_corpus_dictionary,
create_corpus_dir_from_dictionary,
delete_corpus_directory,
CORPUS_DIR
)
HAM_TAG = 'OK'
SPAM_TAG = 'SPAM'
TRUTH_FILENAME = '!truth.txt'
class TrainingCorpusTest(unittest.TestCase):
def setUp(self):
"""Prepare fake corpus with !truth.txt file."""
self.email_dict = create_corpus_dictionary()
self.true_class = create_classification_for(self.email_dict.keys())
create_corpus_dir_from_dictionary(self.email_dict)
truth_filepath = os.path.join(CORPUS_DIR, TRUTH_FILENAME)
save_classification_to_file(self.true_class, fname=truth_filepath)
self.tc = TrainingCorpus(CORPUS_DIR)
def tearDown(self):
delete_corpus_directory()
def test_getClass(self):
"""Test the get_class method."""
for key, exp_class in self.true_class.items():
obs_class = self.tc.get_class(key)
self.assertEqual(
exp_class, obs_class,
'The expected class of email {} is {}, but {} was observed'.format(
key, exp_class, obs_class)
)
def test_isSpam(self):
"""Test the is_spam method."""
for key, exp_class in self.true_class.items():
exp_spam = (exp_class==SPAM_TAG)
obs_spam = self.tc.is_spam(key)
self.assertEqual(
exp_spam, obs_spam,
'The email {} spamminess: expected {}, observed {}.'.format(
key, str(exp_spam), str(obs_spam))
)
def test_isHam(self):
"""Test the is_ham method."""
for key, exp_class in self.true_class.items():
exp_ham = (exp_class==HAM_TAG)
obs_ham = self.tc.is_ham(key)
self.assertEqual(
exp_ham, obs_ham,
'The email {} hamminess: expected {}, observed {}.'.format(
key, str(exp_ham), str(obs_ham))
)
def test_spams(self):
"""Test spams() method."""
obs_num_spams = 0
for fname, contents in self.tc.spams():
obs_num_spams += 1
# Validate results
self.assertEqual(self.true_class[fname], SPAM_TAG,
'Non-spam email returned by spams() method.')
self.assertEqual(self.email_dict[fname], contents,
'The read file contents are not equal to the expected contents.')
c = Counter(self.true_class.values())
exp_num_spams = c[SPAM_TAG]
self.assertEqual(exp_num_spams, obs_num_spams,
'The spams() method did not return the right number of spams.')
def test_hams(self):
"""Test hams() method."""
obs_num_hams = 0
for fname, contents in self.tc.hams():
obs_num_hams += 1
# Validate results
self.assertEqual(self.true_class[fname], HAM_TAG,
'Spam email returned by hams() method.')
self.assertEqual(self.email_dict[fname], contents,
'The read file contents are not equal to the expected contents.')
c = Counter(self.true_class.values())
exp_num_hams = c[HAM_TAG]
self.assertEqual(exp_num_hams, obs_num_hams,
'The hams() method did not return the right number of hams.')
def create_classification_for(keys):
"""Create a fake classification dictionary for the email filenames in keys."""
d = {}
TRUTH_VALUES = [HAM_TAG, SPAM_TAG]
for key in keys:
d[key] = random.choice(TRUTH_VALUES)
return d
if __name__=="__main__":
unittest.main()