def setUp(self):
    """Open the plain-text fixture and build the ``EmailObject`` under test.

    The raw bytes of the fixture are kept in ``self.text`` so tests can
    derive expected values from them; the stream is rewound before it is
    handed to ``EmailObject`` so parsing starts at the first byte.
    """
    self.plain_file = './tests/fixtures/plain.eml'
    self.plaintext = io.open(self.plain_file, 'rb')
    # Register the close right after opening so the handle is released even
    # if a later line of setUp raises (tearDown would not run in that case).
    # Assumes the enclosing class is a unittest.TestCase.
    self.addCleanup(self.plaintext.close)
    self.text = self.plaintext.read()
    self.plaintext.seek(0)
    self.plain_email = EmailObject(self.plaintext)
    # Let assertEqual print complete diffs instead of truncating them.
    self.maxDiff = None
def train(self):
    """Fold every queued ``(category, path)`` pair into the token counters.

    For each entry of ``self.to_train`` the file is parsed into an
    ``EmailObject`` and the category is added to ``self.categories``. For
    each token produced by ``Tokenizer.unique_tokenizer`` on the email body,
    ``self.training[category][token]``, ``self.totals['_all']`` and
    ``self.totals[category]`` are incremented. The queue is emptied
    afterwards so a second call does not count the same files again.
    """
    # NOTE(review): the totals are incremented once per token here; this
    # nesting was reconstructed from whitespace-collapsed code -- confirm.
    for category, path in self.to_train:
        # The file stays open only while this email is parsed and tokenized.
        with io.open(path, 'rb') as handle:
            email = EmailObject(handle)

            self.categories.add(category)

            for token in Tokenizer.unique_tokenizer(email.body()):
                self.training[category][token] += 1
                self.totals['_all'] += 1
                self.totals[category] += 1

    self.to_train = {}
class TestHTMLEmail(unittest.TestCase):
    """Checks the body text and subject ``EmailObject`` reports for the HTML fixture."""

    def setUp(self):
        """Read the raw fixture, then rewind it and parse it into an ``EmailObject``."""
        self.html_file = io.open('./tests/fixtures/html.eml', 'rb')
        # Close the fixture after each test, including when the remainder of
        # setUp fails, so handles do not pile up across the suite.
        self.addCleanup(self.html_file.close)
        self.html = self.html_file.read()
        self.html_file.seek(0)
        self.html_email = EmailObject(self.html_file)

    def test_parses_stores_inner_text_html(self):
        """body() equals the BeautifulSoup text of everything after the first blank line."""
        # NOTE(review): self.html comes from a binary-mode read but is split
        # with a str separator; that only works where bytes is str
        # (Python 2) -- confirm the target interpreter.
        body = "\n\n".join(self.html.split("\n\n")[1:])
        expected = BeautifulSoup(body).text
        self.assertEqual(self.html_email.body(), expected)

    def test_stores_subject(self):
        """subject() equals the text captured after 'Subject: ' in the raw fixture."""
        subject = re.search("Subject: (.*)", self.html).group(1)
        self.assertEqual(self.html_email.subject(), subject)
class TestHTMLEmail(unittest.TestCase):
    """Exercises ``EmailObject`` against the HTML email fixture."""

    def setUp(self):
        """Keep the raw fixture contents and an ``EmailObject`` parsed from the same stream."""
        self.html_file = io.open("./tests/fixtures/html.eml", "rb")
        # Registered immediately after the open: the handle is closed once
        # the test finishes, and also if a later setUp line raises.
        self.addCleanup(self.html_file.close)
        self.html = self.html_file.read()
        # Rewind so EmailObject receives the stream from its first byte.
        self.html_file.seek(0)
        self.html_email = EmailObject(self.html_file)

    def test_parses_stores_inner_text_html(self):
        """The parsed body matches BeautifulSoup's text for the part after the headers."""
        # NOTE(review): a str separator is applied to data read in binary
        # mode; this relies on bytes being str (Python 2) -- confirm.
        body = "\n\n".join(self.html.split("\n\n")[1:])
        expected = BeautifulSoup(body).text
        self.assertEqual(self.html_email.body(), expected)

    def test_stores_subject(self):
        """The parsed subject matches the 'Subject: ' line of the raw fixture."""
        subject = re.search("Subject: (.*)", self.html).group(1)
        self.assertEqual(self.html_email.subject(), subject)
class TestHTMLEmail(unittest.TestCase):
    """Checks the body text and subject ``EmailObject`` reports for the HTML fixture."""

    def setUp(self):
        """Read the fixture as bytes, rewind it, and parse it into an ``EmailObject``."""
        self.html_file = io.open('./tests/fixtures/html.eml', 'rb')
        # Ensure the handle is closed after each test, and also when a later
        # line of setUp raises.
        self.addCleanup(self.html_file.close)
        self.html = self.html_file.read()
        self.html_file.seek(0)
        self.html_email = EmailObject(self.html_file)
        # Show complete diffs on assertion failure.
        self.maxDiff = None

    def test_parses_stores_inner_text_html(self):
        """body() equals the html.parser text of everything after the first blank line."""
        body = b'\n\n'.join(self.html.split(b'\n\n')[1:])
        expected = BeautifulSoup(body, features='html.parser').text
        self.assertEqual(self.html_email.body(), expected)

    def test_stores_subject(self):
        """subject() equals the text captured after 'Subject: ' in the raw fixture."""
        # str() on a bytes object gives its "b'...'" repr on Python 3, where
        # newlines are escaped and (.*) would capture the rest of the whole
        # message. Decode so the pattern stops at the end of the header line.
        # NOTE(review): assumes the fixture is UTF-8 compatible -- confirm.
        text = self.html.decode('utf-8', 'replace')
        subject = re.search('Subject: (.*)', text).group(1)
        self.assertEqual(str(self.html_email.subject()), subject)
def setUp(self):
    """Create a ``SpamTrainer`` over three labelled fixtures and parse one email.

    ``self.training`` pairs each category label with a fixture path; the
    plain fixture is additionally parsed into ``self.email``.
    """
    plain_fixture = './tests/fixtures/plain.eml'
    small_fixture = './tests/fixtures/small.eml'

    self.training = [
        ['spam', plain_fixture],
        ['ham', small_fixture],
        ['scram', plain_fixture],
    ]
    self.trainer = SpamTrainer(self.training)

    # The handle is only needed while EmailObject is being constructed.
    with io.open(plain_fixture, 'rb') as eml_file:
        self.email = EmailObject(eml_file)
def setUp(self):
    """Create a ``SpamTrainer`` over three labelled fixtures and parse one email.

    ``self.training`` pairs each category label with a fixture path; the
    plain fixture is additionally opened in text mode and parsed into
    ``self.email``.
    """
    self.training = [
        ['spam', './tests/fixtures/plain.eml'],
        ['ham', './tests/fixtures/small.eml'],
        ['scram', './tests/fixtures/plain.eml'],
    ]
    self.trainer = SpamTrainer(self.training)

    # Named `fixture` rather than `file` to avoid shadowing the builtin.
    fixture = io.open('./tests/fixtures/plain.eml', 'r')
    # Close the handle when the test ends instead of leaving it to the
    # garbage collector. Assumes the enclosing class is a unittest.TestCase.
    self.addCleanup(fixture.close)
    self.email = EmailObject(fixture)
class TestPlaintextEmailObject(unittest.TestCase):
    """Checks the body and subject ``EmailObject`` reports for the plain-text fixture."""

    # NOTE(review): this separator is the three bytes backslash, space, "n",
    # not a blank-line separator; together with the [:1] slice below the
    # expected body is the first chunk of the split. Left unchanged --
    # confirm the intent before altering it.
    CLRF = b'\\ n'

    def setUp(self):
        """Read the fixture as bytes, rewind it, and parse it into an ``EmailObject``."""
        self.plain_file = './tests/fixtures/plain.eml'
        self.plaintext = io.open(self.plain_file, 'rb')
        # Close the handle after each test, and also when a later line of
        # setUp raises.
        self.addCleanup(self.plaintext.close)
        self.text = self.plaintext.read()
        self.plaintext.seek(0)
        self.plain_email = EmailObject(self.plaintext)
        # Show complete diffs on assertion failure.
        self.maxDiff = None

    def test_parse_plain_body(self):
        """body() equals the first CLRF-delimited chunk of the raw fixture."""
        body = self.CLRF.join(self.text.split(self.CLRF)[:1])
        self.assertEqual(self.plain_email.body(), body)

    def test_parses_the_subject(self):
        """subject() equals the text captured after 'Subject: ' in the raw fixture."""
        # str() on a bytes object gives its "b'...'" repr on Python 3, where
        # newlines are escaped and (.*) would capture the rest of the whole
        # message. Decode so the pattern stops at the end of the header line.
        # NOTE(review): assumes the fixture is UTF-8 compatible -- confirm.
        text = self.text.decode('utf-8', 'replace')
        subject = re.search('Subject: (.*)', text).group(1)
        self.assertEqual(str(self.plain_email.subject()), subject)
class TestPlaintextEmailObject(unittest.TestCase):
    """Checks the body and subject ``EmailObject`` reports for the plain-text fixture."""

    # Blank line used to cut the raw fixture into chunks; the tests treat
    # everything after the first occurrence as the body.
    CLRF = "\n\n"

    def setUp(self):
        """Read the fixture in text mode, rewind it, and parse it into an ``EmailObject``."""
        self.plain_file = './tests/fixtures/plain.eml'
        self.plaintext = io.open(self.plain_file, 'r')
        # Close the handle after each test, and also when a later line of
        # setUp raises.
        self.addCleanup(self.plaintext.close)
        self.text = self.plaintext.read()
        self.plaintext.seek(0)
        self.plain_email = EmailObject(self.plaintext)

    def test_parse_plain_body(self):
        """body() equals everything after the first blank line of the raw fixture."""
        body = self.CLRF.join(self.text.split(self.CLRF)[1:])
        self.assertEqual(self.plain_email.body(), body)

    def test_parses_the_subject(self):
        """subject() equals the text captured after 'Subject: ' in the raw fixture."""
        subject = re.search("Subject: (.*)", self.text).group(1)
        self.assertEqual(self.plain_email.subject(), subject)
class TestHTMLEmail(unittest.TestCase):
    """Checks the body text and subject ``EmailObject`` reports for the HTML fixture."""

    def setUp(self):
        """Decode the fixture into ``self.html`` and parse the same stream into an ``EmailObject``."""
        with io.open('./tests/fixtures/html.eml', 'rb') as fixture:
            raw = fixture.read()
            self.html = raw.decode('utf-8')
            # Rewind: the read above left the stream at end-of-file.
            fixture.seek(0)
            self.html_email = EmailObject(fixture)

    def test_parses_stores_inner_text_html(self):
        """body() equals the html.parser text of everything after the first blank line."""
        chunks = self.html.split("\n\n")
        markup = "\n\n".join(chunks[1:])
        expected = BeautifulSoup(markup, 'html.parser').text
        self.assertEqual(self.html_email.body(), expected)

    def test_stores_subject(self):
        """subject() equals the text captured after 'Subject: ' in the decoded fixture."""
        match = re.search("Subject: (.*)", self.html)
        expected_subject = match.group(1)
        self.assertEqual(self.html_email.subject(), expected_subject)
def parse_emails(keyfile):
    """Build an ``EmailObject`` for every entry listed in *keyfile*.

    *keyfile* is read in binary mode. Each non-blank line is split on its
    first space into a label and a path; the path is opened in binary mode
    and wrapped in an ``EmailObject`` whose ``category`` is the label (both
    are bytes). Progress messages are printed before and after.

    Returns the list of ``EmailObject`` instances in key-file order.
    Raises ``ValueError`` for a non-blank line that contains no space.
    """
    emails = []
    print('parsing emails for ' + keyfile)
    # Context manager so the key file is closed even if an entry fails.
    with io.open(keyfile, 'rb') as index:
        for line in index:
            entry = line.rstrip()
            if not entry:
                # Blank lines carry no entry; skip them instead of failing
                # at the unpacking below.
                continue
            # maxsplit=1 keeps paths that themselves contain spaces intact.
            label, path = entry.split(b' ', 1)
            with io.open(path, 'rb') as f:
                emails.append(EmailObject(f, category=label))
    print('Done parsing files for ' + keyfile)
    return emails
class TestMultipartEmailObject(unittest.TestCase):
    """Checks the body and subject ``EmailObject`` reports for the multipart fixture."""

    def setUp(self):
        """Read the fixture in text mode, rewind it, and parse it into an ``EmailObject``."""
        self.multipart_file = './tests/fixtures/multipart.eml'
        self.multipart = io.open(self.multipart_file, 'r')
        # Close the handle after each test, and also when a later line of
        # setUp raises.
        self.addCleanup(self.multipart.close)
        self.text = self.multipart.read()
        self.multipart.seek(0)
        self.multipart_email = EmailObject(self.multipart)

    def test_parse_concatenated_body_of_text(self):
        """body() equals the decoded text/plain and text/html payloads joined in walk order."""
        internal_mail = self.multipart_email.mail
        # assertTrue rather than a bare assert, which is stripped under -O.
        self.assertTrue(internal_mail.is_multipart())

        # Both content types were handled identically, so one prefix test
        # covers them; join avoids repeated string concatenation.
        # NOTE(review): get_payload(decode=True) returns bytes on Python 3,
        # which cannot be joined with a str -- confirm target interpreter.
        text_types = ('text/plain', 'text/html')
        body = ''.join(
            part.get_payload(decode=True)
            for part in internal_mail.walk()
            if part.get_content_type().startswith(text_types)
        )

        self.assertEqual(self.multipart_email.body(), body)

    def test_stores_subject(self):
        """subject() equals the text captured after 'Subject: ' in the raw fixture."""
        subject = re.search("Subject: (.*)", self.text).group(1)
        self.assertEqual(self.multipart_email.subject(), subject)
def setUp(self):
    """Open the HTML fixture and build the ``EmailObject`` under test.

    The raw bytes are kept in ``self.html``; the stream is rewound before
    it is handed to ``EmailObject`` so parsing starts at the first byte.
    """
    self.html_file = io.open('./tests/fixtures/html.eml', 'rb')
    # Close the handle after each test, and also when a later line of setUp
    # raises. Assumes the enclosing class is a unittest.TestCase.
    self.addCleanup(self.html_file.close)
    self.html = self.html_file.read()
    self.html_file.seek(0)
    self.html_email = EmailObject(self.html_file)
def setUp(self):
    """Decode the plain-text fixture into ``self.text`` and parse it into an ``EmailObject``."""
    fixture_path = './tests/fixtures/plain.eml'
    self.plain_file = fixture_path

    with io.open(fixture_path, 'rb') as stream:
        raw_bytes = stream.read()
        self.text = raw_bytes.decode('utf-8')
        # Rewind: the read above left the stream at end-of-file.
        stream.seek(0)
        self.plain_email = EmailObject(stream)
def setUp(self):
    """Open the multipart fixture and build the ``EmailObject`` under test.

    The fixture text is kept in ``self.text``; the stream is rewound before
    it is handed to ``EmailObject`` so parsing starts at the beginning.
    """
    self.multipart_file = './tests/fixtures/multipart.eml'
    self.multipart = io.open(self.multipart_file, 'r')
    # Close the handle after each test, and also when a later line of setUp
    # raises. Assumes the enclosing class is a unittest.TestCase.
    self.addCleanup(self.multipart.close)
    self.text = self.multipart.read()
    self.multipart.seek(0)
    self.multipart_email = EmailObject(self.multipart)
def setUp(self):
    """Open the plain-text fixture and build the ``EmailObject`` under test.

    The fixture text is kept in ``self.text``; the stream is rewound before
    it is handed to ``EmailObject`` so parsing starts at the beginning.
    """
    self.plain_file = './tests/fixtures/plain.eml'
    self.plaintext = io.open(self.plain_file, 'r')
    # Close the handle after each test, and also when a later line of setUp
    # raises. Assumes the enclosing class is a unittest.TestCase.
    self.addCleanup(self.plaintext.close)
    self.text = self.plaintext.read()
    self.plaintext.seek(0)
    self.plain_email = EmailObject(self.plaintext)
def setUp(self):
    """Decode the HTML fixture into ``self.html`` and parse it into an ``EmailObject``."""
    with io.open('./tests/fixtures/html.eml', 'rb') as source:
        contents = source.read()
        self.html = contents.decode('utf-8')
        # Rewind: the read above left the stream at end-of-file.
        source.seek(0)
        self.html_email = EmailObject(source)
def setUp(self):
    """Keep the raw HTML fixture bytes and an ``EmailObject`` parsed from the same stream."""
    self.html_file = io.open("./tests/fixtures/html.eml", "rb")
    # Registered right after the open: the handle is closed once the test
    # finishes, and also if a later setUp line raises. Assumes the enclosing
    # class is a unittest.TestCase.
    self.addCleanup(self.html_file.close)
    self.html = self.html_file.read()
    # Rewind so EmailObject receives the stream from its first byte.
    self.html_file.seek(0)
    self.html_email = EmailObject(self.html_file)
def setUp(self):
    """Decode the multipart fixture into ``self.text`` and parse it into an ``EmailObject``."""
    fixture_path = './tests/fixtures/multipart.eml'
    self.multipart_file = fixture_path

    with io.open(fixture_path, 'rb') as stream:
        payload = stream.read()
        self.text = payload.decode('utf-8')
        # Rewind: the read above left the stream at end-of-file.
        stream.seek(0)
        self.multipart_email = EmailObject(stream)