def setUp(self):
    """Prepare a trainer and a parsed sample email for each test.

    Sets ``self.training`` to three two-element lists, each pairing a
    string ('spam', 'ham', 'scram') with a fixture path, hands that list
    to ``SpamTrainer`` (stored as ``self.trainer``), and wraps the plain
    fixture, opened in binary mode, in an ``EmailObject`` stored as
    ``self.email``.
    """
    plain_fixture = './tests/fixtures/plain.eml'
    small_fixture = './tests/fixtures/small.eml'

    self.training = [
        ['spam', plain_fixture],
        ['ham', small_fixture],
        ['scram', plain_fixture],
    ]
    self.trainer = SpamTrainer(self.training)

    # The handle is closed as soon as the EmailObject has been built.
    with io.open(plain_fixture, 'rb') as handle:
        self.email = EmailObject(handle)
def setUp(self):
    """Open the plain-text fixture and build the objects the tests use.

    Attributes set on the test instance:
        plain_file: path of the fixture.
        plaintext: binary handle on the fixture, rewound to offset 0
            before it is handed to ``EmailObject``.
        text: the raw bytes of the whole fixture.
        plain_email: ``EmailObject`` constructed from the rewound handle.
        maxDiff: ``None``, so unittest does not truncate assertion diffs.
    """
    self.plain_file = './tests/fixtures/plain.eml'
    self.plaintext = io.open(self.plain_file, 'rb')
    # Previously the handle was never closed.  Registering the close here
    # keeps ``self.plaintext`` usable for the whole test while ensuring it
    # is released afterwards, even if a later line of setUp raises.
    # NOTE(review): assumes the enclosing class is a unittest.TestCase.
    self.addCleanup(self.plaintext.close)

    self.text = self.plaintext.read()
    # read() left the position at EOF; rewind so EmailObject sees the
    # file from the start.
    self.plaintext.seek(0)
    self.plain_email = EmailObject(self.plaintext)
    self.maxDiff = None
def setUp(self):
    """Prepare a trainer and a parsed sample email for each test.

    Sets ``self.training`` to three two-element lists, each pairing a
    string ('spam', 'ham', 'scram') with a fixture path, passes that list
    to ``SpamTrainer`` (stored as ``self.trainer``), and builds
    ``self.email`` from the plain fixture opened in text mode.
    """
    self.training = [
        ['spam', './tests/fixtures/plain.eml'],
        ['ham', './tests/fixtures/small.eml'],
        ['scram', './tests/fixtures/plain.eml'],
    ]
    self.trainer = SpamTrainer(self.training)

    # The local used to be called ``file`` (shadowing a builtin name) and
    # was never closed.  The handle stays open for the duration of the
    # test and is closed by the registered cleanup afterwards.
    # NOTE(review): assumes the enclosing class is a unittest.TestCase.
    email_handle = io.open('./tests/fixtures/plain.eml', 'r')
    self.addCleanup(email_handle.close)
    self.email = EmailObject(email_handle)
def parse_emails(keyfile):
    """Build an ``EmailObject`` for every entry listed in *keyfile*.

    The key file is read in binary mode.  Each non-blank line is split on
    its first run of whitespace into a label and a path; the path is
    opened in binary mode and passed to ``EmailObject`` together with
    ``category=label`` (the label stays a ``bytes`` object).

    Compared with the earlier implementation, the key file is now closed
    when parsing finishes, blank lines are skipped instead of raising,
    and a path may itself contain spaces.

    Args:
        keyfile: path of the key file, as a ``str`` (it is concatenated
            into the progress messages).

    Returns:
        A list of ``EmailObject`` instances in key-file order.

    Raises:
        ValueError: if a non-blank line holds a label but no path.
    """
    emails = []
    print('parsing emails for ' + keyfile)

    with io.open(keyfile, 'rb') as key_lines:
        for line in key_lines:
            entry = line.strip()
            if not entry:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            # maxsplit=1 keeps any further whitespace as part of the path.
            label, path = entry.split(None, 1)
            with io.open(path, 'rb') as handle:
                emails.append(EmailObject(handle, category=label))

    print('Done parsing files for ' + keyfile)
    return emails
def train(self):
    """Fold every pending training file into the token counts.

    For each ``(category, path)`` pair in ``self.to_train`` the file is
    opened in binary mode and wrapped in an ``EmailObject``; the category
    is added to ``self.categories``; and for every token produced by
    ``Tokenizer.unique_tokenizer(email.body())`` the counters
    ``self.training[category][token]``, ``self.totals['_all']`` and
    ``self.totals[category]`` are each incremented by one.  Afterwards
    ``self.to_train`` is reset to an empty dict.

    Each file is now closed once its email has been processed; before,
    the handle was opened inline and never closed.
    """
    for category, path in self.to_train:
        with io.open(path, 'rb') as handle:
            email = EmailObject(handle)
            self.categories.add(category)

            # body() is evaluated while the handle is still open, so this
            # stays correct whether or not EmailObject reads eagerly.
            # NOTE(review): totals are bumped once per token rather than
            # once per email -- confirm this matches the intended layout.
            for token in Tokenizer.unique_tokenizer(email.body()):
                self.training[category][token] += 1
                self.totals['_all'] += 1
                self.totals[category] += 1

    self.to_train = {}
def setUp(self):
    """Open the HTML fixture and build the objects the tests use.

    Attributes set on the test instance:
        html_file: binary handle on ``./tests/fixtures/html.eml``,
            rewound to offset 0 before it is handed to ``EmailObject``.
        html: the raw bytes of the whole fixture.
        html_email: ``EmailObject`` constructed from the rewound handle.
    """
    self.html_file = io.open('./tests/fixtures/html.eml', 'rb')
    # Previously the handle was never closed.  Registering the close here
    # keeps ``self.html_file`` usable for the whole test while ensuring
    # it is released afterwards, even if a later line of setUp raises.
    # NOTE(review): assumes the enclosing class is a unittest.TestCase.
    self.addCleanup(self.html_file.close)

    self.html = self.html_file.read()
    # read() left the position at EOF; rewind so EmailObject sees the
    # file from the start.
    self.html_file.seek(0)
    self.html_email = EmailObject(self.html_file)
def setUp(self):
    """Load the multipart fixture as text and as an ``EmailObject``.

    Stores the fixture path in ``self.multipart_file``, the UTF-8 decoded
    file contents in ``self.text``, and an ``EmailObject`` built from the
    rewound binary handle in ``self.multipart_email``.
    """
    fixture_path = './tests/fixtures/multipart.eml'
    self.multipart_file = fixture_path

    with io.open(fixture_path, 'rb') as handle:
        raw_bytes = handle.read()
        self.text = raw_bytes.decode('utf-8')
        # Rewind: the read above consumed the file up to EOF.
        handle.seek(0)
        self.multipart_email = EmailObject(handle)
def setUp(self):
    """Load the HTML fixture as text and as an ``EmailObject``.

    Stores the UTF-8 decoded file contents in ``self.html`` and an
    ``EmailObject`` built from the rewound binary handle in
    ``self.html_email``.
    """
    fixture_path = './tests/fixtures/html.eml'

    with io.open(fixture_path, 'rb') as handle:
        raw_bytes = handle.read()
        self.html = raw_bytes.decode('utf-8')
        # Rewind: the read above consumed the file up to EOF.
        handle.seek(0)
        self.html_email = EmailObject(handle)
def setUp(self):
    """Open the multipart fixture and build the objects the tests use.

    Attributes set on the test instance:
        multipart_file: path of the fixture.
        multipart: text-mode handle on the fixture, rewound to offset 0
            before it is handed to ``EmailObject``.
        text: the full contents of the fixture as read in text mode.
        multipart_email: ``EmailObject`` constructed from the rewound
            handle.
    """
    self.multipart_file = './tests/fixtures/multipart.eml'
    self.multipart = io.open(self.multipart_file, 'r')
    # Previously the handle was never closed.  Registering the close here
    # keeps ``self.multipart`` usable for the whole test while ensuring
    # it is released afterwards, even if a later line of setUp raises.
    # NOTE(review): assumes the enclosing class is a unittest.TestCase.
    self.addCleanup(self.multipart.close)

    self.text = self.multipart.read()
    # read() left the position at EOF; rewind so EmailObject sees the
    # file from the start.
    self.multipart.seek(0)
    self.multipart_email = EmailObject(self.multipart)
def setUp(self):
    """Load the plain-text fixture as text and as an ``EmailObject``.

    Stores the fixture path in ``self.plain_file``, the UTF-8 decoded
    file contents in ``self.text``, and an ``EmailObject`` built from the
    rewound binary handle in ``self.plain_email``.
    """
    fixture_path = './tests/fixtures/plain.eml'
    self.plain_file = fixture_path

    with io.open(fixture_path, 'rb') as handle:
        raw_bytes = handle.read()
        self.text = raw_bytes.decode('utf-8')
        # Rewind: the read above consumed the file up to EOF.
        handle.seek(0)
        self.plain_email = EmailObject(handle)