def get_mail_files():
    """Parse the numbered test mails 1..45 and generate features for each.

    For every mail an ``EmailData`` is built from the subject, sender,
    attachments, body and the authentication-results header, then
    ``generate_features()`` is called on it.  The header passed on is
    ``ARC-Authentication-Results`` when present, otherwise
    ``Authentication-Results``, otherwise ``None``.

    Nothing is returned and the generated items are not kept.  A
    ``FileNotFoundError`` raised anywhere while handling a mail makes the
    loop move on to the next number.
    """
    # Alternative corpora (swap into path_template to use):
    #   '../../Mailboxes/PhishingCorpus_Jose_Nazario/public_phishing/phishing3/{}.eml'
    #   '../../Mailboxes/IndividualTestMails/Ham/{}.eml'
    #   '../../Mailboxes/Jonathan_Mailbox/{}.eml'
    path_template = '../../Mailboxes/IndividualTestMails/Phish/{}.eml'

    for index in range(1, 46):
        try:
            parsed = mailparser.parse_from_file(path_template.format(index))

            # Prefer the ARC variant of the header, fall back to the plain one.
            if 'ARC-Authentication-Results' in parsed.headers:
                auth_results = parsed.headers['ARC-Authentication-Results']
            elif 'Authentication-Results' in parsed.headers:
                auth_results = parsed.headers['Authentication-Results']
            else:
                auth_results = None

            email_item = EmailData(
                parsed.subject,
                parsed.from_,
                parsed.attachments,
                parsed.body,
                auth_results,
            )
            email_item.generate_features()
        except FileNotFoundError:
            continue
def test_model_olden_phish(model, test_data_dir, start, end):
    """Run *model* over numbered ``.eml`` files and print how many it flags.

    Files are read from ``'{test_data_dir}{i}.eml'`` for every ``i`` from
    ``start`` to ``end`` inclusive.  ``test_data_dir`` is used as a plain
    string prefix, so it must already end with a path separator.

    Args:
        model: Object whose ``predict`` accepts ``EmailData.repr_in_arr()``
            output; a prediction equal to ``1`` is counted as phishing.
        test_data_dir: Path prefix placed directly before ``<i>.eml``.
        start: First mail number (inclusive).
        end: Last mail number (inclusive).

    Prints each individual result, then the detected/total counts and the
    percentage of processed mails that were predicted as ``1``.  When no
    mail could be processed the percentage is reported as unavailable
    instead of raising ``ZeroDivisionError``.
    """
    count = 0
    phish = 0
    for i in range(start, end + 1):
        try:
            mail = mailparser.parse_from_file(r'{}{}.eml'.format(test_data_dir, i))
            test_mail = EmailData(mail.subject, mail.from_, mail.attachments, mail.body, mail.headers)
            test_mail.generate_features()
            result = model.predict(test_mail.repr_in_arr())
            count += 1
            if result == 1:
                phish += 1
            print("Result: {}".format(result))
        except FileNotFoundError:
            # Missing mail numbers are skipped on purpose (best effort).
            continue

    print("Detected Mails: {} -- Total Mails: {}".format(phish, count))
    if count == 0:
        # Empty range or no file found: there is nothing to divide by.
        print("Accuracy: n/a (no mails were processed)")
        return
    print("Accuracy: {}".format((phish / count) * 100))


# Despite its ``test_`` prefix this is an evaluation helper that needs real
# arguments; keep pytest from collecting it as a test case.
test_model_olden_phish.__test__ = False
def main():
    """Fit a ``NaiveBayesian`` model on ``data/train`` and report on both sets.

    Steps, in order:
      1. load the training emails and let a ``Feature`` object learn from them;
      2. translate the training emails into a data set and fit the model;
      3. load ``data/test`` and translate it with the same ``Feature`` object;
      4. print a heading and call ``test`` for the training set, then for
         the testing set.
    """
    training_emails = EmailData()
    training_emails.load_from_file('data/train')

    extractor = Feature()
    extractor.learn(training_emails)
    training_set = extractor.translate_email_data(training_emails)
    # print(extractor.features)

    classifier = NaiveBayesian()
    classifier.learn(extractor, training_set)

    held_out_emails = EmailData()
    held_out_emails.load_from_file('data/test')
    held_out_set = extractor.translate_email_data(held_out_emails)

    reports = (
        ('# Training set', training_set),
        ('# Testing set', held_out_set),
    )
    for heading, data_set in reports:
        print(heading)
        test(classifier, data_set)
from EmailData import EmailData
import re
import mailparser

# Script: parse mails 1..56 of the selected corpus, generate their features
# and print every resulting EmailData item.
#
# Alternative corpora (swap into _MAIL_PATH_TEMPLATE to use):
#   '../../Mailboxes/PhishingCorpus_Jose_Nazario/public_phishing/phishing3/{}.eml'
#   '../../Mailboxes/Hams/ModernHam1/{}.eml'
#   '../../Mailboxes/enron_mail_20150507/maildir/allen-p/all_documents/{}..eml'
#   '../../Mailboxes/enron_mail_20150507/maildir/arnold-j/all_documents/{}..eml'
_MAIL_PATH_TEMPLATE = '../../Mailboxes/Phish/ModernPhish3/{}.eml'

for i in range(1, 57):
    try:
        mail = mailparser.parse_from_file(_MAIL_PATH_TEMPLATE.format(i))
        test_mail_item = EmailData(
            mail.subject,
            mail.from_,
            mail.attachments,
            mail.body,
            mail.headers,
        )
        test_mail_item.generate_features()
        print("{}".format(test_mail_item))
    except FileNotFoundError:
        # A FileNotFoundError raised anywhere above skips this mail number.
        continue
def test_model_single(model: RandomForestClassifier, file_path: str):
    """Classify the single mail at *file_path* with *model* and print the result.

    The file is parsed with ``mailparser``, wrapped in an ``EmailData``
    (subject, sender, attachments, body, headers), its features are
    generated, and ``model.predict`` is called on ``repr_in_arr()``.

    Args:
        model: Classifier whose ``predict`` receives the feature array.
        file_path: Location of the ``.eml`` file to classify.

    Prints ``"Result: <prediction>"``; nothing is returned.
    """
    parsed = mailparser.parse_from_file(r'{}'.format(file_path))
    email_item = EmailData(
        parsed.subject,
        parsed.from_,
        parsed.attachments,
        parsed.body,
        parsed.headers,
    )
    email_item.generate_features()

    prediction = model.predict(email_item.repr_in_arr())
    print("Result: {}".format(prediction))