示例#1
0
def get_mail_files():
    """Parse the numbered phishing test mails and generate their features.

    Tries ``1.eml`` through ``45.eml`` in the IndividualTestMails/Phish
    directory. For each file that exists, an ``EmailData`` is built from the
    subject, sender, attachments, body and the authentication-results header
    (``None`` when the mail carries neither variant), and
    ``generate_features()`` is called on it. Numbers without a file are
    skipped silently. Nothing is returned or stored.
    """
    path_template = '../../Mailboxes/IndividualTestMails/Phish/{}.eml'

    for number in range(1, 46):
        try:
            mail = mailparser.parse_from_file(path_template.format(number))

            # Prefer the ARC variant of the header, then the plain one.
            all_headers = mail.headers
            if 'ARC-Authentication-Results' in all_headers:
                headers = all_headers['ARC-Authentication-Results']
            elif 'Authentication-Results' in all_headers:
                headers = all_headers['Authentication-Results']
            else:
                headers = None

            test_mail_item = EmailData(
                mail.subject,
                mail.from_,
                mail.attachments,
                mail.body,
                headers,
            )
            test_mail_item.generate_features()
        except FileNotFoundError:
            # Gaps in the file numbering are tolerated.
            continue
示例#2
0
def test_model_olden_phish(model, test_data_dir, start, end):
    count = 0
    phish = 0
    for i in range(start, end+1):
        try:
            mail = mailparser.parse_from_file(r'{}{}.eml'.format(test_data_dir, i))
            test_mail = EmailData(mail.subject, mail.from_, mail.attachments, mail.body, mail.headers)
            test_mail.generate_features()
            result = model.predict(test_mail.repr_in_arr())
            count+=1
            if result == 1:
                phish+=1
            print("Result: {}".format(result))
        except FileNotFoundError:
            pass
    print("Detected Mails: {} -- Total Mails: {}".format(phish, count))
    print("Accuracy: {}".format((phish/count)*100))
示例#3
0
def main():
    """Train a naive Bayesian classifier and evaluate it on both data sets.

    Loads the training mails from ``data/train``, lets a ``Feature`` learn
    from them, and trains a ``NaiveBayesian`` on the translated training set.
    The mails in ``data/test`` are translated with the same ``Feature``.
    ``test`` is then run on the training set and on the testing set, each
    preceded by a heading line.
    """
    training_mails = EmailData()
    training_mails.load_from_file('data/train')

    feature = Feature()
    feature.learn(training_mails)
    training_set = feature.translate_email_data(training_mails)

    classifier = NaiveBayesian()
    classifier.learn(feature, training_set)

    testing_mails = EmailData()
    testing_mails.load_from_file('data/test')
    testing_set = feature.translate_email_data(testing_mails)

    evaluations = (
        ('# Training set', training_set),
        ('# Testing set', testing_set),
    )
    for heading, data_set in evaluations:
        print(heading)
        test(classifier, data_set)
示例#4
0
from EmailData import EmailData
import re

import mailparser

# Print the generated features of every mail in the ModernPhish3 directory.
# Files are tried as 1.eml .. 56.eml; numbers without a file are skipped.
# Point the path at a different mailbox directory to inspect another corpus.
for i in range(1, 57):
    try:
        mail = mailparser.parse_from_file('../../Mailboxes/Phish/ModernPhish3/{}.eml'.format(i))
        test_mail_item = EmailData(
            mail.subject,
            mail.from_,
            mail.attachments,
            mail.body,
            mail.headers,
        )
        test_mail_item.generate_features()
        print("{}".format(test_mail_item))
    except FileNotFoundError:
        # Gaps in the file numbering are tolerated.
        continue
示例#5
0
def test_model_single(model: RandomForestClassifier, file_path: str):
    """Classify one mail file with ``model`` and print the prediction.

    Args:
        model: Classifier whose ``predict`` is called on the mail's
            ``repr_in_arr()`` representation.
        file_path: Location of the mail file to parse.

    The mail is parsed, wrapped in an ``EmailData``, its features are
    generated, and the prediction is printed. Nothing is returned.
    """
    parsed = mailparser.parse_from_file(r'{}'.format(file_path))

    sample = EmailData(
        parsed.subject,
        parsed.from_,
        parsed.attachments,
        parsed.body,
        parsed.headers,
    )
    sample.generate_features()

    prediction = model.predict(sample.repr_in_arr())
    print("Result: {}".format(prediction))