def export_attachment(self, path=None):
    """Extract every attachment in the mailbox and save it to disk.

    :param path: string of a pure Windows path relative to the cwd
                 (Ex: '\\output\\email_attachments'); defaults to
                 '<cwd>\\output\\email_attachments' when omitted.
    :return: None; attachment files are written as a side effect.
    """
    from tqdm import tqdm

    # TODO make it work in MacOS — PureWindowsPath and the '\\' separators
    # below are Windows-only.
    if path is None:
        path = PureWindowsPath(os.getcwd() + '\\output\\email_attachments')
    else:
        path = PureWindowsPath(os.getcwd() + path)
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(path, exist_ok=True)

    # enumerate() replaces the original zip(mbox, range(len(mbox))) idiom.
    for em_id, m in tqdm(enumerate(self._mbox), total=len(self._mbox)):
        em_parser = EmailParser.EmailParser(m, em_id, with_url_status=False)
        em_parser.parse_all_data()
        if em_parser.get('has_attachment'):
            attachment_type = em_parser.get('attachment_type')
            attachments = em_parser.get('attachments')
            # Each attachment is paired with its MIME content type; attach_id
            # keeps the on-disk filenames unique within one e-mail.
            for attach_id, (content_type, raw_content) in enumerate(
                    zip(attachment_type, attachments)):
                file_ext = EmailParser.get_file_ext(content_type)
                EmailParser.save_attachment(path, raw_content, em_id,
                                            attach_id, file_ext)
def parse_email(self, with_url_status=False):
    """Parse every message in the mailbox into columnar lists.

    :param with_url_status: forwarded to EmailParser; presumably enables
        resolving the status of URLs found in each message — confirm against
        EmailParser.
    :return: None; populates ``self.data`` with a dict of parallel lists
        (one entry per message) keyed by the column names in ``header``.
    """
    from tqdm import tqdm

    header = [
        'email_id', 'from', 'to', 'ip', 'datetime', 'day', 'month', 'year',
        'weekofyear', 'has_attach', 'attach_type', 'num_urls', 'urls',
        'domain', 'subject', 'body', 'num_words'
    ]
    # A dict comprehension gives each key its own list object;
    # dict.fromkeys(header, []) would make every key share one list.
    data = {k: [] for k in header}

    # enumerate() replaces the original zip(mbox, range(len(mbox))) idiom.
    for em_id, m in tqdm(enumerate(self._mbox), total=len(self._mbox)):
        em_parser = EmailParser.EmailParser(m, em_id, with_url_status)
        em_parser.parse_all_data()
        _date = em_parser.get('date')

        data['email_id'].append(em_id)
        data['from'].append(em_parser.get('from'))
        data['to'].append(em_parser.get('to'))
        data['datetime'].append(str(_date))
        data['day'].append(_date.day)
        data['month'].append(_date.month)
        data['year'].append(_date.year)
        # isocalendar() returns (ISO year, ISO week number, ISO weekday).
        data['weekofyear'].append(_date.isocalendar()[1])
        data['ip'].append(em_parser.get('ip'))
        data['has_attach'].append(em_parser.get('has_attachment'))
        data['attach_type'].append(em_parser.get('attachment_type'))
        data['num_urls'].append(len(em_parser.get('urls')))
        data['urls'].append(em_parser.get('urls'))
        data['domain'].append(em_parser.get('urls_domain'))
        data['subject'].append(em_parser.get('subject'))
        data['body'].append(em_parser.get('body'))
        data['num_words'].append(em_parser.get('num_words'))

    self.data = data
# Fetch all waiting messages from the mail server (`mail` is presumably a
# poplib.POP3 connection opened earlier in the file — confirm) and keep only
# those that belong to one of the configured mailing lists.
numMess=len(mail.list()[1])
print " Found "+str(numMess)+" messages"
#get the messages
mails = [] #empty array where messages will be stored
for i in range(numMess):
    s="" #string containing a message
    append=False
    db="" #the maillist the mail belong to
    # POP3 message numbers are 1-indexed; retr() returns
    # (response, lines, octets), so [1] is the list of raw lines.
    for j in mail.retr(i+1)[1]:
        s=s+j+"\n"
        #check if the message belong to the mailing list
        for m in maillists:
            if checkLineInMaillist(j, m['name']):
                db=m['name']
                append=True
    # Keep the message only if at least one line matched a mailing list.
    if(append):
        mails.append(EmailParser.MapMessageMailList(db, s))
#close the connection
mail.quit()
#convert the mail as text in a nicer object
emailsObjects = EmailParser.convertTextArrayToMailArray(mails)
if(len(emailsObjects)==0):
    print " No relevant mails found"
else:
    print "Store in database"
    CouchDBConnection.saveListOfMailCouchdb(emailsObjects, REmailS)
# Authenticate, then fetch all waiting messages from the mail server (`mail`
# is presumably a poplib.POP3 connection created earlier in the file —
# confirm) and keep only those belonging to a configured mailing list.
mail.pass_(password)
numMess = len(mail.list()[1])
print " Found " + str(numMess) + " messages"
#get the messages
mails = []  #empty array where messages will be stored
for i in range(numMess):
    s = ""  #string containing a message
    append = False
    db = ""  #the maillist the mail belong to
    # POP3 message numbers are 1-indexed; retr() returns
    # (response, lines, octets), so [1] is the list of raw lines.
    for j in mail.retr(i + 1)[1]:
        s = s + j + "\n"
        #check if the message belong to the mailing list
        for m in maillists:
            if checkLineInMaillist(j, m['name']):
                db = m['name']
                append = True
    # Keep the message only if at least one line matched a mailing list.
    if (append):
        mails.append(EmailParser.MapMessageMailList(db, s))
#close the connection
mail.quit()
#convert the mail as text in a nicer object
emailsObjects = EmailParser.convertTextArrayToMailArray(mails)
if (len(emailsObjects) == 0):
    print " No relevant mails found"
else:
    print "Store in database"
    CouchDBConnection.saveListOfMailCouchdb(emailsObjects, REmailS)
# Validate CLI arguments, then run the selected mode. Each failure exits with
# a distinct status code so callers/scripts can tell the errors apart.
if tech not in TECHNIQUES:
    print("Invalid technique. Allowed values: bayes (for naive bayes), dt (for decision tree)")
    sys.exit(3)
if not os.path.isdir(directory):
    print("Directory " + directory + " does not exist")
    sys.exit(4)
else:
    # The data directory must contain both a spam/ and a notspam/ subfolder.
    if not os.path.isdir(directory + "/spam/"):
        print("Directory " + directory + "/spam/" + " does not exist")
        sys.exit(5)
    if not os.path.isdir(directory + "/notspam/"):
        print("Directory " + directory + "/notspam/" + " does not exist")
        sys.exit(6)
# Parse every e-mail in each class folder into text for the model.
p = EmailParser.Parser()
spam_email_texts = p.parse(directory + "/spam/")
non_spam_email_texts = p.parse(directory + "/notspam/")
# sys.exit(0)
if mode == "train":
    # Train a fresh model on the parsed corpora and persist it.
    model = Model.Model(tech)
    model.train(spam_email_texts, non_spam_email_texts)
    model.save(model_path, tech)
    print(model)
elif mode == "test":
    # Load a previously trained model; evaluation counters start here
    # (the rest of the test branch continues beyond this chunk).
    model = Model.Model()
    model.load(model_path, tech)
    print(model)
    true_positive = 0
def __init__(self, config, alert_path, whitelister=None):
    """Load an ACE alert from ``alert_path`` and extract its artifacts.

    Reads ``data.json`` for the alert metadata, then scans the alert
    directory for: an rfc822 e-mail file, user analysis JSON, ACE-extracted
    URLs (turned into indicators), and sandbox report JSON files.

    :param config: parsed configuration; must contain an ``ACEAlert``
        section with ``alert_url`` and ``valid_sandbox_paths`` keys.
    :param alert_path: filesystem path of the ACE alert directory.
    :param whitelister: optional whitelister forwarded to the base class
        and to EmailParser.
    """
    # Run the super init to inherit attributes and load the config.
    super().__init__(config=config, whitelister=whitelister)

    # data.json holds the alert's core metadata.
    alert_json_path = os.path.join(alert_path, "data.json")
    alert_json_path = os.path.normpath(alert_json_path)
    self.logger.info("Parsing ACE alert: " + alert_json_path)
    with open(alert_json_path) as a:
        self.json = json.load(a)

    self.iocs = []
    self.source = ""
    self.path = alert_path
    self.time = self.json["event_time"]
    self.tool = self.json["tool"]
    self.type = self.json["type"]
    self.name = self.json["uuid"]
    self.description = self.json["description"]
    # Older alerts predate the company_name field; default them to "legacy".
    try:
        self.company_name = self.json["company_name"]
    except KeyError:
        self.company_name = "legacy"

    # Load the URL from the config file.
    self.alert_url = self.config["ACEAlert"]["alert_url"] + self.name

    ##################
    ##              ##
    ##  FIND EMAIL  ##
    ##              ##
    ##################
    # Get a listing of the alert directory to try and find an e-mail.
    alert_files = os.listdir(self.path)
    potential_emails = []
    for file in alert_files:
        file_path = os.path.join(self.path, file)
        if os.path.isfile(file_path):
            mime = self.get_file_mimetype(os.path.join(self.path, file))
            if "rfc822" in mime:
                try:
                    email = EmailParser.EmailParser(
                        self.config,
                        smtp_path=file_path,
                        whitelister=self.whitelister)
                    email.reference = self.alert_url
                    potential_emails.append(email)
                except Exception:
                    # Log and skip this e-mail if it couldn't be parsed.
                    self.logger.exception("Error parsing e-mail: " + file_path)

    # Since ACE makes .header files that also appear as rfc822 files, pick the right one.
    if len(potential_emails) == 1:
        self.email = potential_emails[0]
    elif len(potential_emails) > 1:
        # Probably should have a more robust method of picking e-mails.
        # Prefer the first candidate that actually has a body or HTML part.
        try:
            self.email = next(email for email in potential_emails
                              if email.body or email.html)
        except:
            self.logger.exception("Error picking the e-mail")

    #####################
    ##                 ##
    ##  USER ANALYSIS  ##
    ##                 ##
    #####################
    # Try and find any user analysis files.
    user_analysis_files = self.get_all_analysis_paths(
        "saq.modules.user:EmailAddressAnalysis")

    # Parse any user_analysis_files.
    self.user_analysis = []
    for file in user_analysis_files:
        # Analysis output lives under the alert's hidden .ace directory.
        if os.path.exists(os.path.join(self.path, ".ace", file)):
            with open(os.path.join(self.path, ".ace", file)) as j:
                json_data = json.load(j)

                # Verify that certain keys actually have values.
                if "cn" not in json_data:
                    json_data["cn"] = ""
                if "displayName" not in json_data:
                    json_data["displayName"] = ""
                if "mail" not in json_data:
                    json_data["mail"] = ""
                if "title" not in json_data:
                    json_data["title"] = ""
                if "description" not in json_data:
                    json_data["description"] = [""]
                if "department" not in json_data:
                    json_data["department"] = ""
                if "company" not in json_data:
                    json_data["company"] = ""
                if "distinguishedName" not in json_data:
                    json_data["distinguishedName"] = ""

                self.user_analysis.append(json_data)

    ############
    ##        ##
    ##  URLS  ##
    ##        ##
    ############
    # Save whatever URLs ACE was able to automatically extract.
    self.urls = []
    url_files = self.get_all_analysis_paths(
        "saq.modules.file_analysis:URLExtractionAnalysis")
    for file in url_files:
        with open(os.path.join(self.path, ".ace", file)) as j:
            json_data = json.load(j)
            for url in json_data:
                # Normalize by dropping a single trailing slash.
                if url.endswith("/"):
                    url = url[:-1]
                # Skip URLs that are a prefix of one already collected.
                if not any(
                        other_url.startswith(url) and other_url != url
                        for other_url in self.urls):
                    self.urls.append(url)
                else:
                    self.logger.debug(
                        "Skipping duplicate/partial ACE extracted URL: " + url)

    # Make Indicators for any URLs that ACE extracted.
    indicator_list = Indicator.generate_url_indicators(self.urls)

    # Add some additional tags and add them to our main IOC list.
    for ind in indicator_list:
        ind.add_tags("ace_extracted_url")
        self.iocs.append(ind)

    ###########################
    ##                       ##
    ##  FIND SANDBOX REPORT  ##
    ##                       ##
    ###########################
    valid_sandbox_paths = self.config["ACEAlert"][
        "valid_sandbox_paths"].split(",")

    # Walk the entire alert directory to find any possible sandbox reports.
    for root, dirs, files in os.walk(self.path):
        for file in files:
            # Make sure we are in a valid sandbox directory.
            if any(path in root for path in valid_sandbox_paths):
                # Make sure the file ends with .json.
                if file.endswith(".json"):
                    # Filter out the "network_" and "processtree_" WildFire JSON.
                    # This is currently a hack for how we dump the WildFire JSON.
                    if not root.endswith(
                            "dropped"
                    ) and "network_" not in file and "processtree_" not in file:
                        # At this point, assume this is a sandbox report. Try to add it.
                        sandbox_json_path = os.path.join(root, file)
                        self.add_sandbox(sandbox_json_path)
initialiseCSV() messageIDMappings = {} for root, dirs, files in os.walk(__CATEGORIESLOCATION__): if len(files) > 0: filenames = os.listdir(root) # Remove .cats files from listdir for names in filenames: if names.split(".")[-1] == "cats": filenames.remove(names) for names in filenames: i = i + 1 if i % 10 == 0: # No of files processed print i filePath = root + "/" + names catsFile = filePath.split(".")[0] + ".cats" categoryInfo = parseCats(catsFile) importanceRating = calculateSubjectiveImportance(categoryInfo) data = EmailParser.parseEmail(filePath) processedData = EmailParser.processData(data) messageID = processedData[10] messageIDMappings[messageID] = importanceRating # writeToCSV(i,filePath,processedData,categoryInfo,importanceRating) generateCategoriesJSON(messageIDMappings)
initialiseCSV() messageIDMappings = {} for root, dirs, files in os.walk(__CATEGORIESLOCATION__): if len(files) > 0: filenames = os.listdir(root) #Remove .cats files from listdir for names in filenames: if (names.split('.')[-1] == 'cats'): filenames.remove(names) for names in filenames: i = i + 1 if i % 10 == 0: #No of files processed print i filePath = root + "/" + names catsFile = filePath.split('.')[0] + '.cats' categoryInfo = parseCats(catsFile) importanceRating = calculateSubjectiveImportance(categoryInfo) data = EmailParser.parseEmail(filePath) processedData = EmailParser.processData(data) messageID = processedData[10] messageIDMappings[messageID] = importanceRating #writeToCSV(i,filePath,processedData,categoryInfo,importanceRating) generateCategoriesJSON(messageIDMappings)
import tkinter as tk
from tkinter import filedialog
import os
import datetime as date

# These variables set up an invisible window that hosts the file dialog.
root = tk.Tk()
root.withdraw()

# Example of file dialog. Use when necessary for getting files.
#file_path = filedialog.askopenfilename()

spacing = "\n"

# This object handles finding the COMPANY_NAME email in user's outlook application.
up_email = EmailParser.EmailParser()

# Suffix of the Excel test-plan workbook this script looks for.
excel_request_name = "_REO_Testplan-First.xlsx"


# This function helps check if a CP exists before adding it to a list.
def check_if_cp_exists(cfa, cp_name):
    """Return whether ``cp_name`` exists in the given CFA's Excel planner.

    Delegates entirely to ExcelPlanner.check_if_cp_exists.
    """
    planner = ExcelPlanner.ExcelPlanner(cfa)
    cp_exists = planner.check_if_cp_exists(cp_name)
    return cp_exists


# This function gets all cp names in the selected CFA/Excel doc.
# (Definition continues beyond this chunk.)
def get_cp_names(cfa):