def export_attachment(self, path=None):
    """Extract every attachment in the mailbox and save it to disk.

    :param path: string of a pure Windows path relative to the cwd
                 (Ex: '\\output\\email_attachments'); defaults to
                 '<cwd>\\output\\email_attachments' when omitted.
    :return: None; attachment files are written as a side effect.
    """
    from tqdm import tqdm

    # TODO make it work in MacOS — PureWindowsPath and the '\\' separators
    # below are Windows-only.
    if path is None:
        path = PureWindowsPath(os.getcwd() + '\\output\\email_attachments')
    else:
        path = PureWindowsPath(os.getcwd() + path)
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(path, exist_ok=True)

    # enumerate() replaces the original zip(mbox, range(len(mbox))) idiom.
    for em_id, m in tqdm(enumerate(self._mbox), total=len(self._mbox)):
        em_parser = EmailParser.EmailParser(m, em_id, with_url_status=False)
        em_parser.parse_all_data()
        if em_parser.get('has_attachment'):
            attachment_type = em_parser.get('attachment_type')
            attachments = em_parser.get('attachments')
            # Each attachment is paired with its MIME content type; attach_id
            # keeps the on-disk filenames unique within one e-mail.
            for attach_id, (content_type, raw_content) in enumerate(
                    zip(attachment_type, attachments)):
                file_ext = EmailParser.get_file_ext(content_type)
                EmailParser.save_attachment(path, raw_content, em_id,
                                            attach_id, file_ext)
def parse_email(self, with_url_status=False):
    """Parse every message in the mailbox into columnar lists.

    :param with_url_status: forwarded to EmailParser; presumably enables
        resolving the status of URLs found in each message — confirm against
        EmailParser.
    :return: None; populates ``self.data`` with a dict of parallel lists
        (one entry per message) keyed by the column names in ``header``.
    """
    from tqdm import tqdm

    header = [
        'email_id', 'from', 'to', 'ip', 'datetime', 'day', 'month', 'year',
        'weekofyear', 'has_attach', 'attach_type', 'num_urls', 'urls',
        'domain', 'subject', 'body', 'num_words'
    ]
    # A dict comprehension gives each key its own list object;
    # dict.fromkeys(header, []) would make every key share one list.
    data = {k: [] for k in header}

    # enumerate() replaces the original zip(mbox, range(len(mbox))) idiom.
    for em_id, m in tqdm(enumerate(self._mbox), total=len(self._mbox)):
        em_parser = EmailParser.EmailParser(m, em_id, with_url_status)
        em_parser.parse_all_data()
        _date = em_parser.get('date')

        data['email_id'].append(em_id)
        data['from'].append(em_parser.get('from'))
        data['to'].append(em_parser.get('to'))
        data['datetime'].append(str(_date))
        data['day'].append(_date.day)
        data['month'].append(_date.month)
        data['year'].append(_date.year)
        # isocalendar() returns (ISO year, ISO week number, ISO weekday).
        data['weekofyear'].append(_date.isocalendar()[1])
        data['ip'].append(em_parser.get('ip'))
        data['has_attach'].append(em_parser.get('has_attachment'))
        data['attach_type'].append(em_parser.get('attachment_type'))
        data['num_urls'].append(len(em_parser.get('urls')))
        data['urls'].append(em_parser.get('urls'))
        data['domain'].append(em_parser.get('urls_domain'))
        data['subject'].append(em_parser.get('subject'))
        data['body'].append(em_parser.get('body'))
        data['num_words'].append(em_parser.get('num_words'))

    self.data = data
# Fetch all waiting messages from the mail server (`mail` is presumably a
# poplib.POP3 connection opened earlier in the file — confirm) and keep only
# those that belong to one of the configured mailing lists.
numMess=len(mail.list()[1])
print " Found "+str(numMess)+" messages"
#get the messages
mails = [] #empty array where messages will be stored
for i in range(numMess):
    s="" #string containing a message
    append=False
    db="" #the maillist the mail belong to
    # POP3 message numbers are 1-indexed; retr() returns
    # (response, lines, octets), so [1] is the list of raw lines.
    for j in mail.retr(i+1)[1]:
        s=s+j+"\n"
        #check if the message belong to the mailing list
        for m in maillists:
            if checkLineInMaillist(j, m['name']):
                db=m['name']
                append=True
    # Keep the message only if at least one line matched a mailing list.
    if(append):
        mails.append(EmailParser.MapMessageMailList(db, s))
#close the connection
mail.quit()
#convert the mail as text in a nicer object
emailsObjects = EmailParser.convertTextArrayToMailArray(mails)
if(len(emailsObjects)==0):
    print " No relevant mails found"
else:
    print "Store in database"
    CouchDBConnection.saveListOfMailCouchdb(emailsObjects, REmailS)
# Authenticate, then fetch all waiting messages from the mail server (`mail`
# is presumably a poplib.POP3 connection created earlier in the file —
# confirm) and keep only those belonging to a configured mailing list.
mail.pass_(password)
numMess = len(mail.list()[1])
print " Found " + str(numMess) + " messages"
#get the messages
mails = []  #empty array where messages will be stored
for i in range(numMess):
    s = ""  #string containing a message
    append = False
    db = ""  #the maillist the mail belong to
    # POP3 message numbers are 1-indexed; retr() returns
    # (response, lines, octets), so [1] is the list of raw lines.
    for j in mail.retr(i + 1)[1]:
        s = s + j + "\n"
        #check if the message belong to the mailing list
        for m in maillists:
            if checkLineInMaillist(j, m['name']):
                db = m['name']
                append = True
    # Keep the message only if at least one line matched a mailing list.
    if (append):
        mails.append(EmailParser.MapMessageMailList(db, s))
#close the connection
mail.quit()
#convert the mail as text in a nicer object
emailsObjects = EmailParser.convertTextArrayToMailArray(mails)
if (len(emailsObjects) == 0):
    print " No relevant mails found"
else:
    print "Store in database"
    CouchDBConnection.saveListOfMailCouchdb(emailsObjects, REmailS)
# Validate CLI arguments, then run the selected mode. Each failure exits with
# a distinct status code so callers/scripts can tell the errors apart.
if tech not in TECHNIQUES:
    print("Invalid technique. Allowed values: bayes (for naive bayes), dt (for decision tree)")
    sys.exit(3)
if not os.path.isdir(directory):
    print("Directory " + directory + " does not exist")
    sys.exit(4)
else:
    # The data directory must contain both a spam/ and a notspam/ subfolder.
    if not os.path.isdir(directory + "/spam/"):
        print("Directory " + directory + "/spam/" + " does not exist")
        sys.exit(5)
    if not os.path.isdir(directory + "/notspam/"):
        print("Directory " + directory + "/notspam/" + " does not exist")
        sys.exit(6)
# Parse every e-mail in each class folder into text for the model.
p = EmailParser.Parser()
spam_email_texts = p.parse(directory + "/spam/")
non_spam_email_texts = p.parse(directory + "/notspam/")
# sys.exit(0)
if mode == "train":
    # Train a fresh model on the parsed corpora and persist it.
    model = Model.Model(tech)
    model.train(spam_email_texts, non_spam_email_texts)
    model.save(model_path, tech)
    print(model)
elif mode == "test":
    # Load a previously trained model; evaluation counters start here
    # (the rest of the test branch continues beyond this chunk).
    model = Model.Model()
    model.load(model_path, tech)
    print(model)
    true_positive = 0
def __init__(self, config, alert_path, whitelister=None):
    """Load an ACE alert from ``alert_path`` and extract its artifacts.

    Reads ``data.json`` for the alert metadata, then scans the alert
    directory for: an rfc822 e-mail file, user analysis JSON, ACE-extracted
    URLs (turned into indicators), and sandbox report JSON files.

    :param config: parsed configuration; must contain an ``ACEAlert``
        section with ``alert_url`` and ``valid_sandbox_paths`` keys.
    :param alert_path: filesystem path of the ACE alert directory.
    :param whitelister: optional whitelister forwarded to the base class
        and to EmailParser.
    """
    # Run the super init to inherit attributes and load the config.
    super().__init__(config=config, whitelister=whitelister)

    # data.json holds the alert's core metadata.
    alert_json_path = os.path.join(alert_path, "data.json")
    alert_json_path = os.path.normpath(alert_json_path)
    self.logger.info("Parsing ACE alert: " + alert_json_path)
    with open(alert_json_path) as a:
        self.json = json.load(a)

    self.iocs = []
    self.source = ""
    self.path = alert_path
    self.time = self.json["event_time"]
    self.tool = self.json["tool"]
    self.type = self.json["type"]
    self.name = self.json["uuid"]
    self.description = self.json["description"]
    # Older alerts predate the company_name field; default them to "legacy".
    try:
        self.company_name = self.json["company_name"]
    except KeyError:
        self.company_name = "legacy"

    # Load the URL from the config file.
    self.alert_url = self.config["ACEAlert"]["alert_url"] + self.name

    ##################
    ##              ##
    ##  FIND EMAIL  ##
    ##              ##
    ##################
    # Get a listing of the alert directory to try and find an e-mail.
    alert_files = os.listdir(self.path)
    potential_emails = []
    for file in alert_files:
        file_path = os.path.join(self.path, file)
        if os.path.isfile(file_path):
            mime = self.get_file_mimetype(os.path.join(self.path, file))
            if "rfc822" in mime:
                try:
                    email = EmailParser.EmailParser(
                        self.config,
                        smtp_path=file_path,
                        whitelister=self.whitelister)
                    email.reference = self.alert_url
                    potential_emails.append(email)
                except Exception:
                    # Log and skip this e-mail if it couldn't be parsed.
                    self.logger.exception("Error parsing e-mail: " + file_path)

    # Since ACE makes .header files that also appear as rfc822 files, pick the right one.
    if len(potential_emails) == 1:
        self.email = potential_emails[0]
    elif len(potential_emails) > 1:
        # Probably should have a more robust method of picking e-mails.
        # Prefer the first candidate that actually has a body or HTML part.
        try:
            self.email = next(email for email in potential_emails
                              if email.body or email.html)
        except:
            self.logger.exception("Error picking the e-mail")

    #####################
    ##                 ##
    ##  USER ANALYSIS  ##
    ##                 ##
    #####################
    # Try and find any user analysis files.
    user_analysis_files = self.get_all_analysis_paths(
        "saq.modules.user:EmailAddressAnalysis")

    # Parse any user_analysis_files.
    self.user_analysis = []
    for file in user_analysis_files:
        # Analysis output lives under the alert's hidden .ace directory.
        if os.path.exists(os.path.join(self.path, ".ace", file)):
            with open(os.path.join(self.path, ".ace", file)) as j:
                json_data = json.load(j)

                # Verify that certain keys actually have values.
                if "cn" not in json_data:
                    json_data["cn"] = ""
                if "displayName" not in json_data:
                    json_data["displayName"] = ""
                if "mail" not in json_data:
                    json_data["mail"] = ""
                if "title" not in json_data:
                    json_data["title"] = ""
                if "description" not in json_data:
                    json_data["description"] = [""]
                if "department" not in json_data:
                    json_data["department"] = ""
                if "company" not in json_data:
                    json_data["company"] = ""
                if "distinguishedName" not in json_data:
                    json_data["distinguishedName"] = ""

                self.user_analysis.append(json_data)

    ############
    ##        ##
    ##  URLS  ##
    ##        ##
    ############
    # Save whatever URLs ACE was able to automatically extract.
    self.urls = []
    url_files = self.get_all_analysis_paths(
        "saq.modules.file_analysis:URLExtractionAnalysis")
    for file in url_files:
        with open(os.path.join(self.path, ".ace", file)) as j:
            json_data = json.load(j)
            for url in json_data:
                # Normalize by dropping a single trailing slash.
                if url.endswith("/"):
                    url = url[:-1]
                # Skip URLs that are a prefix of one already collected.
                if not any(
                        other_url.startswith(url) and other_url != url
                        for other_url in self.urls):
                    self.urls.append(url)
                else:
                    self.logger.debug(
                        "Skipping duplicate/partial ACE extracted URL: " + url)

    # Make Indicators for any URLs that ACE extracted.
    indicator_list = Indicator.generate_url_indicators(self.urls)

    # Add some additional tags and add them to our main IOC list.
    for ind in indicator_list:
        ind.add_tags("ace_extracted_url")
        self.iocs.append(ind)

    ###########################
    ##                       ##
    ##  FIND SANDBOX REPORT  ##
    ##                       ##
    ###########################
    valid_sandbox_paths = self.config["ACEAlert"][
        "valid_sandbox_paths"].split(",")

    # Walk the entire alert directory to find any possible sandbox reports.
    for root, dirs, files in os.walk(self.path):
        for file in files:
            # Make sure we are in a valid sandbox directory.
            if any(path in root for path in valid_sandbox_paths):
                # Make sure the file ends with .json.
                if file.endswith(".json"):
                    # Filter out the "network_" and "processtree_" WildFire JSON.
                    # This is currently a hack for how we dump the WildFire JSON.
                    if not root.endswith(
                            "dropped"
                    ) and "network_" not in file and "processtree_" not in file:
                        # At this point, assume this is a sandbox report. Try to add it.
                        sandbox_json_path = os.path.join(root, file)
                        self.add_sandbox(sandbox_json_path)
initialiseCSV() messageIDMappings = {} for root, dirs, files in os.walk(__CATEGORIESLOCATION__): if len(files) > 0: filenames = os.listdir(root) # Remove .cats files from listdir for names in filenames: if names.split(".")[-1] == "cats": filenames.remove(names) for names in filenames: i = i + 1 if i % 10 == 0: # No of files processed print i filePath = root + "/" + names catsFile = filePath.split(".")[0] + ".cats" categoryInfo = parseCats(catsFile) importanceRating = calculateSubjectiveImportance(categoryInfo) data = EmailParser.parseEmail(filePath) processedData = EmailParser.processData(data) messageID = processedData[10] messageIDMappings[messageID] = importanceRating # writeToCSV(i,filePath,processedData,categoryInfo,importanceRating) generateCategoriesJSON(messageIDMappings)
initialiseCSV() messageIDMappings = {} for root, dirs, files in os.walk(__CATEGORIESLOCATION__): if len(files) > 0: filenames = os.listdir(root) #Remove .cats files from listdir for names in filenames: if (names.split('.')[-1] == 'cats'): filenames.remove(names) for names in filenames: i = i + 1 if i % 10 == 0: #No of files processed print i filePath = root + "/" + names catsFile = filePath.split('.')[0] + '.cats' categoryInfo = parseCats(catsFile) importanceRating = calculateSubjectiveImportance(categoryInfo) data = EmailParser.parseEmail(filePath) processedData = EmailParser.processData(data) messageID = processedData[10] messageIDMappings[messageID] = importanceRating #writeToCSV(i,filePath,processedData,categoryInfo,importanceRating) generateCategoriesJSON(messageIDMappings)
import tkinter as tk
from tkinter import filedialog
import os
import datetime as date

# These variables set up an invisible window that hosts the file dialog.
root = tk.Tk()
root.withdraw()

# Example of file dialog. Use when necessary for getting files.
#file_path = filedialog.askopenfilename()

spacing = "\n"

# This object handles finding the COMPANY_NAME email in user's outlook application.
up_email = EmailParser.EmailParser()

# Suffix of the Excel test-plan workbook this script looks for.
excel_request_name = "_REO_Testplan-First.xlsx"


# This function helps check if a CP exists before adding it to a list.
def check_if_cp_exists(cfa, cp_name):
    """Return whether ``cp_name`` exists in the given CFA's Excel planner.

    Delegates entirely to ExcelPlanner.check_if_cp_exists.
    """
    planner = ExcelPlanner.ExcelPlanner(cfa)
    cp_exists = planner.check_if_cp_exists(cp_name)
    return cp_exists


# This function gets all cp names in the selected CFA/Excel doc.
# (Definition continues beyond this chunk.)
def get_cp_names(cfa):