def processFile(filepath, phishy=True, limit=500):
    """Extract features from the non-empty emails of an mbox file and export them.

    Every message in the mbox at ``filepath`` is run through the feature
    finders listed below. Messages whose payload parts contain nothing but
    whitespace are reported on stdout and skipped; they do not count towards
    ``limit`` and receive no id.

    Two CSV files are written next to the input:

    * ``filepath + "-export"`` -- one row per email, one column per finder
      (keyed by ``finder.getFeatureTitle()``) plus a ``"Phishy"`` column.
    * ``filepath + "-export-index.csv"`` -- one row per email with its
      1-based ``id``, the payload text (``message``) and the full message
      rendered with ``str()`` (``raw``).

    Args:
        filepath: Path of the mbox file to read; also the prefix of both
            output files.
        phishy: Label stored in the ``"Phishy"`` column of every row.
        limit: Maximum number of non-empty emails to export. A falsy value
            (``0`` or ``None``) disables the cap.
    """
    mbox = mailbox.mbox(filepath)
    # Compiled once: it is applied to every part of every message.
    whitespace = re.compile(r'\s+')
    finders = [
        HTMLFormFinder(),
        AttachmentFinder(),
        FlashFinder(),
        IFrameFinder(),
        HTMLContentFinder(),
        URLsFinder(),
        ExternalResourcesFinder(),
        JavascriptFinder(),
        CssFinder(),
        IPsInURLs(),
        AtInURLs(),
        EncodingFinder(),
    ]

    data = []
    email_index = []
    next_id = 1

    try:
        for message in mbox:
            # Size of the message once all whitespace is stripped from its parts.
            totalsize = sum(
                len(whitespace.sub('', part["payload"]))
                for part in utils.getpayload_dict(message)
            )
            if totalsize < 1:
                # Single parenthesised argument: prints the same text under
                # both the Python 2 print statement and the Python 3 function.
                print("empty email - " + str(phishy) + " - " +
                      utils.getpayload(message))
                continue

            features = {}
            for finder in finders:
                features[finder.getFeatureTitle()] = finder.getFeature(message)
            features["Phishy"] = phishy
            data.append(features)

            email_index.append({
                "id": next_id,
                "message": utils.getpayload(message),
                "raw": str(message),
            })
            next_id += 1

            # next_id is one past the number of emails exported so far, so
            # compare with ">" -- ">=" stopped one email short of the limit.
            if limit and next_id > limit:
                break
    finally:
        # Release the file handle held by the mailbox.
        mbox.close()

    pd.DataFrame(data).to_csv(filepath + "-export", quoting=csv.QUOTE_ALL)
    pd.DataFrame(email_index).to_csv(filepath + "-export-index.csv")
def getFeature(self, message):
    """Tell whether the message payload appears to reference Flash content.

    The payload text is obtained through ``utils.getpayload`` and lower-cased,
    then checked for either of two signals:

    * at least one match of the ``FLASH_LINKED_CONTENT`` pattern, or
    * an ``embed src="....swf"`` fragment.

    Args:
        message: The email message to inspect.

    Returns:
        bool: ``True`` if either signal is found, ``False`` otherwise.
    """
    import re

    super(FlashFinder, self).getFeature(message)
    text = utils.getpayload(message).lower()

    # Any hit on the linked-content pattern is enough on its own.
    if re.findall(FLASH_LINKED_CONTENT, text, re.IGNORECASE):
        return True

    embed_pattern = re.compile(r'embed\s*src\s*=\s*\".*\.swf\"', re.IGNORECASE)
    return embed_pattern.search(text) is not None
def getFeature(self, message):
    """Tell whether the message payload contains a bare iframe tag.

    The payload text is obtained through ``utils.getpayload``, lower-cased and
    searched for ``<iframe>`` or ``</iframe>``, allowing at most one
    whitespace character around the optional slash and before ``>``.

    Args:
        message: The email message to inspect.

    Returns:
        bool: ``True`` if such a tag is found, ``False`` otherwise.
    """
    import re

    super(IFrameFinder, self).getFeature(message)
    text = utils.getpayload(message).lower()

    # NOTE(review): the pattern does not allow attributes, so an opening
    # tag such as <iframe src="..."> is only detected through its closing
    # </iframe> tag -- confirm this is intended.
    iframe_tag = re.compile(r'<\s?\/?\s?iframe\s?>', re.IGNORECASE)
    return iframe_tag.search(text) is not None