def MacroHunter(targetFile):
    """Scan ``targetFile`` for VBA macros and print a colorized findings table.

    Each finding reported by olevba becomes one table row (threat level,
    keyword, description). Nothing is returned; all output goes to stdout.
    Any failure while reading or parsing the file is reported with a single
    error line instead of being raised.
    """
    # Hints that olevba appends to some descriptions. They refer to olevba's
    # own command-line switches, so they are stripped from "Suspicious" rows.
    cli_hints = (
        "(use option --deobf to deobfuscate)",
        "(option --decode to see all)",
    )
    # Highlight colour per threat level; any other level is printed as-is.
    level_colors = {"Suspicious": yellow, "IOC": magenta, "AutoExec": red}

    answerTable = PrettyTable()
    answerTable.field_names = [f"{green}Threat Levels{white}", f"{green}Macros{white}", f"{green}Descriptions{white}"]

    print(f"\n{infoS} Looking for VBA Macros...")
    try:
        with open(targetFile, "rb") as handle:
            fileData = handle.read()
        vbaparser = VBA_Parser(targetFile, data=fileData)
        try:
            # Detect first: analysing a macro-less document may yield None,
            # which used to surface as a bogus "parse error".
            if not vbaparser.detect_vba_macros():
                print(f"{errorS} Not any VBA macros found.")
                return
            for level, keyword, description in vbaparser.analyze_macros() or []:
                text = f"{description}"
                if level == "Suspicious":
                    for hint in cli_hints:
                        text = text.replace(hint, "")
                color = level_colors.get(level)
                label = f"{level}" if color is None else f"{color}{level}{white}"
                answerTable.add_row([label, f"{keyword}", text])
            print(f"{answerTable}\n")
        finally:
            vbaparser.close()
    except Exception:
        print(f"{errorS} An error occured while parsing that file for macro scan.")
def doc_parsing(self, filename, filecontent):
    '''
    Parse attachment data taken from mail content and report on its VBA macros.

    Returns the textual report when macros are found, or None when the
    document contains none. As a side effect the sum of all olevba counters
    is stored in ``self.level``.
    '''
    # send data to vba parser
    vbaparser = VBA_Parser(filename, data=filecontent)
    try:
        if not vbaparser.detect_vba_macros():
            self.log("VBA no Macros found in file")
            return None  # nothing found

        results = vbaparser.analyze_macros() or []
        self.log("VBA Macros found")

        # generate report for log file
        # NOTE(review): the number is the position of the finding among all
        # results (only suspicious ones are listed) -- confirm this matches
        # the intended numbering.
        report = [
            'Macro Number %i:\n Type: %s\n Keyword: %s\n Description: %s\n' % (nr, kw_type, keyword, description)
            for nr, (kw_type, keyword, description) in enumerate(results, 1)
            if kw_type == 'Suspicious'
        ]

        counters = (
            ('\nSummery:\nAutoExec keywords: %d\n', vbaparser.nb_autoexec),
            ('Suspicious keywords: %d\n', vbaparser.nb_suspicious),
            ('IOCs: %d\n', vbaparser.nb_iocs),
            ('Hex obfuscated strings: %d\n', vbaparser.nb_hexstrings),
            ('Base64 obfuscated strings: %d\n', vbaparser.nb_base64strings),
            ('Dridex obfuscated strings: %d\n', vbaparser.nb_dridexstrings),
            ('VBA obfuscated strings: %d', vbaparser.nb_vbastrings),
        )
        report.extend(template % count for template, count in counters)

        # set reject level to global
        self.level = sum(count for _template, count in counters)
        return ''.join(report)  # return the log to caller
    finally:
        # Release the parser even when detection or analysis raises.
        vbaparser.close()
def parseOLEDocument(f):
    """Parse an OLE document for VBA macros.

    When macros are found, one line per olevba finding is written to
    ``<f>.analysis``. Does nothing when ``f`` is empty or oletools support is
    disabled. Always returns None.
    """
    if not f or not useOLETools:
        return

    writeLog('DEBUG: Analyzing with oletools')
    try:
        v = VBA_Parser(f)
    except Exception:
        writeLog("Not a supported file format: %s" % f)
        return

    try:
        writeLog('DEBUG: Detected file type: %s' % v.type)
        if not v.detect_vba_macros():
            writeLog('DEBUG: No VBA Macros found')
            return

        writeLog('DEBUG: VBA Macros found')
        # Only a failure to create the file is reported here; write errors
        # still propagate to the caller.
        try:
            t = open("%s.analysis" % f, 'w')
        except IOError as e:
            writeLog("Cannot create analysis file %s.analysis: %s" % (f, e.strerror))
            return
        with t:
            for kw_type, keyword, description in v.analyze_macros() or []:
                t.write("%-12s | %-25s | %s\n" % (kw_type, keyword, description))
        writeLog("DEBUG: Analysis dumped to %s.analysis" % f)
    finally:
        # Release the parser on every exit path.
        v.close()
    return
def parseOLEDocument(f):
    """Parse an OLE document for VBA macros.

    When macros are found, or the file name ends in ``.js``, one line per
    olevba finding is written to ``<f>.analysis``. Does nothing when ``f`` is
    empty or oletools support is disabled. Always returns None.
    """
    if not f or not useOLETools:
        return

    writeLog('DEBUG: Analyzing with oletools')
    try:
        v = VBA_Parser(f)
    except Exception:
        writeLog("Not a supported file format: %s" % f)
        return

    try:
        writeLog('DEBUG: Detected file type: %s' % v.type)

        # Hack: Search for a .js extension
        fextension = os.path.splitext(f)[1]
        if not (v.detect_vba_macros() or fextension == ".js"):
            writeLog('DEBUG: No VBA Macros found')
            return

        writeLog('DEBUG: VBA Macros/JScript found')
        # Only a failure to create the file is reported here; write errors
        # still propagate to the caller.
        try:
            t = open("%s.analysis" % f, 'w')
        except IOError as e:
            writeLog("Cannot create analysis file %s.analysis: %s" % (f, e.strerror))
            return
        with t:
            # The .js path can reach this point without detected macros, in
            # which case the analysis may yield nothing at all.
            for kw_type, keyword, description in v.analyze_macros() or []:
                t.write("%-12s | %-25s | %s\n" % (kw_type, keyword, description))
        writeLog("DEBUG: Analysis dumped to %s.analysis" % f)
    finally:
        # Release the parser on every exit path.
        v.close()
    return
def olevba_trig(file):
    """Print the VBA macros contained in ``file`` and olevba's analysis.

    For a document with macros this prints every extracted module, each
    analysis finding and the per-category counters. Errors are not raised;
    the exception type and the file name are printed instead.
    """
    try:
        vbaparser = VBA_Parser(file)
        try:
            if vbaparser.detect_vba_macros():
                separator = '- ' * 39
                for (filename, stream_path, vba_filename, vba_code) in vbaparser.extract_macros():
                    print('Filename :', filename)
                    print('OLE stream :', stream_path)
                    print('VBA filename:', vba_filename)
                    print(separator)
                    print(vba_code)
                    print(separator)

                for kw_type, keyword, description in vbaparser.analyze_macros() or []:
                    print('type=%s - keyword=%s - description=%s' % (kw_type, keyword, description))

                print('AutoExec keywords: %d' % vbaparser.nb_autoexec)
                print('Suspicious keywords: %d' % vbaparser.nb_suspicious)
                print('IOCs: %d' % vbaparser.nb_iocs)
                print('Hex obfuscated strings: %d' % vbaparser.nb_hexstrings)
                print('Base64 obfuscated strings: %d' % vbaparser.nb_base64strings)
                print('Dridex obfuscated strings: %d' % vbaparser.nb_dridexstrings)
                print('VBA obfuscated strings: %d' % vbaparser.nb_vbastrings)
                print("\n")
        finally:
            vbaparser.close()
    except Exception as exc:
        # Report the exception class (not its message), as before; unlike a
        # bare ``except`` this lets KeyboardInterrupt/SystemExit through.
        print(f'{type(exc)} from {file}')
def get_macros():
    """Extract and analyse the VBA macros of the document at ``/sample``.

    Returns a ``(report, tags)`` tuple. ``report`` is a dict with the
    extracted macros, the analysis findings and the olevba counters, or None
    when the file cannot be opened by olevba. ``tags`` is a list of labels
    ('run-file', 'macros', 'obfuscated') derived from the findings.
    """
    extracted_macros = []
    macro_analysis = []
    tags = []
    vbaparser = None

    try:
        vbaparser = VBA_Parser('/sample')
        vbaparser.detect_vba_macros()

        for (_container, stream_path, vba_filename, vba_code) in vbaparser.extract_macros():
            extracted_macros.append({
                "stream_path": stream_path,
                "vba_filename": vba_filename,
                "vba_code": vba_code
            })

        # The analysis result is not iterable when there is nothing to
        # analyse; that case is deliberately treated as "no findings".
        try:
            for kw_type, keyword, description in vbaparser.analyze_macros():
                macro_analysis.append({
                    "kw_type": kw_type,
                    "keyword": keyword,
                    "description": description
                })
                if keyword == 'Shell':
                    tags.append('run-file')
        except TypeError:
            pass

        macro_suspicious_categories = {
            "nb_macros": vbaparser.nb_macros,
            "nb_autoexec": vbaparser.nb_autoexec,
            "nb_suspicious": vbaparser.nb_suspicious,
            "nb_iocs": vbaparser.nb_iocs,
            "nb_hexstrings": vbaparser.nb_hexstrings,
            "nb_base64strings": vbaparser.nb_base64strings,
            "nb_dridexstrings": vbaparser.nb_dridexstrings,
            "nb_vbastrings": vbaparser.nb_vbastrings,
        }

        if vbaparser.nb_macros:
            tags.append('macros')
        if vbaparser.nb_hexstrings or vbaparser.nb_base64strings:
            tags.append('obfuscated')
    except FileOpenError:
        return None, tags
    finally:
        # The parser stays None when the constructor itself failed.
        if vbaparser is not None:
            vbaparser.close()

    return {
        "extracted_macros": extracted_macros,
        "macro_analysis": macro_analysis,
        "macro_suspicious_categories": macro_suspicious_categories
    }, tags
def vbaparsing(filename):
    """Return the VBA source of ``filename`` followed by olevba's findings.

    The result is one string: the code of every extracted macro module,
    concatenated without separators, then one
    ``type=... - keyword=... - description=...`` fragment per finding.
    """
    vbafile = VBA_Parser(filename)
    try:
        parts = [
            vba_code
            for (_container, _stream_path, _vba_filename, vba_code) in vbafile.extract_macros()
        ]
        # A document without macros may produce no analysis result at all.
        parts.extend(
            'type=%s - keyword=%s - description=%s' % (kw_type, keyword, description)
            for kw_type, keyword, description in vbafile.analyze_macros() or []
        )
    finally:
        vbafile.close()
    return "".join(parts)
def get_macros_infos(self):
    """Check the file's macros for suspicious behaviour.

    Returns None when ``self.has_macros`` is false. Otherwise returns a list
    of dicts, one per indicator, each holding the hit count ('number'), a
    description, a 'danger' flag and the olevba detection function.
    """
    if not self.has_macros:
        return None

    vbaparser = VBA_Parser(self.file_path)
    try:
        vbaparser.analyze_macros()

        has_autoexec = vbaparser.nb_autoexec > 0
        # Obfuscated strings only count towards the combined indicators when
        # the document also contains an auto-executing macro.
        # obfuscated vba and autoexec danger
        autoexec_and_vba = vbaparser.nb_vbastrings if has_autoexec and vbaparser.nb_vbastrings > 0 else 0
        # obfuscated Base64 and autoexec danger
        autoexec_and_base64 = vbaparser.nb_base64strings if has_autoexec and vbaparser.nb_base64strings > 0 else 0
        # obfuscated HEX and autoexec danger
        autoexec_and_HEX = vbaparser.nb_hexstrings if has_autoexec and vbaparser.nb_hexstrings > 0 else 0

        macros_infos = [
            {'number': vbaparser.nb_autoexec,
             'description': 'Ключевые слова автоматического вызова',
             'danger': False,
             'function': olevba.detect_autoexec},
            {'number': autoexec_and_HEX,
             'description': 'Автоматический вызов шеснадцатиричных обфусцированных строк',
             'danger': False,
             'function': olevba.detect_hex_strings},
            {'number': vbaparser.nb_vbastrings,
             'description': 'VBA обфусцированные строки',
             'danger': False,
             'function': olevba.detect_vba_strings},
            {'number': vbaparser.nb_suspicious,
             'description': 'Подозрительные ключевые слова',
             'danger': False,
             'function': olevba.detect_suspicious},
            {'number': autoexec_and_vba,
             'description': 'Автоматический вызов обфусцированного кода',
             'danger': True,
             'function': olevba.detect_vba_strings},
            {'number': vbaparser.nb_dridexstrings,
             'description': 'Dridex обфусцированные строки',
             'danger': True,
             'function': olevba.detect_dridex_strings},
            {'number': autoexec_and_base64,
             'description': 'Автоматический вызов Base64 обфусцированных строк',
             'danger': True,
             'function': olevba.detect_base64_strings},
        ]
    finally:
        # Release the parser even if the analysis raises.
        vbaparser.close()
    return macros_infos
def check_macros(doc_path, filename) -> dict:
    """Score the VBA macros of the document at ``doc_path``.

    ``filename`` is only used in log messages.

    Returns a dict ``{'details': str, 'mal_score': int}`` when macros are
    present: every suspicious keyword adds 1 to the score and every IOC
    adds 2. Returns an empty string when the document has no macros and None
    when parsing fails (the error is logged).
    """
    try:
        vbaparser = VBA_Parser(doc_path)
        try:
            if not vbaparser.detect_vba_macros():
                return ""

            logging.warning("VBA macros in \"%s\"", filename)

            # Silence logging while olevba analyses the macros. The restore
            # must run even if the analysis raises, otherwise logging stays
            # disabled process-wide (including the error report below).
            logging.disable(level=logging.CRITICAL)
            try:
                results = vbaparser.analyze_macros()
            finally:
                logging.disable(level=logging.NOTSET)

            detail_lines = ["Macro Results\n"]
            suspicious_count = 0
            ioc_list = []
            for kw_type, keyword, description in results:
                detail_lines.append("\t - " + kw_type + " - (" + keyword + ") - " + description + "\n")
                kind = kw_type.lower()
                if kind == "suspicious":
                    suspicious_count += 1
                elif kind == "ioc":
                    ioc_list.append(keyword)

            mal_score = 0
            if suspicious_count != 0:
                logging.warning("Found %d suspicious items in \"%s\"", suspicious_count, filename)
                mal_score += suspicious_count
            ioc_count = len(ioc_list)
            if ioc_count != 0:
                logging.warning("Found %d IOCs: %s, in \"%s\"", ioc_count, ioc_list, filename)
                mal_score += ioc_count * 2

            return {'details': "".join(detail_lines), 'mal_score': mal_score}
        finally:
            vbaparser.close()
    except Exception as e:
        logging.error("Failed to parse macros for: {} - {}".format(filename, e))
        return
def _ole_analysis(full_targ_path):
    """Run oletools against a document sample containing OLE data.

    Returns a 4-tuple ``(vba_macro, macro_analysis_over, analysis, iocs)``:

    * ``vba_macro`` -- "Present" or "None";
    * ``macro_analysis_over`` -- list of "<n>:<detail>" strings, five per
      extracted macro (path, file name, OLE stream, VBA file name, code);
    * ``analysis`` -- ``str()`` of the raw olevba analysis result;
    * ``iocs`` -- the distinct finding types joined with ':' in first-seen
      order, or "None".

    If the parser cannot be built (AttributeError) all four elements are
    "ERROR_PARSING".
    """
    try:
        vba_parse_Obj = VBA_Parser(full_targ_path)
    except AttributeError:
        return ("ERROR_PARSING", "ERROR_PARSING", "ERROR_PARSING", "ERROR_PARSING")

    macro_analysis_over = []
    try:
        if not vba_parse_Obj.detect_vba_macros():
            return ("None", macro_analysis_over, "None", "None")

        # Utilizing oletools to perform analysis.
        # Grabbing info from each macro.
        for MA_CNT, (file_name, ole_stream, vba_filename, vba_code) in enumerate(vba_parse_Obj.extract_macros(), 1):
            prefix = str(MA_CNT)
            macro_analysis_over.append(prefix + ':' + str(full_targ_path))
            macro_analysis_over.append(prefix + ":Filename :" + file_name)
            macro_analysis_over.append(prefix + ":OLE Stream :" + ole_stream)
            macro_analysis_over.append(prefix + ":VBA Filename :" + vba_filename)
            macro_analysis_over.append(prefix + ':' + vba_code)

        # Grabbing some overall VBA analysis info.
        macro_analysis_res = vba_parse_Obj.analyze_macros()
        macro_flag_types = []
        if isinstance(macro_analysis_res, list):
            for finding in macro_analysis_res:
                # Deduplicate on the finding type itself, keeping first-seen
                # order so the joined result is stable between runs.
                flag_type = str(finding[0])
                if flag_type not in macro_flag_types:
                    macro_flag_types.append(flag_type)

        iocs = ':'.join(macro_flag_types) if macro_flag_types else "None"
        return ("Present", macro_analysis_over, str(macro_analysis_res), iocs)
    finally:
        # Release the parser on every exit path, including errors.
        vba_parse_Obj.close()
def _run(self, scanObject, result, depth, args):
    """Attach olevba findings for ``scanObject`` as module metadata.

    Every finding is stored under its type with the value
    "<keyword> - <description>". Scan-control exceptions are re-raised; any
    other parsing error is logged at debug level and ignored. Always returns
    an empty list.
    """
    moduleResult = []

    # NOTE(review): the buffer is passed as VBA_Parser's first positional
    # argument, which olevba documents as the file name -- confirm whether
    # ``data=scanObject.buffer`` was intended.
    vbap_buffer = VBA_Parser(scanObject.buffer)
    try:
        if vbap_buffer.detect_vba_macros():
            for kw_type, keyword, description in vbap_buffer.analyze_macros():
                kw = '%s - %s' % (keyword, description)
                scanObject.addMetadata(self.module_name, kw_type, kw)
    except (QuitScanException, GlobalScanTimeoutError, GlobalModuleTimeoutError):
        raise
    except Exception:
        logging.debug("Failed to parse OLEVBA")
    finally:
        # Also runs when a scan-control exception is re-raised above.
        vbap_buffer.close()

    return moduleResult
def _ole_analysis(full_targ_path):
    """Run oletools against a document sample containing OLE data.

    Returns a 4-tuple ``(vba_macro, macro_analysis_over, analysis, iocs)``:

    * ``vba_macro`` -- "Present" or "None";
    * ``macro_analysis_over`` -- list of "<n>:<detail>" strings, five per
      extracted macro (path, file name, OLE stream, VBA file name, code);
    * ``analysis`` -- ``str()`` of the raw olevba analysis result;
    * ``iocs`` -- the distinct finding types joined with ':' in first-seen
      order, or "None".

    If the parser cannot be built (AttributeError) all four elements are
    "ERROR_PARSING".
    """
    try:
        vba_parse_Obj = VBA_Parser(full_targ_path)
    except AttributeError:
        return("ERROR_PARSING", "ERROR_PARSING", "ERROR_PARSING", "ERROR_PARSING")

    macro_analysis_over = []
    try:
        if not vba_parse_Obj.detect_vba_macros():
            return("None", macro_analysis_over, "None", "None")

        # Utilizing oletools to perform analysis.
        # Grabbing info from each macro.
        for MA_CNT, (file_name, ole_stream, vba_filename, vba_code) in enumerate(vba_parse_Obj.extract_macros(), 1):
            prefix = str(MA_CNT)
            macro_analysis_over.append(prefix+':'+str(full_targ_path))
            macro_analysis_over.append(prefix+":Filename :"+file_name)
            macro_analysis_over.append(prefix+":OLE Stream :"+ole_stream)
            macro_analysis_over.append(prefix+":VBA Filename :"+vba_filename)
            macro_analysis_over.append(prefix+':'+vba_code)

        # Grabbing some overall VBA analysis info.
        macro_analysis_res = vba_parse_Obj.analyze_macros()
        macro_flag_types = []
        if isinstance(macro_analysis_res, list):
            for finding in macro_analysis_res:
                # Deduplicate on the finding type itself, keeping first-seen
                # order so the joined result is stable between runs.
                flag_type = str(finding[0])
                if flag_type not in macro_flag_types:
                    macro_flag_types.append(flag_type)

        iocs = ':'.join(macro_flag_types) if macro_flag_types else "None"
        return("Present", macro_analysis_over, str(macro_analysis_res), iocs)
    finally:
        # Release the parser on every exit path, including errors.
        vba_parse_Obj.close()
def MacroHunter(targetFile):
    """Scan ``targetFile`` for VBA macros and print a colorized findings table.

    Each finding reported by olevba becomes one table row (threat level,
    keyword, description). Nothing is returned; all output goes to stdout.
    Errors while reading or parsing the file propagate to the caller.
    """
    # Highlight colour per threat level; any other level is printed as-is.
    level_colors = {"Suspicious": yellow, "IOC": magenta, "AutoExec": red}

    answerTable = PrettyTable()
    answerTable.field_names = [f"{green}Threat Levels{white}", f"{green}Macros{white}", f"{green}Descriptions{white}"]

    print(f"\n{infoS} Looking for VBA Macros...")
    with open(targetFile, "rb") as handle:
        fileData = handle.read()

    vbaparser = VBA_Parser(targetFile, data=fileData)
    try:
        # Run the detection explicitly: reading ``contains_macros`` alone
        # never triggers it, so macros could go unreported.
        if not vbaparser.detect_vba_macros():
            print(f"{errorS} Not any macros found.")
            return
        for level, keyword, description in vbaparser.analyze_macros() or []:
            color = level_colors.get(level)
            label = f"{level}" if color is None else f"{color}{level}{white}"
            answerTable.add_row([label, f"{keyword}", f"{description}"])
        print(f"{answerTable}\n")
    finally:
        vbaparser.close()
def detect(self, filename):
    """Collect the VBA macros of ``filename`` together with their analysis.

    Returns None when the document has no macros. Otherwise returns a list
    holding one dict per extracted macro module, then one dict per olevba
    finding, and finally ``{'revealed_macro': <deobfuscated source>}``.
    """
    vbaparser = VBA_Parser(filename)
    try:
        if not vbaparser.detect_vba_macros():
            return None

        # ``container`` is the file name reported by olevba for the module;
        # it is kept separate from the ``filename`` argument.
        return_list = [
            {
                'filename': container,
                'ole_stream': stream_path,
                'vba_filename': vba_filename,
                'vba_code': vba_code
            }
            for (container, stream_path, vba_filename, vba_code) in vbaparser.extract_macros()
        ]
        return_list.extend(
            {
                'type': kw_type,
                'keyword': keyword,
                'description': description
            }
            for kw_type, keyword, description in vbaparser.analyze_macros() or []
        )
        return_list.append({'revealed_macro': vbaparser.reveal()})
        return return_list
    finally:
        # Release the parser on every exit path.
        vbaparser.close()
def inspect_vba_data(self, filename, filecontent):
    '''
    Parse attachment data taken from mail content and report on its VBA macros.

    Returns None when the document contains no macros, otherwise the list
    ``[r_level, report]`` where ``r_level`` is the sum of all olevba
    counters and ``report`` is the textual report for the log.
    '''
    # send data to vba parser
    vbaparser = VBA_Parser(filename, data=filecontent)
    try:
        if not vbaparser.detect_vba_macros():
            self.log("VBA no Macros found in file")
            return None  # nothing found

        results = vbaparser.analyze_macros() or []
        self.log("VBA Macros found")

        # generate report for log file
        # NOTE(review): the number is the position of the finding among all
        # results (only suspicious ones are listed) -- confirm this matches
        # the intended numbering.
        report = [
            'Macro Number %i:\n Type: %s\n Keyword: %s\n Description: %s\n' % (
                nr, kw_type, keyword, description)
            for nr, (kw_type, keyword, description) in enumerate(results, 1)
            if kw_type == 'Suspicious'
        ]

        counters = (
            ('\nSummery:\nAutoExec keywords: %d\n', vbaparser.nb_autoexec),
            ('Suspicious keywords: %d\n', vbaparser.nb_suspicious),
            ('IOCs: %d\n', vbaparser.nb_iocs),
            ('Hex obfuscated strings: %d\n', vbaparser.nb_hexstrings),
            ('Base64 obfuscated strings: %d\n', vbaparser.nb_base64strings),
            ('Dridex obfuscated strings: %d\n', vbaparser.nb_dridexstrings),
            ('VBA obfuscated strings: %d', vbaparser.nb_vbastrings),
        )
        report.extend(template % count for template, count in counters)

        # TBD: calculate a better level - add additional values to Base64 and
        # Hex. No human writes such strings into code.
        r_level = sum(count for _template, count in counters)

        # The level is handed back to the caller instead of being stored on
        # the instance.
        return [r_level, ''.join(report)]  # return the log to caller
    finally:
        # Release the parser even when detection or analysis raises.
        vbaparser.close()
def ProcessFile(path):
    """Analyse the document at ``path`` with olevba and print a JSON report.

    On success the JSON object has ``valid: true`` and a ``data`` member with
    the macro flag, the extracted macros, the analysis warnings, the olevba
    counters and the deobfuscated source. On failure it has ``valid: false``
    and an ``error`` message.

    Returns 0 on success, 1 when the analysis failed and 2 when ``path`` is
    not a file. The single-argument ``print(...)`` calls behave the same on
    Python 2 and Python 3.
    """
    if not os.path.isfile(path):
        print('{0} not a file!'.format(path))
        return 2

    try:
        vbaparser = VBA_Parser(path)
        try:
            oledata = {}
            oledata['has_macros'] = vbaparser.detect_vba_macros()

            # dump macros content
            oledata['macros'] = [
                {
                    'filename': filename,
                    'stream': stream_path,
                    'vba': vba_filename,
                    'content': convert_to_printable_null_terminated(vba_code),
                }
                for (filename, stream_path, vba_filename, vba_code) in vbaparser.extract_macros()
            ]

            # macro analysis (may yield nothing for macro-less documents)
            oledata['macros_warnings'] = [
                {'type': kw_type, 'keyword': keyword, 'description': description}
                for kw_type, keyword, description in vbaparser.analyze_macros() or []
            ]

            # counters
            oledata['counters'] = {
                'autoexec': vbaparser.nb_autoexec,
                'suspicious': vbaparser.nb_suspicious,
                'iocs': vbaparser.nb_iocs,
                'hexstrings': vbaparser.nb_hexstrings,
                'base64strings': vbaparser.nb_base64strings,
                'dridexstrings': vbaparser.nb_dridexstrings,
                'vbastrings': vbaparser.nb_vbastrings,
            }

            # deobfuscation
            oledata['deobfuscated'] = convert_to_printable_null_terminated(vbaparser.reveal())
        finally:
            # close, also when extraction or analysis raised
            vbaparser.close()

        print(json.dumps({'valid': True, 'data': oledata}))
    except Exception as ex:
        print(json.dumps({'valid': False, 'error': str(ex)}))
        return 1
    return 0
# NOTE(review): this excerpt starts mid-script. `vba_code` and `vbaparser` are
# defined earlier, and the per-module statements down to the auto-exec report
# presumably sit inside a loop over the extracted macros -- confirm against
# the full file before relying on this layout.
print("- " * 39)
print("vba_code", vba_code)

# Scan the module source directly, including strings olevba can decode.
vba_scanner = VBA_Scanner(vba_code)
results = vba_scanner.scan(include_decoded_strings=True)
for kw_type, keyword, description in results:
    print("type=%s - keyword=%s - description=%s" % (kw_type, keyword, description))

autoexec_keywords = detect_autoexec(vba_code)
if autoexec_keywords:
    print("Auto-executable Macro keyword is detected. :")
    for keyword, description in autoexec_keywords:
        # The format arguments used to be inside the string literal, so the
        # template text was printed instead of the keyword and description.
        print("%s: %s" % (keyword, description))
else:
    print("Auto-executable Macro keywords is not detected.")

# Whole-document analysis and per-category counters.
results = vbaparser.analyze_macros()
for kw_type, keyword, description in results:
    print("type=%s - keyword=%s - description=%s" % (kw_type, keyword, description))

print("AutoExec keywords: %d" % vbaparser.nb_autoexec)
print("Suspicious keywords: %d" % vbaparser.nb_suspicious)
print("IOCs: %d" % vbaparser.nb_iocs)
print("Hex obfuscated strings: %d" % vbaparser.nb_hexstrings)
print("Base64 obfuscated strings: %d" % vbaparser.nb_base64strings)
print("Dridex obfuscated strings: %d" % vbaparser.nb_dridexstrings)
print("VBA obfuscated strings: %d" % vbaparser.nb_vbastrings)

# Close the parser
vbaparser.close()
def parse_vba(file):
    """Analyse ``file`` with olevba and collect indicators of malicious macros.

    ``file`` is a name relative to ``filescanner_proc_dir``. Progress and
    findings are emitted through ``print_output``.

    Returns ``(has_macros, ole_macro_result, indicators)``: a bool, a short
    status string and a list of indicator strings (IOC findings, non-zero
    olevba counters and whatever ``scan_macro`` reports).
    """
    print_output(file, '\n\n-----------------------------------------\n[Analyzing with olevba]\n-----------------------------------------\n', 'text')

    ole_macro_result = 'no vb-macro'
    has_macros = False
    indicators = []

    vbaparser = VBA_Parser(filescanner_proc_dir+file)
    try:
        # Check for Macros
        if not vbaparser.detect_vba_macros():
            print_output(file, '[-] No Macros Found', 'text')
            return has_macros, ole_macro_result, indicators

        print_output(file, '[-] MACROS FOUND', 'text')
        has_macros = True
        ole_macro_result = 'VB-MACRO FOUND'

        # Variable to be passed to MacroRaptor
        vba_code_all_modules = ''.join(
            vba_code + '\n'
            for (_filename, _stream_path, _vba_filename, vba_code) in vbaparser.extract_all_macros()
        )

        for (_filename, stream_path, vba_filename, _vba_code) in vbaparser.extract_macros():
            print_output(file, '\nOLE Stream: {0}'.format(string_clean(stream_path)), 'text')
            print_output(file, 'VBA Filename: {0}'.format(string_clean(vba_filename)), 'text')

        # Analyse the VBA Code
        results = vbaparser.analyze_macros(show_decoded_strings=True)
        for kw_type, keyword, description in results:
            finding = '{} - {} - {}'.format(kw_type, keyword, description)
            # Add IOC detections to indicators list
            if kw_type == 'IOC':
                indicators.append(description+': '+keyword)
                print_output(file, finding, 'shell')
            else:
                print_output(file, finding, 'text')

        # Deobfusgate and return macro code
        # print_output(file, '\n'+vbaparser.reveal(), 'text')

        # Print number of items in each category and append to indicators
        # list. IOCs are printed only: each one was already added above.
        print_output(file, '', 'shell')
        counters = (
            ('AutoExec keywords', vbaparser.nb_autoexec, True),
            ('Suspicious keywords', vbaparser.nb_suspicious, True),
            ('IOCs', vbaparser.nb_iocs, False),
            ('Hex obfuscated strings', vbaparser.nb_hexstrings, True),
            ('Base64 obfuscated strings', vbaparser.nb_base64strings, True),
            ('Dridex obfuscated strings', vbaparser.nb_dridexstrings, True),
            ('VBA obfuscated strings', vbaparser.nb_vbastrings, True),
        )
        for label, count, is_indicator in counters:
            summary = '{}: {}'.format(label, count)
            print_output(file, summary, 'shell')
            if is_indicator and count != 0:
                indicators.append(summary)

        # Update indicators list with matches from MRaptor
        macro_indicators = scan_macro(file, vba_code_all_modules)
        indicators = indicators + macro_indicators

        # Use oledump to gather VBA code for archiving
        oledump_scan(file, '-p plugin_vba_summary.py')

        return has_macros, ole_macro_result, indicators
    finally:
        # Close the file on every exit path, including the early return.
        vbaparser.close()
# NOTE(review): this excerpt appears to be the interior of a larger function
# (see the ``global`` statement) and is cut off after the first lines of
# show_message_indicators -- confirm the layout against the full file.
# The single-argument print(...) calls behave the same on Python 2 and 3.

# Read the sample through a context manager so the handle is always closed.
with open(sus_file, 'rb') as sus_handle:
    sus_filedata = sus_handle.read()
vbaparser = VBA_Parser(sus_file, data=sus_filedata)

## Manual Macro extraction details
##for (sys.argv[1], stream_path, vba_filename, vba_code) in vbaparser.extract_macros():
##    print '-' * 79
##    print 'Filename :', sys.argv[1]
##    print 'OLE stream :', stream_path
##    print 'VBA filename :', vba_filename
##    print '- ' * 39
##    print vba_code

# Macros analysis
global results
results = vbaparser.analyze_macros()


def show_message_macro_analysis():
    """Print the centred 'Macros analysis' banner."""
    print(' ' * 40 + '===============')
    print(' ' * 40 + 'Macros analysis')
    print(' ' * 40 + '===============\n\n')


def show_message_keywords():
    """Print one tab-separated row per finding in ``results``."""
    print(' Type\t\tKeyword\t\t Description')
    print('==========\t==========\t\t=========================================================')
    for kw_type, keyword, description in results:
        print('%s\t%s\t\t\t%s' % (kw_type, keyword, description))
    print('\n\n')


def show_message_indicators():
    """Print the olevba indicator counters."""
    print('=' * 35)
    print('AutoExec keywords: %d' % vbaparser.nb_autoexec)
class DocInfo(FileAnalyzer):
    """Analyzer that inspects an office document with oletools.

    It reports whether the document is encrypted (trying a few passwords to
    decrypt it), extracts and analyses VBA macros with olevba, rates them
    with MacroRaptor and, when the experimental flag is set, attempts to
    parse XLM macros.
    """

    def set_config(self, additional_config_params):
        """Initialise per-run state from the analyzer configuration."""
        self.olevba_results = {}
        self.vbaparser = None
        self.experimental = additional_config_params.get("experimental", False)
        self.passwords_to_check = []
        # this is to extract the passwords for encryption requested by the client
        # you can use pyintelowl to send additional passwords to check for
        # example:
        #             "additional_configuration": {
        #                 "Doc_Info_Experimental": {
        #                     "additional_passwords_to_check": ["testpassword"]
        #                 }
        #             },
        additional_passwords_to_check = additional_config_params.get(
            "additional_passwords_to_check", []
        )
        if isinstance(additional_passwords_to_check, list):
            self.passwords_to_check.extend(additional_passwords_to_check)

    def run(self):
        """Run the analysis and return the results dict.

        The dict always has an "olevba" entry and gains an "mraptor" entry
        ("suspicious" or "ok") when macros were found. Parser errors are
        logged and appended to ``self.report["errors"]`` instead of raised.
        """
        results = {}

        # olevba
        try:
            self.vbaparser = VBA_Parser(self.filepath)

            self.manage_encrypted_doc()

            if self.experimental:
                self.experimental_analysis()

            # go on with the normal oletools execution
            self.olevba_results["macro_found"] = self.vbaparser.detect_vba_macros()

            if self.olevba_results["macro_found"]:
                vba_code_all_modules = ""
                macro_data = []
                for (
                    v_filename,
                    stream_path,
                    vba_filename,
                    vba_code,
                ) in self.vbaparser.extract_macros():
                    extracted_macro = {
                        "filename": v_filename,
                        "ole_stream": stream_path,
                        "vba_filename": vba_filename,
                        "vba_code": vba_code,
                    }
                    macro_data.append(extracted_macro)
                    vba_code_all_modules += vba_code + "\n"
                self.olevba_results["macro_data"] = macro_data

                # example output
                #
                # {'description': 'Runs when the Word document is opened',
                #  'keyword': 'AutoOpen',
                #  'type': 'AutoExec'},
                # {'description': 'May run an executable file or a system command',
                #  'keyword': 'Shell',
                #  'type': 'Suspicious'},
                # {'description': 'May run an executable file or a system command',
                #  'keyword': 'WScript.Shell',
                #  'type': 'Suspicious'},
                # {'description': 'May run an executable file or a system command',
                #  'keyword': 'Run',
                #  'type': 'Suspicious'},
                # {'description': 'May run PowerShell commands',
                #  'keyword': 'powershell',
                #  'type': 'Suspicious'},
                # {'description': '9BA55BE5', 'keyword': 'xxx', 'type': 'Hex String'},

                # mraptor
                macro_raptor = mraptor.MacroRaptor(vba_code_all_modules)
                if macro_raptor:
                    macro_raptor.scan()
                    results["mraptor"] = (
                        "suspicious" if macro_raptor.suspicious else "ok"
                    )

                # analyze macros
                analyzer_results = self.vbaparser.analyze_macros()
                # it gives None if it does not find anything
                if analyzer_results:
                    analyze_macro_results = []
                    for kw_type, keyword, description in analyzer_results:
                        if kw_type != "Hex String":
                            analyze_macro_result = {
                                "type": kw_type,
                                "keyword": keyword,
                                "description": description,
                            }
                            analyze_macro_results.append(analyze_macro_result)
                    self.olevba_results["analyze_macro"] = analyze_macro_results

        except CannotDecryptException as e:
            logger.info(e)
        except Exception as e:
            error_message = f"job_id {self.job_id} vba parser failed. Error: {e}"
            logger.exception(error_message)
            self.report["errors"].append(error_message)
        finally:
            if self.vbaparser:
                self.vbaparser.close()

        results["olevba"] = self.olevba_results

        return results

    def _swap_parser(self, new_parser):
        """Make ``new_parser`` current and close the parser it replaces."""
        old_parser = self.vbaparser
        # Assign first so that run()'s cleanup always closes the live parser,
        # even if closing the old one raises.
        self.vbaparser = new_parser
        if old_parser is not None:
            old_parser.close()

    def manage_encrypted_doc(self):
        """Detect encryption and switch to the decrypted copy when possible.

        Records "is_encrypted" (and, for encrypted files, the passwords that
        were tried) in the olevba results. Raises CannotDecryptException when
        none of the passwords works.
        """
        self.olevba_results["is_encrypted"] = False
        # checks if it is an OLE file. That could be encrypted
        if not self.vbaparser.ole_file:
            return

        # check if the ole file is encrypted
        is_encrypted = self.vbaparser.detect_is_encrypted()
        self.olevba_results["is_encrypted"] = is_encrypted
        if not is_encrypted:
            return

        # in the case the file is encrypted I try to decrypt it
        # with the default password and the most common ones
        # by default oletools contains some basic passwords
        # we just add some more guesses
        common_pwd_to_check = []
        for num in range(10):
            common_pwd_to_check.append(f"{num}{num}{num}{num}")
        # https://twitter.com/JohnLaTwC/status/1265377724522131457
        # Raw strings keep the regex escapes from being read as (invalid)
        # string escapes.
        filename_without_spaces_and_numbers = sub(r"[-_\d\s]", "", self.filename)
        filename_without_extension = sub(
            r"(\..+)", "", filename_without_spaces_and_numbers
        )
        common_pwd_to_check.append(filename_without_extension)
        self.passwords_to_check.extend(common_pwd_to_check)

        decrypted_file_name = self.vbaparser.decrypt_file(self.passwords_to_check)
        self.olevba_results["additional_passwords_tried"] = self.passwords_to_check
        if decrypted_file_name:
            # Continue on the decrypted copy; the parser of the encrypted
            # original is closed instead of being left open.
            self._swap_parser(VBA_Parser(decrypted_file_name))
        else:
            self.olevba_results["cannot_decrypt"] = True
            raise CannotDecryptException(
                "cannot decrypt the file with the default password"
            )

    def experimental_analysis(self):
        """Run the analyses that are only enabled by the experimental flag."""
        self.manage_xlm_macros()

    def manage_xlm_macros(self):
        """Try to turn XLM macro cells into text that olevba can analyse.

        Records "xlm_macro" and, when XLM macros are present,
        "xlm_macro_autoexec" in the olevba results. A failure of the
        experimental parsing is logged and otherwise ignored.
        """
        self.olevba_results["xlm_macro"] = False
        # check if the file contains an XLM macro
        # and try an experimental parsing
        # credits to https://twitter.com/gabriele_pippi for the idea
        if not self.vbaparser.detect_xlm_macros():
            return

        self.olevba_results["xlm_macro"] = True
        logger.debug("experimental XLM macro analysis start")
        parsed_file = b""
        try:
            excel_doc = XLSWrapper2(self.filepath)
            ae_list = [
                "auto_open",
                "auto_close",
                "auto_activate",
                "auto_deactivate",
            ]
            self.olevba_results["xlm_macro_autoexec"] = []
            for ae in ae_list:
                auto_exec_labels = excel_doc.get_defined_name(ae, full_match=False)
                for label in auto_exec_labels:
                    self.olevba_results["xlm_macro_autoexec"].append(label[0])

            for i in show_cells(excel_doc):
                rec_str = ""
                if len(i) == 5:
                    # rec_str = 'CELL:{:10}, {:20}, {}'
                    #   .format(i[0].get_local_address(), i[2], i[4])
                    if i[2] != "None":
                        rec_str = "{:20}".format(i[2])
                if rec_str:
                    parsed_file += rec_str.encode()
                    parsed_file += b"\n"
        except Exception as e:
            logger.info(f"experimental XLM macro analysis failed. Exception: {e}")
        else:
            logger.debug(
                f"experimental XLM macro analysis succeded. "
                f"Binary to analyze: {parsed_file}"
            )
            if parsed_file:
                # Analyse the extracted cell text from here on; the previous
                # parser is closed instead of being left open.
                self._swap_parser(VBA_Parser(self.filename, data=parsed_file))
def run(self):
    """Analyse ``self.filepath`` with olevba and MacroRaptor.

    Returns a dict with an "olevba" entry (macro flag, extracted macros,
    analysis findings and revealed source) and an "mraptor" entry
    ("suspicious" or "ok"). Parser errors are logged and appended to
    ``self.report["errors"]`` instead of being raised.
    """
    results = {}

    # olevba
    olevba_results = {}
    try:
        vbaparser = VBA_Parser(self.filepath)
        try:
            olevba_results["macro_found"] = bool(vbaparser.detect_vba_macros())

            if olevba_results["macro_found"]:
                olevba_results["macro_data"] = [
                    {
                        "filename": v_filename,
                        "ole_stream": stream_path,
                        "vba_filename": vba_filename,
                        "vba_code": vba_code,
                    }
                    for (
                        v_filename,
                        stream_path,
                        vba_filename,
                        vba_code,
                    ) in vbaparser.extract_macros()
                ]

                # example output
                #
                # {'description': 'Runs when the Word document is opened',
                #  'keyword': 'AutoOpen',
                #  'type': 'AutoExec'},
                # {'description': 'May run an executable file or a system command',
                #  'keyword': 'Shell',
                #  'type': 'Suspicious'},
                # {'description': 'May run an executable file or a system command',
                #  'keyword': 'WScript.Shell',
                #  'type': 'Suspicious'},
                # {'description': 'May run an executable file or a system command',
                #  'keyword': 'Run',
                #  'type': 'Suspicious'},
                # {'description': 'May run PowerShell commands',
                #  'keyword': 'powershell',
                #  'type': 'Suspicious'},
                # {'description': '9BA55BE5', 'keyword': 'xxx', 'type': 'Hex String'},
                analyzer_results = vbaparser.analyze_macros(show_decoded_strings=True)
                # it gives None if it does not find anything
                if analyzer_results:
                    olevba_results["analyze_macro"] = [
                        {
                            "type": kw_type,
                            "keyword": keyword,
                            "description": description,
                        }
                        for kw_type, keyword, description in analyzer_results
                        if kw_type != "Hex String"
                    ]

                olevba_results["reveal"] = vbaparser.reveal()
        finally:
            # Close the parser even when extraction or analysis raised.
            vbaparser.close()

    except Exception as e:
        traceback.print_exc()
        error_message = f"job_id {self.job_id} vba parser failed. Error: {e}"
        logger.exception(error_message)
        self.report["errors"].append(error_message)

    results["olevba"] = olevba_results

    # mraptor
    # There is no revealed source when no macro was found or parsing failed;
    # scan an empty string rather than handing None to MacroRaptor.
    macro_raptor = mraptor.MacroRaptor(olevba_results.get("reveal") or "")
    if macro_raptor:
        macro_raptor.scan()
        results["mraptor"] = "suspicious" if macro_raptor.suspicious else "ok"

    return results
def run(analyzer_name, job_id, filepath, filename, md5, additional_config_params):
    """Run the olevba + mraptor analysis for one job and return its report.

    Args:
        analyzer_name: name used for logging and for the report template.
        job_id: job identifier used in log/error messages and passed to
            ``general.set_report_and_cleanup``.
        filepath: path handed to ``VBA_Parser``.
        filename: only used in error messages.
        md5: only used in error messages.
        additional_config_params: not used by this analyzer.

    Returns:
        The report obtained from ``general.get_basic_report_template`` with
        ``report['report']`` set to ``{'olevba': ..., 'mraptor': ...}`` on
        success, ``report['success']`` set accordingly, and any error
        messages appended to ``report['errors']``.

    olevba failures are recorded in ``report['errors']`` without aborting the
    run; mraptor then scans whatever ``reveal`` text is available (an empty
    string if none).
    """
    logger.info("started analyzer {} job_id {}".format(analyzer_name, job_id))
    report = general.get_basic_report_template(analyzer_name)
    try:
        results = {}

        # olevba
        olevba_results = {}
        try:
            vbaparser = VBA_Parser(filepath)
            try:
                olevba_results['macro_found'] = bool(
                    vbaparser.detect_vba_macros())

                if olevba_results['macro_found']:
                    olevba_results['macro_data'] = [
                        {
                            "filename": v_filename,
                            "ole_stream": stream_path,
                            "vba_filename": vba_filename,
                            "vba_code": vba_code,
                        }
                        for (v_filename, stream_path, vba_filename,
                             vba_code) in vbaparser.extract_macros()
                    ]

                    # analyze_macros yields (type, keyword, description)
                    # rows, e.g.
                    #   ('AutoExec', 'AutoOpen',
                    #    'Runs when the Word document is opened')
                    #   ('Suspicious', 'Shell',
                    #    'May run an executable file or a system command')
                    #   ('Hex String', 'xxx', '9BA55BE5')
                    analyzer_results = vbaparser.analyze_macros(
                        show_decoded_strings=True)
                    # it gives None if it does not find anything
                    if analyzer_results:
                        olevba_results['analyze_macro'] = [
                            {
                                "type": kw_type,
                                "keyword": keyword,
                                "description": description,
                            }
                            for kw_type, keyword, description in analyzer_results
                            if kw_type != 'Hex String'
                        ]

                    olevba_results['reveal'] = vbaparser.reveal()
            finally:
                # Release the parser even when the analysis above raised.
                vbaparser.close()
        except Exception as e:
            traceback.print_exc()
            error_message = "job_id {} vba parser failed. Error: {}".format(
                job_id, e)
            logger.exception(error_message)
            report['errors'].append(error_message)

        results['olevba'] = olevba_results

        # mraptor
        macro_raptor = mraptor.MacroRaptor(olevba_results.get('reveal', ''))
        macro_raptor.scan()
        results['mraptor'] = "suspicious" if macro_raptor.suspicious else 'ok'

        report['report'] = results
    except AnalyzerRunException as e:
        error_message = "job_id:{} analyzer:{} md5:{} filename: {} Analyzer Error {}" \
                        "".format(job_id, analyzer_name, md5, filename, e)
        logger.error(error_message)
        report['errors'].append(error_message)
        report['success'] = False
    except Exception as e:
        traceback.print_exc()
        error_message = "job_id:{} analyzer:{} md5:{} filename: {} Unexpected Error {}" \
                        "".format(job_id, analyzer_name, md5, filename, e)
        logger.exception(error_message)
        report['errors'].append(str(e))
        report['success'] = False
    else:
        report['success'] = True

    general.set_report_and_cleanup(job_id, report, logger)

    logger.info("ended analyzer {} job_id {}".format(analyzer_name, job_id))

    return report
def _unique_in_order(values):
    """Return the items of *values* without duplicates, in first-seen order."""
    unique = []
    for value in values:
        if value not in unique:
            unique.append(value)
    return unique


def collect_events(helper, ew):
    """Fetch inbox messages from Microsoft Graph, enrich them and write them out.

    The function queries ``/users/<audit_email_account>/mailFolders/inbox/messages/``
    (attachments and the ``LONG 0x0E08`` extended property expanded), follows
    ``@odata.nextLink`` for at most ``(interval // 60) - 1`` further pages,
    builds one dict per message according to the ``get_*`` input options, and
    passes the result to ``_write_events`` before calling ``_purge_messages``
    with the retrieved pages.

    Args:
        helper: input helper; this function uses its ``get_arg``,
            ``send_http_request``, ``log_info`` and ``log_debug`` methods.
        ew: forwarded unchanged to ``_write_events``.

    Raises:
        KeyError: if ``file_hash_algorithm`` is not ``md5``, ``sha1`` or
            ``sha256`` while a file attachment is being hashed.
    """
    # Hash constructors selectable through the 'file_hash_algorithm' option.
    hash_constructors = {
        'md5': hashlib.md5,
        'sha1': hashlib.sha1,
        'sha256': hashlib.sha256,
    }
    # Patterns used to pull the host names out of "Received" headers.
    re_by = re.compile(r'(?<=\bby\s)(\S+)')
    re_from = re.compile(r'(?<=\bfrom\s)(\S+)')
    #Content types supported by OLEVBA.
    supported_content = [
        'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
        'application/vnd.openxmlformats-officedocument.spreadsheetml.template',
        'application/vnd.ms-excel.sheet.macroenabled.12',
        'application/vnd.ms-excel.template.macroenabled.12',
        'application/vnd.ms-excel.addin.macroenabled.12',
        'application/vnd.ms-excel.sheet.binary.macroenabled.12',
        'application/vnd.ms-excel',
        'application/xml',
        'application/vnd.ms-powerpoint',
        'application/vnd.openxmlformats-officedocument.presentationml.presentation',
        'application/vnd.openxmlformats-officedocument.presentationml.template',
        'application/vnd.openxmlformats-officedocument.presentationml.slideshow',
        'application/vnd.ms-powerpoint.addin.macroenabled.12',
        'application/vnd.ms-powerpoint.presentation.macroenabled.12',
        'application/vnd.ms-powerpoint.template.macroenabled.12',
        'application/vnd.ms-powerpoint.slideshow.macroenabled.12',
        'application/msword',
        'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
        'application/vnd.openxmlformats-officedocument.wordprocessingml.template',
        'application/vnd.ms-word.document.macroenabled.12',
        'application/vnd.ms-word.template.macroenabled.12'
    ]

    # NOTE(review): any other 'endpoint' value leaves graph_url unbound and
    # the first request below fails with NameError.
    if helper.get_arg('endpoint') == 'worldwide':
        graph_url = 'https://graph.microsoft.com/v1.0'
    elif helper.get_arg('endpoint') == 'gcchigh':
        graph_url = 'https://graph.microsoft.us/v1.0'

    access_token = _get_access_token(helper)

    headers = {
        "Authorization": "Bearer " + access_token,
        "User-Agent": "MicrosoftGraphEmail-Splunk/" + _get_app_version(helper)
    }

    #defining email account to retrieve messages from
    endpoint = "/users/" + helper.get_arg('audit_email_account')
    #defining inbox id to retrieve messages from
    endpoint += "/mailFolders/inbox/messages/"
    #expanding property id 0x0E08 to gather message size, and then expanding attachments to get fileattachment type contentBytes
    endpoint += "?$expand=SingleValueExtendedProperties($filter=Id eq 'LONG 0x0E08'),attachments"
    #selecting which fields to retrieve from emails
    endpoint += "&$select=receivedDateTime,subject,sender,from,hasAttachments,internetMessageId,toRecipients,ccRecipients,bccRecipients,replyTo,internetMessageHeaders,body,bodyPreview,isReadReceiptRequested,isDeliveryReceiptRequested"
    #defining how many messages to retrieve from each page
    endpoint += "&$top=980"
    #getting the oldest messages first
    endpoint += "&$orderby=receivedDateTime"
    #getting the total count of messages in each round
    endpoint += "&$count=true"

    messages_response = helper.send_http_request(
        graph_url + endpoint, "GET", headers=headers, parameters=None,
        timeout=(15.0, 15.0)).json()

    helper.log_info("Retrieving " + str(messages_response['@odata.count']) + " messages")

    #Each entry of `messages` is one page (a list of message dicts).
    #Further pages are found through the @odata.nextLink values.
    messages = []
    messages.append(messages_response['value'])

    #Calculate how many pages of 980 messages we'll attempt based on the interval value. Helps to keep requests within API limits.
    interval_in_seconds = int(helper.get_arg('interval'))
    url_count_limit = (interval_in_seconds // 60) - 1

    if url_count_limit > 0:
        url_count = 0
        while ("@odata.nextLink" in messages_response) and (is_https(
                messages_response["@odata.nextLink"])):
            if url_count < url_count_limit:
                nextlinkurl = messages_response["@odata.nextLink"]
                messages_response = helper.send_http_request(
                    nextlinkurl, "GET", headers=headers, parameters=None,
                    timeout=(15.0, 15.0)).json()
                messages.append(messages_response['value'])
                url_count += 1
            else:
                helper.log_debug("Protecting API limits, breaking out")
                break

    #Routine to find attachments in messages. This caters for both standard, as well as inline attachments. MS Graph doesn't list inline attachments in the "hasAttachments" value, this fixes that.
    message_data = []

    for message in messages:
        for item in message:
            message_items = {
                '_time': item['receivedDateTime'],
                'to': item['toRecipients'],
                'from': item['from'],
                'sender': item['sender'],
                'subject': item['subject'],
                'id': item['id'],
                'internetMessageId': item['internetMessageId'],
                'ccRecipients': item['ccRecipients'],
                'bccRecipients': item['bccRecipients'],
                'replyTo': item['replyTo'],
                'hasAttachments': item['hasAttachments'],
            }
            message_body = item['body']['content']
            body_preview = item['bodyPreview']
            attachments = item['attachments']
            single_value_properties = item['singleValueExtendedProperties']

            if 'internetMessageHeaders' in item:
                internet_message_headers = item['internetMessageHeaders']

                if helper.get_arg('get_internet_headers'):
                    message_items['Internet-Headers'] = internet_message_headers

                #message path calculations: the last "Received" header
                #provides the source, the first one the destination.
                message_path = [
                    header for header in internet_message_headers
                    if header['name'] == "Received"
                ]
                if message_path:
                    src_line = str(message_path[-1])
                    dest_line = str(message_path[0])
                    # Prefer the "from" host, fall back to the "by" host.
                    src = re_from.search(src_line) or re_by.search(src_line)
                    dest = re_by.search(dest_line)
                    # Only record what was actually found, so a message
                    # without a usable header neither aborts the run nor
                    # inherits a value from a previous message.
                    if src:
                        message_items['src'] = str(src[0])
                    if dest:
                        message_items['dest'] = str(dest[0])

                if helper.get_arg('get_message_path'):
                    message_items['message_path'] = message_path

                if helper.get_arg('get_x_headers'):
                    message_items['X-Headers'] = [
                        header for header in internet_message_headers
                        if "X-" in header['name']
                    ]

                if helper.get_arg('get_auth_results'):
                    message_items['Authentication-Results'] = [
                        header for header in internet_message_headers
                        if "Authentication-Results" in header['name']
                    ]

                if helper.get_arg('get_spf_results'):
                    message_items['Received-SPF'] = [
                        header for header in internet_message_headers
                        if "Received-SPF" in header['name']
                    ]

                if helper.get_arg('get_dkim_signature'):
                    message_items['DKIM-Signature'] = [
                        header for header in internet_message_headers
                        if "DKIM-Signature" in header['name']
                    ]

            #tracking pixel detection
            pixel_data = pixeltrack_re.search(message_body)
            if pixel_data:
                message_items['tracking_pixel'] = "true"
                message_items['tracking_pixel_data'] = pixel_data.group(0)
            else:
                message_items['tracking_pixel'] = "false"

            #size mapping
            for extended_property in single_value_properties:
                if extended_property['id'] == "Long 0xe08":
                    message_items['size'] = extended_property['value']

            if helper.get_arg('get_body'):
                message_items['body'] = message_body

            if helper.get_arg('get_body_preview'):
                message_items['bodyPreview'] = body_preview

            # 'Internet-Headers' is already set above when the message has
            # headers; it is not repeated here because the headers variable
            # would be unbound (or left over from an earlier message) for a
            # message without them.

            if helper.get_arg('get_attachment_info'):
                message_items['attachments'] = attachments

            if helper.get_arg('get_body') and helper.get_arg('extract_iocs'):
                email_iocs = _unique_in_order(
                    extract_iocs(helper, message_items["body"]))
                if email_iocs:
                    message_items['iocs'] = email_iocs

            if helper.get_arg('get_attachment_info'):
                if message_items['attachments'] is not None:
                    # Fresh list per message: a single shared list would make
                    # every message report the attachments of all messages.
                    attach_data = []

                    for attachment in message_items["attachments"]:
                        odata_type = attachment["@odata.type"]

                        #itemAttachment: a contact, event, or message that's attached.
                        #referenceAttachment: a link to a file on OneDrive or other supported storage location.
                        if odata_type in ("#microsoft.graph.itemAttachment",
                                          "#microsoft.graph.referenceAttachment"):
                            my_added_data = {}
                            my_added_data['name'] = attachment['name']
                            my_added_data['odata_type'] = attachment['@odata.type']
                            my_added_data['id'] = attachment['id']
                            my_added_data['contentType'] = attachment['contentType']
                            my_added_data['size'] = attachment['size']
                            attach_data.append(my_added_data)

                        #Looks for fileAttachment type, which is a standard email attachment.
                        if odata_type == "#microsoft.graph.fileAttachment":
                            my_added_data = {}

                            # Decoded once and reused by every routine below.
                            attach_b64decode = base64.b64decode(
                                attachment['contentBytes'])

                            #Selects which hashing algorithm (md5, sha1, sha256) to use on the attachment.
                            new_hash = hash_constructors[
                                helper.get_arg('file_hash_algorithm')]
                            att_hash = new_hash(attach_b64decode).hexdigest()

                            my_added_data['name'] = attachment['name']
                            my_added_data['odata_type'] = attachment['@odata.type']
                            my_added_data['id'] = attachment['id']
                            my_added_data['contentType'] = attachment['contentType']
                            my_added_data['size'] = attachment['size']
                            my_added_data['file_hash'] = att_hash

                            media_type = attachment['@odata.mediaContentType']

                            #Attempts to open up zip file to list file names and hashes if the option is selected in the input.
                            if helper.get_arg('read_zip_files') and media_type == 'application/zip':
                                zipbytes = io.BytesIO(attach_b64decode)
                                try:
                                    archive = ZipFile(zipbytes)
                                    zip_files = []
                                    zip_hashes = []
                                    for member in archive.namelist():
                                        zip_read = archive.read(member)
                                        zip_hash = hash_constructors[
                                            helper.get_arg('file_hash_algorithm')
                                        ](zip_read).hexdigest()
                                        if member not in zip_files:
                                            zip_files.append(member)
                                            zip_hashes.append(zip_hash)
                                    if zip_files:
                                        my_added_data['zip_files'] = zip_files
                                        my_added_data['zip_hashes'] = zip_hashes
                                except Exception:
                                    my_added_data['attention'] = 'could not extract the zip file, may be encrypted'

                            #Routine to gather info on CSV file types.
                            if media_type == 'text/csv':
                                try:
                                    csvstring = attach_b64decode.decode('utf-8')
                                    if helper.get_arg('extract_iocs'):
                                        csv_iocs = _unique_in_order(
                                            extract_iocs(helper, csvstring))
                                        if csv_iocs:
                                            my_added_data['iocs'] = csv_iocs
                                    #Will attempt to ingest the actual contents of the CSV file if this option is selected in the input.
                                    if 'csv' in helper.get_arg('attachment_data_ingest'):
                                        my_added_data['csv_data'] = csvstring
                                except Exception:
                                    my_added_data['attention'] = 'could not parse the csv document, may be encrypted'

                            #Routine to gather info on HTML file types.
                            if media_type == 'text/html':
                                try:
                                    # NOTE(review): str() on bytes yields the
                                    # "b'...'" representation rather than the
                                    # decoded text - confirm this is intended.
                                    uncooked_soup = html.unescape(
                                        str(attach_b64decode))
                                    soup = BeautifulSoup(uncooked_soup)
                                    soup_data = str(soup)
                                    if helper.get_arg('extract_iocs'):
                                        html_iocs = _unique_in_order(
                                            extract_iocs(helper, soup_data))
                                        if html_iocs:
                                            my_added_data['iocs'] = html_iocs
                                    #Will attempt to ingest the actual contents of the HTML file if this option is selected in the input.
                                    if 'html' in helper.get_arg('attachment_data_ingest'):
                                        my_added_data['html_data'] = soup_data
                                except Exception:
                                    my_added_data['attention'] = 'could not parse the html document, may be encrypted'

                            #Routine to gather info on PDF file types.
                            if media_type == 'application/pdf':
                                pdf_content = io.BytesIO(attach_b64decode)
                                output_string = StringIO()
                                try:
                                    parser = PDFParser(pdf_content)
                                    doc = PDFDocument(parser)
                                    rsrcmgr = PDFResourceManager()
                                    device = TextConverter(
                                        rsrcmgr, output_string,
                                        laparams=LAParams())
                                    interpreter = PDFPageInterpreter(
                                        rsrcmgr, device)
                                    for page in PDFPage.create_pages(doc):
                                        interpreter.process_page(page)
                                    pdf_text = output_string.getvalue()
                                    if helper.get_arg('extract_iocs'):
                                        pdf_iocs = _unique_in_order(
                                            extract_iocs(helper, pdf_text))
                                        if pdf_iocs:
                                            my_added_data['iocs'] = pdf_iocs
                                    #Will attempt to ingest the actual contents of the PDF file if this option is selected in the input.
                                    if 'pdf' in helper.get_arg('attachment_data_ingest'):
                                        my_added_data['pdf_data'] = pdf_text
                                except Exception:
                                    my_added_data['attention'] = 'could not parse the pdf document, may be encrypted'

                            #Routine to gather info on XML file types.
                            if media_type == 'text/xml':
                                try:
                                    soup = BeautifulSoup(attach_b64decode, 'lxml')
                                    soup_data = str(soup)
                                    if helper.get_arg('extract_iocs'):
                                        xml_iocs = _unique_in_order(
                                            extract_iocs(helper, soup_data))
                                        if xml_iocs:
                                            my_added_data['iocs'] = xml_iocs
                                    #Will attempt to ingest the actual contents of the XML file if this option is selected in the input.
                                    if 'xml' in helper.get_arg('attachment_data_ingest'):
                                        my_added_data['xml_data'] = soup_data
                                except Exception:
                                    my_added_data['attention'] = 'could not parse the xml document, may be encrypted'

                            #Routine to do macro analysis on files of supported content types if selected in the input setup.
                            #This uses OLEVBA tools to detect macros in the attachment, then analyses the macros.
                            if helper.get_arg('macro_analysis') and media_type in supported_content:
                                filename = attachment['name']
                                vbaparser = None
                                try:
                                    vbaparser = VBA_Parser(
                                        filename, data=attach_b64decode)
                                    if vbaparser.detect_vba_macros():
                                        my_added_data['macros_exist'] = "true"
                                        macro_analysis = vbaparser.analyze_macros()
                                        helper.log_debug(
                                            "GET Response: " + json.dumps(
                                                macro_analysis, indent=4))
                                        # NOTE(review): the two branches use
                                        # different keys ('macro_analysis' vs
                                        # 'macros_analysis'); kept as-is since
                                        # the keys are part of the emitted
                                        # event - confirm which is intended.
                                        if macro_analysis == []:
                                            my_added_data['macro_analysis'] = "Macro doesn't look bad, but I never trust macros."
                                        else:
                                            my_added_data['macros_analysis'] = macro_analysis
                                    else:
                                        my_added_data['macros_exist'] = "false"
                                except Exception:
                                    my_added_data['attention'] = 'could not extract the office document, may be encrypted'
                                finally:
                                    # Release the parser whether or not the
                                    # analysis succeeded.
                                    if vbaparser is not None:
                                        vbaparser.close()

                            attach_data.append(my_added_data)

                    # Replace the raw Graph attachments with the summaries.
                    message_items['attachments'] = attach_data

            message_data.append(message_items)

    _write_events(helper, ew, messages=message_data)
    _purge_messages(helper, messages)