def _utility_ioc_extractor_function(self, event, *args, **kwargs):
    """Extract indicators of compromise (IOCs) from a block of text.

    Reads ``incident_id`` and ``text_string`` from ``kwargs``. HTML markup is
    stripped from ``text_string`` and the text is NFKD-normalized before the
    ``iocextract`` extractors run over it.

    Yields:
        FunctionResult: wraps a dict with the keys ``ipv4s``, ``ipv6s``,
        ``urls``, ``domains``, ``email_addresses``, ``email_domains``,
        ``md5_hashes``, ``sha256_hashes`` (each a de-duplicated list in
        first-seen order) and ``was_successful`` set to ``True``.
        FunctionError: yielded instead when any step raises; the traceback
        is written to the module logger first.
    """
    log = logging.getLogger(__name__)  # Establish logging

    def unique(items):
        # OrderedDict.fromkeys() preserves order and removes duplicates.
        return list(OrderedDict.fromkeys(items))

    # Get the function parameters:
    incident_id = kwargs.get("incident_id")  # number
    text_string = kwargs.get("text_string")  # text

    results = {"was_successful": False}

    try:
        # Strip HTML and normalize text
        plain_text = BeautifulSoup(text_string, "html.parser").get_text(' ')
        text_string = unicodedata.normalize("NFKD", plain_text)

        # Parse IOCs by type from text_string
        results["ipv4s"] = unique(
            iocextract.extract_ipv4s(text_string, refang=True))
        results["ipv6s"] = unique(iocextract.extract_ipv6s(text_string))

        # URLs and domains
        results["urls"] = unique(
            iocextract.extract_urls(text_string, refang=True))
        # domains only
        results["domains"] = unique(
            urlparse(url).netloc for url in results["urls"])

        results["email_addresses"] = unique(
            iocextract.extract_emails(text_string, refang=True))
        # domains only
        results["email_domains"] = unique(
            email.split('@')[1] for email in results["email_addresses"])

        results["md5_hashes"] = unique(
            iocextract.extract_md5_hashes(text_string))
        results["sha256_hashes"] = unique(
            iocextract.extract_sha256_hashes(text_string))

        results["was_successful"] = True

        # Produce a FunctionResult with the results
        yield FunctionResult(results)
    except Exception:
        # Record the failure before reporting it; otherwise the cause is lost.
        log.exception("IOC extraction failed for incident %s", incident_id)
        yield FunctionError()
def test_md5_extract(self):
    """Check that an MD5 hash is extracted bare and inside each wrapper.

    The first match returned by ``iocextract.extract_md5_hashes`` must equal
    the original hash for the raw string and for the output of each
    ``_wrap_*`` helper (presumably these surround the hash with the named
    kind of text - confirm against the helpers' definitions).
    """
    content = '68b329da9893e34099c7d8ad5cb9c940'

    # assertEqual replaces the deprecated assertEquals alias, which newer
    # Python versions no longer provide.
    self.assertEqual(list(iocextract.extract_md5_hashes(content))[0], content)

    wrappers = (
        _wrap_spaces,
        _wrap_tabs,
        _wrap_newlines,
        _wrap_words,
        _wrap_nonwords,
    )
    for wrap in wrappers:
        extracted = list(iocextract.extract_md5_hashes(wrap(content)))
        self.assertEqual(extracted[0], content)
def create_group_pulse(input_text):
    """Create an OTX pulse from the indicators found in *input_text*.

    Extracts URLs, IP addresses, SHA-256/SHA-1/MD5 hashes and email addresses
    with ``iocextract``, prints the indicator list, submits it through
    ``OTXv2.create_pulse`` and prints the response.

    Args:
        input_text: text to scan for indicators.
    """
    # Create the pulse title
    unix_time = str(int(time.time()))
    pulse_title = 'SlackIOCs - ' + unix_time

    # NOTE(review): the API key is an empty literal here - presumably it is
    # filled in at deployment time; confirm how it is supplied.
    API_KEY = ''
    otx = OTXv2(API_KEY)
    group_id = 840

    # (extractor, indicator type) pairs, in submission order. IPv4 and IPv6
    # are extracted separately so that IPv6 addresses are not labelled 'IPv4',
    # which is what happened when the combined extract_ips() was used.
    # NOTE(review): verify that 'EMAIL' matches the type name the OTX API
    # expects for email indicators.
    extractors = (
        (iocextract.extract_urls, 'URL'),
        (iocextract.extract_ipv4s, 'IPv4'),
        (iocextract.extract_ipv6s, 'IPv6'),
        (iocextract.extract_sha256_hashes, 'FileHash-SHA256'),
        (iocextract.extract_sha1_hashes, 'FileHash-SHA1'),
        (iocextract.extract_md5_hashes, 'FileHash-MD5'),
        (iocextract.extract_emails, 'EMAIL'),
    )

    # Create a list of indicators
    indicators = []
    for extract, indicator_type in extractors:
        indicators.extend(
            {'indicator': value, 'type': indicator_type}
            for value in extract(input_text)
        )

    print('Adding ' + str(indicators))
    response = otx.create_pulse(
        name=pulse_title,
        public=True,
        indicators=indicators,
        tags=['covid19'],
        references=[],
        group_ids=[group_id],
        tlp='White',
    )
    print('Response: ' + str(response))
def CapeReporter(values):
    """Collect report entries for the first indicator found in each value.

    Each item of *values* is scanned for IPv4 addresses, URLs and MD5, SHA-1
    and SHA-256 hashes. The first non-empty category, in the priority order
    url, ip, md5, sha1, sha256, selects the indicator (its first match) that
    is passed to ``allReport`` together with the category name. Items with no
    match contribute nothing.

    Returns:
        list of ``{'Cape Sandbox': data}`` dicts, one per item yielded by
        ``allReport``.
    """
    reports = []
    for raw_value in values:
        ipv4_hits = list(iocextract.extract_ipv4s(raw_value))
        url_hits = list(iocextract.extract_urls(raw_value))
        md5_hits = list(iocextract.extract_md5_hashes(raw_value))
        sha1_hits = list(iocextract.extract_sha1_hashes(raw_value))
        sha256_hits = list(iocextract.extract_sha256_hashes(raw_value))

        # Ordered by precedence: the first category with a hit wins.
        by_priority = (
            (url_hits, 'url'),
            (ipv4_hits, 'ip'),
            (md5_hits, 'md5'),
            (sha1_hits, 'sha1'),
            (sha256_hits, 'sha256'),
        )
        for hits, indicator_kind in by_priority:
            if not hits:
                continue
            for entry in allReport(hits[0], indicator_kind):
                reports.append({'Cape Sandbox': entry})
            break
    return reports
def test_MD5(self):
    """A lone MD5 hex digest is extracted exactly once and unchanged."""
    md5_digest = "8d13ed81f15ff53688df90dd38cbd6d6"
    extracted = list(iocextract.extract_md5_hashes(md5_digest))

    self.assertEqual(len(extracted), 1)
    self.assertEqual(extracted[0], md5_digest)
def test_md5_not_in_shax(self):
    """No MD5 hash is reported inside longer hex digests.

    Each digest below is longer than an MD5 hash; the MD5 extractor must
    return nothing for it, both bare and after each ``_wrap_*`` helper has
    been applied.
    """
    longer_digests = (
        # 40 hex characters
        'adc83b19e793491b1c6ea0fd8b46cd9f32e592fc',
        # 64 hex characters
        '01ba4719c80b6fe911b091a7c05124b64eeece964e09c058ef8f9805daca546b',
        # 128 hex characters
        'be688838ca8686e5c90689bf2ab585cef1137c999b48c70b92f67a5c34dc15697b5d11c982ed6d71be1e1e7f7b4e0733884aa97c3f7a339a8ed03577cf74be09',
    )
    # The identity entry covers the bare, unwrapped digest.
    presentations = (
        lambda text: text,
        _wrap_spaces,
        _wrap_tabs,
        _wrap_newlines,
        _wrap_words,
        _wrap_nonwords,
    )

    for digest in longer_digests:
        for present in presentations:
            matches = list(iocextract.extract_md5_hashes(present(digest)))
            self.assertEqual(len(matches), 0)
break if(filename in skip_files): continue # Extract text from pdf filepath = os.path.join(path, filename) content = convert_pdf_txt(filepath) # Extract Indicators of Compromise from text, recording time extracted_files[filename] = {} extract_start_time = time.time() extracted_files[filename]["urls"] = list(iocextract.extract_urls(content, refang=True)) extracted_files[filename]["email_addresses"] = list(iocextract.extract_emails(content, refang=True)) extracted_files[filename]["ipv4s"] = list(iocextract.extract_ipv4s(content, refang=True)) extracted_files[filename]["ipv6s"] = list(iocextract.extract_ipv6s(content)) extracted_files[filename]["md5s"] = list(iocextract.extract_md5_hashes(content)) extracted_files[filename]["sha1s"] = list(iocextract.extract_sha1_hashes(content)) extracted_files[filename]["sha256s"] = list(iocextract.extract_sha256_hashes(content)) extracted_files[filename]["sha512s"] = list(iocextract.extract_sha512_hashes(content)) extracted_files[filename]["yara"] = list(iocextract.extract_yara_rules(content)) extract_avg_numerator += time.time() - extract_start_time count += 1 process_end_time = time.time() # add some meta info on process run time extracted_files["meta"] = { "tool": "iocextract", "files_examined": count, "elapsed_time": process_end_time - process_start_time,