Пример #1
0
    def _utility_ioc_extractor_function(self, event, *args, **kwargs):
        """Extract de-duplicated indicators of compromise from a text blob.

        Reads ``incident_id`` and ``text_string`` from ``kwargs``. The text is
        stripped of HTML markup with BeautifulSoup and NFKD-normalized before
        being handed to the ``iocextract`` extractors.

        Yields:
            ``FunctionResult`` wrapping a dict with the keys ``was_successful``,
            ``ipv4s``, ``ipv6s``, ``urls``, ``domains``, ``email_addresses``,
            ``email_domains``, ``md5_hashes`` and ``sha256_hashes`` on success,
            or ``FunctionError`` if anything in the extraction raises.
        """
        log = logging.getLogger(__name__)  # Establish logging

        def _unique(values):
            # OrderedDict.fromkeys() preserves first-seen order and removes
            # duplicates; it accepts any iterable, so generators can be passed
            # straight through without an intermediate list.
            return list(OrderedDict.fromkeys(values))

        # Get the function parameters (dict lookups cannot raise, so they are
        # read before the try block and stay available to the error handler):
        incident_id = kwargs.get("incident_id")  # number
        text_string = kwargs.get("text_string")  # text

        results = {"was_successful": False}

        try:
            if text_string is None:
                # Fail with an explicit message instead of an opaque TypeError
                # from the HTML parser.
                raise ValueError("text_string is required")

            # Strip HTML (joining text nodes with a space) and normalize text
            plain_text = BeautifulSoup(text_string, "html.parser").get_text(' ')
            text_string = unicodedata.normalize("NFKD", plain_text)

            # Parse IOCs by type from text_string
            results["ipv4s"] = _unique(
                iocextract.extract_ipv4s(text_string, refang=True))
            results["ipv6s"] = _unique(iocextract.extract_ipv6s(text_string))
            results["urls"] = _unique(
                iocextract.extract_urls(text_string, refang=True))  # URLs and domains
            results["domains"] = _unique(
                urlparse(url).netloc for url in results["urls"])  # domains only
            results["email_addresses"] = _unique(
                iocextract.extract_emails(text_string, refang=True))
            results["email_domains"] = _unique(
                email.split('@')[1]
                for email in results["email_addresses"])  # domains only
            results["md5_hashes"] = _unique(
                iocextract.extract_md5_hashes(text_string))
            results["sha256_hashes"] = _unique(
                iocextract.extract_sha256_hashes(text_string))
            results["was_successful"] = True

            # Produce a FunctionResult with the results
            yield FunctionResult(results)
        except Exception:
            # Record the traceback before reporting the failure so the cause
            # is not lost.
            log.exception("IOC extraction failed for incident %s", incident_id)
            yield FunctionError()
Пример #2
0
    def test_md5_extract(self):
        """Check that an MD5 hex digest is the first hash extracted.

        The digest is tried bare and after being passed through each of the
        ``_wrap_*`` helpers (defined elsewhere in the test module).
        """
        content = '68b329da9893e34099c7d8ad5cb9c940'

        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed in Python 3.12.
        self.assertEqual(list(iocextract.extract_md5_hashes(content))[0], content)
        self.assertEqual(list(iocextract.extract_md5_hashes(_wrap_spaces(content)))[0], content)
        self.assertEqual(list(iocextract.extract_md5_hashes(_wrap_tabs(content)))[0], content)
        self.assertEqual(list(iocextract.extract_md5_hashes(_wrap_newlines(content)))[0], content)
        self.assertEqual(list(iocextract.extract_md5_hashes(_wrap_words(content)))[0], content)
        self.assertEqual(list(iocextract.extract_md5_hashes(_wrap_nonwords(content)))[0], content)
Пример #3
0
def create_group_pulse(input_text):
    """Extract IOCs from ``input_text`` and submit them as a new OTX pulse.

    URLs, IPv4/IPv6 addresses, SHA-256/SHA-1/MD5 hashes and e-mail addresses
    found by ``iocextract`` are collected into one indicator list, which is
    printed and then passed to ``OTXv2.create_pulse``. The response is printed;
    nothing is returned.
    """
    # Create the pulse title
    unix_time = str(int(time.time()))
    pulse_title = 'SlackIOCs - ' + unix_time

    # NOTE(review): the key is blank in this file; presumably it must be
    # filled in before the OTX call can succeed - confirm.
    API_KEY = ''
    otx = OTXv2(API_KEY)

    group_id = 840

    # (extractor, OTX indicator type) pairs, in submission order.
    # IPv4 and IPv6 are extracted separately: the combined extract_ips()
    # also returns IPv6 addresses, which must not be labelled 'IPv4'.
    extractors = (
        (iocextract.extract_urls, 'URL'),
        (iocextract.extract_ipv4s, 'IPv4'),
        (iocextract.extract_ipv6s, 'IPv6'),
        (iocextract.extract_sha256_hashes, 'FileHash-SHA256'),
        (iocextract.extract_sha1_hashes, 'FileHash-SHA1'),
        (iocextract.extract_md5_hashes, 'FileHash-MD5'),
        (iocextract.extract_emails, 'EMAIL'),
    )

    # Create a list of indicators
    indicators = []
    for extract, indicator_type in extractors:
        indicators.extend(
            {'indicator': value, 'type': indicator_type}
            for value in extract(input_text))

    print('Adding ' + str(indicators))

    response = otx.create_pulse(name=pulse_title,
                                public=True,
                                indicators=indicators,
                                tags=['covid19'],
                                references=[],
                                group_ids=[group_id],
                                tlp='White')

    print('Response: ' + str(response))
Пример #4
0
def CapeReporter(values):
    """Collect ``allReport`` results for the first IOC found in each input.

    Each item of ``values`` is scanned for IPv4 addresses, URLs and MD5,
    SHA-1 and SHA-256 hashes. Only the first hit of the highest-priority type
    present (url, then ip, md5, sha1, sha256) is passed to ``allReport``;
    every item it produces is wrapped as ``{'Cape Sandbox': item}``. Inputs
    with no recognised IOC contribute nothing.

    Returns:
        list of single-key dicts, in the order the reports were produced.
    """
    reports = []
    for raw_value in values:
        # Run every extractor up front, in the same order as before.
        ipv4_hits = list(iocextract.extract_ipv4s(raw_value))
        url_hits = list(iocextract.extract_urls(raw_value))
        md5_hits = list(iocextract.extract_md5_hashes(raw_value))
        sha1_hits = list(iocextract.extract_sha1_hashes(raw_value))
        sha256_hits = list(iocextract.extract_sha256_hashes(raw_value))

        # Lookup priority: the first type with any hit wins.
        by_priority = (
            ('url', url_hits),
            ('ip', ipv4_hits),
            ('md5', md5_hits),
            ('sha1', sha1_hits),
            ('sha256', sha256_hits),
        )
        for arg_type, hits in by_priority:
            if not hits:
                continue
            for entry in allReport(hits[0], arg_type):
                reports.append({'Cape Sandbox': entry})
            break

    return reports
Пример #5
0
 def test_MD5(self):
     """A lone MD5 hex digest yields exactly one hash, equal to the input."""
     md5_digest = "8d13ed81f15ff53688df90dd38cbd6d6"
     extracted = list(iocextract.extract_md5_hashes(md5_digest))
     self.assertEqual(len(extracted), 1)
     self.assertEqual(extracted[0], md5_digest)
Пример #6
0
    def test_md5_not_in_shax(self):
        """No MD5 may be extracted from within a longer hex digest.

        Three longer digests (40, 64 and 128 hex characters) are each checked
        bare and after being passed through every ``_wrap_*`` helper; in all
        cases the MD5 extractor must return nothing.
        """
        longer_digests = (
            'adc83b19e793491b1c6ea0fd8b46cd9f32e592fc',
            '01ba4719c80b6fe911b091a7c05124b64eeece964e09c058ef8f9805daca546b',
            'be688838ca8686e5c90689bf2ab585cef1137c999b48c70b92f67a5c34dc15697b5d11c982ed6d71be1e1e7f7b4e0733884aa97c3f7a339a8ed03577cf74be09',
        )
        wrappers = (
            _wrap_spaces,
            _wrap_tabs,
            _wrap_newlines,
            _wrap_words,
            _wrap_nonwords,
        )

        for digest in longer_digests:
            # Bare digest first, then each wrapped variant.
            found = list(iocextract.extract_md5_hashes(digest))
            self.assertEqual(len(found), 0)
            for wrap in wrappers:
                found = list(iocextract.extract_md5_hashes(wrap(digest)))
                self.assertEqual(len(found), 0)
Пример #7
0
                break
            if(filename in skip_files):
                continue

            # Extract text from pdf
            filepath = os.path.join(path, filename)
            content = convert_pdf_txt(filepath)
            
            # Extract Indicators of Compromise from text, recording time
            extracted_files[filename] = {}
            extract_start_time = time.time()
            extracted_files[filename]["urls"] = list(iocextract.extract_urls(content, refang=True))
            extracted_files[filename]["email_addresses"] = list(iocextract.extract_emails(content, refang=True))
            extracted_files[filename]["ipv4s"] = list(iocextract.extract_ipv4s(content, refang=True))
            extracted_files[filename]["ipv6s"] = list(iocextract.extract_ipv6s(content))
            extracted_files[filename]["md5s"] = list(iocextract.extract_md5_hashes(content))
            extracted_files[filename]["sha1s"] = list(iocextract.extract_sha1_hashes(content))
            extracted_files[filename]["sha256s"] = list(iocextract.extract_sha256_hashes(content))
            extracted_files[filename]["sha512s"] = list(iocextract.extract_sha512_hashes(content))
            extracted_files[filename]["yara"] = list(iocextract.extract_yara_rules(content))
            extract_avg_numerator += time.time() - extract_start_time
            
            count += 1

        process_end_time = time.time()

        # add some meta info on process run time
        extracted_files["meta"] = {
            "tool": "iocextract",
            "files_examined": count,
            "elapsed_time": process_end_time - process_start_time,