Пример #1
0
    def test_email_extract(self):
        """Check ``iocextract.extract_emails`` on valid and invalid samples.

        Every sample is checked as-is and after being passed through the
        ``_wrap_spaces``, ``_wrap_tabs`` and ``_wrap_newlines`` helpers.
        Valid samples must come back unchanged as the first extracted item;
        invalid samples must yield nothing.
        """
        content_list = [
            '*****@*****.**',
            '*****@*****.**',
            '*****@*****.**',
            '*****@*****.**',
            '*****@*****.**',
            '*****@*****.**',
            '*****@*****.**',
            '*****@*****.**',
            '*****@*****.**',
            '[email protected]',
        ]

        # assertEqual is used instead of the assertEquals alias, which was
        # deprecated for years and removed from unittest in Python 3.12.
        for content in content_list:
            self.assertEqual(list(iocextract.extract_emails(content))[0], content)
            self.assertEqual(list(iocextract.extract_emails(_wrap_spaces(content)))[0], content)
            self.assertEqual(list(iocextract.extract_emails(_wrap_tabs(content)))[0], content)
            self.assertEqual(list(iocextract.extract_emails(_wrap_newlines(content)))[0], content)

        invalid_list = [
            '@a.co',
            'myuser@',
            '@',
            # don't extract non-fqdn emails
            'a@a',
        ]

        for content in invalid_list:
            self.assertEqual(len(list(iocextract.extract_emails(content))), 0)
            self.assertEqual(len(list(iocextract.extract_emails(_wrap_spaces(content)))), 0)
            self.assertEqual(len(list(iocextract.extract_emails(_wrap_tabs(content)))), 0)
            self.assertEqual(len(list(iocextract.extract_emails(_wrap_newlines(content)))), 0)
Пример #2
0
def extractIOC(path):
    """Run ``strings64.exe`` on a file and extract IOCs from its output.

    Args:
        path: Path of the file to scan. It is appended verbatim to the
            ``src\\strings64.exe`` command line.

    Returns:
        A tuple ``(urls, ipv4s, ipv6s, emails)`` of lists. URLs are
        de-duplicated in first-seen order; the other lists keep every
        match in extraction order.
    """
    extractor = URLExtract()
    # NOTE(review): the command is built by plain string concatenation; if
    # `path` can come from an untrusted source it should be quoted/validated
    # before it reaches execute_command.
    command = 'src\\strings64.exe ' + path
    try:
        out = execute_command(command)
    except Exception:
        # Best-effort single retry; a second failure propagates to the caller.
        out = execute_command(command)

    # The list of output lines is stringified as a whole (its repr), which is
    # why literal backslash-r sequences are stripped from matches below.
    # Computed once here instead of once per extractor.
    text = str(out.decode("utf-8").split('\n'))

    extract_url = []
    for url in iocextract.extract_urls(text, refang=True, strip=True):
        found = extractor.find_urls(url)
        if not found:
            # URLExtract did not confirm this candidate; skip it.
            continue
        extract_url.append(str(found[0]).replace("\\r", ""))
    # Remove duplicates while keeping a deterministic (first-seen) order.
    extract_url = list(dict.fromkeys(extract_url))

    ipv4 = list(iocextract.extract_ipv4s(text, refang=True))
    ipv6 = list(iocextract.extract_ipv6s(text))
    emails = [
        str(email).replace("\\r", "")
        for email in iocextract.extract_emails(text, refang=True)
    ]
    return (extract_url, ipv4, ipv6, emails)
Пример #3
0
    def test_defang_unsupported_at(self):
        """Assert that mismatched bracket styles around ``@`` are not refanged.

        For each style the ``@`` in the sample is replaced and the test
        requires that extraction with ``refang=True`` does not return
        exactly one result.
        """
        address = "*****@*****.**"
        unsupported_styles = (
            # parentheses
            "(@(", ")@(", ")@)", "@(", ")@",
            # square brackets
            "[@[", "]@[", "]@]", "@[", "]@",
            # curly braces
            "{@{", "}@{", "}@}", "@{", "}@",
        )

        for style in unsupported_styles:
            mangled = address.replace("@", style)
            extracted = list(iocextract.extract_emails(mangled, refang=True))
            self.assertNotEqual(
                len(extracted), 1,
                "should fail on defanging style : " + style)
Пример #4
0
    def test_email_refang(self):
        """Check that defanged addresses refang to the expected e-mail.

        Each sample must produce the expected value both as the first item
        of ``extract_emails(..., refang=True)`` and from ``refang_email``.
        """
        expected = '*****@*****.**'
        defanged_samples = (
            'myuser@example[.]com[.]tld',
            'myuser @example[.]com[.]tld',
            'myuser @ example.com.tld',
            'myuser@example(.)com[.tld',
            'myuser@example[.]com.tld',
            'myuser@example [.] com.tld',
            'myuser@example [.] com [.] tld',
            'myuser@example [.] com [.tld',
            'myuser@example   [[[  . ])] com [.tld',
            'myuser[@]example   [[[  . ])] com [.tld',
            'myuser [ @ ] example   [[[  . ])] com [.tld',
            'myuser { @ ) example   [[[  . ])] com [.tld',
            'myuser { @ ) example {  . ])] com [.tld',
            'myuser { at ) example {  . ])] com [.tld',
            'myuser { at ) example {  doT ])] com [dot tld',
            'myuser At example DOT com DOT tld',
            'myuser[@]example[.com[.tld]',
        )

        for sample in defanged_samples:
            first_hit = list(iocextract.extract_emails(sample, refang=True))[0]
            self.assertEqual(first_hit, expected)
            self.assertEqual(iocextract.refang_email(sample), expected)
def _sniff_text(text):
    """Return a dict of non-empty findings discovered in ``text``.

    When ``args.ioc`` is truthy, an empty line is printed and the
    iocextract extractors are used (keys ``urls``, ``ips``, ``emails``,
    ``hashes``, ``rules``; values are lists). Otherwise each pattern in
    ``regexList`` is applied with ``re.findall`` and the key maps to the set
    of its matches. Keys with no findings are omitted.
    """
    if not args.ioc:
        matches_by_name = {}
        for name, pattern in regexList.items():
            matched = set(re.findall(pattern, text))
            if matched:
                matches_by_name[name] = matched
        return matches_by_name

    print("")
    # All extractors run first; empty results are filtered out afterwards.
    extracted = (
        ("urls", list(iocextract.extract_urls(text))),
        ("ips", list(iocextract.extract_ips(text))),
        ("emails", list(iocextract.extract_emails(text))),
        ("hashes", list(iocextract.extract_hashes(text))),
        ("rules", list(iocextract.extract_yara_rules(text))),
    )
    return {label: hits for label, hits in extracted if hits}
Пример #6
0
    def _utility_ioc_extractor_function(self, event, *args, **kwargs):
        """Extract IOCs from the ``text_string`` input and yield the result.

        HTML is stripped from ``text_string`` with BeautifulSoup and the text
        is NFKD-normalized before extraction. The ``incident_id`` keyword
        input is accepted but not used by this function.

        Yields:
            ``FunctionResult(results)`` on success, where ``results`` holds
            the de-duplicated lists ``ipv4s``, ``ipv6s``, ``urls``,
            ``domains``, ``email_addresses``, ``email_domains``,
            ``md5_hashes``, ``sha256_hashes`` and ``was_successful=True``.
            ``FunctionError()`` if any step raises; the exception is logged.
        """
        # Created before the try block so it is available in the handler.
        log = logging.getLogger(__name__)

        def _unique(items):
            # OrderedDict.fromkeys() drops duplicates and keeps first-seen order.
            return list(OrderedDict.fromkeys(items))

        results = {}
        results["was_successful"] = False

        try:
            # Get the function parameters:
            text_string = kwargs.get("text_string")  # text

            # Strip HTML and normalize text
            text_string = unicodedata.normalize(
                "NFKD",
                BeautifulSoup(text_string, "html.parser").get_text(' '))

            results["ipv4s"] = _unique(
                iocextract.extract_ipv4s(text_string, refang=True))
            results["ipv6s"] = _unique(iocextract.extract_ipv6s(text_string))
            # URLs and domains
            results["urls"] = _unique(
                iocextract.extract_urls(text_string, refang=True))
            # domains only
            results["domains"] = _unique(
                [urlparse(url).netloc for url in results["urls"]])
            results["email_addresses"] = _unique(
                iocextract.extract_emails(text_string, refang=True))
            # domains only
            results["email_domains"] = _unique(
                [email.split('@')[1] for email in results["email_addresses"]])
            results["md5_hashes"] = _unique(
                iocextract.extract_md5_hashes(text_string))
            results["sha256_hashes"] = _unique(
                iocextract.extract_sha256_hashes(text_string))
            results["was_successful"] = True

            # Produce a FunctionResult with the results
            yield FunctionResult(results)
        except Exception:
            # Record the traceback instead of discarding it silently.
            log.exception("IOC extraction failed")
            yield FunctionError()
Пример #7
0
def ioc_parse(line):
    """Find IOCs (Indicators of Compromise) in ``line`` using iocextract.

    iocextract is used because it can handle defanged IOC formats. URLs,
    IPv4 and IPv6 addresses, e-mails, hashes and YARA rules are processed in
    that order. For every match ``get_ioc_param`` is called and its result is
    collected; for URLs, IPv4 addresses and e-mails the refanged value is
    appended to that result first.

    Returns:
        A tuple ``(formatted, params)``: the (re-assembled) text and the list
        of per-match entries returned by ``get_ioc_param``.
    """
    params = []
    formatted = line
    # Each extractor is called once with the value `formatted` has when its
    # loop starts; rebinding `formatted` inside the loop does not change the
    # string that call was given.
    for url in iocextract.extract_urls(formatted, strip=True):
        refanged = iocextract.refang_url(url)
        # presumably param[0]/param[1] are the start/end offsets of the match
        # in `formatted` -- confirm against get_ioc_param
        param = get_ioc_param('url', url, formatted)
        param.append(refanged)
        params.append(param)
        # Rebuild the text with the matched value placed between the slice
        # before param[0] and the slice from param[1] onwards.
        formatted = '{}{}{}'.format(formatted[:param[0]], url,
                                    formatted[param[1]:])

    for ip in iocextract.extract_ipv4s(formatted):
        refanged = iocextract.refang_ipv4(ip)
        param = get_ioc_param('ip_address', ip, formatted)
        param.append(refanged)
        params.append(param)
        formatted = '{}{}{}'.format(formatted[:param[0]], ip,
                                    formatted[param[1]:])

    # IPv6 matches are recorded without a refanged value.
    for ip in iocextract.extract_ipv6s(formatted):
        param = get_ioc_param('ip_address', ip, formatted)
        params.append(param)
        formatted = '{}{}{}'.format(formatted[:param[0]], ip,
                                    formatted[param[1]:])

    for email in iocextract.extract_emails(formatted):
        refanged = iocextract.refang_email(email)
        param = get_ioc_param('email', email, formatted)
        param.append(refanged)
        params.append(param)
        formatted = '{}{}{}'.format(formatted[:param[0]], email,
                                    formatted[param[1]:])

    # Hashes and YARA rules are recorded without a refanged value.
    for h in iocextract.extract_hashes(formatted):
        param = get_ioc_param('hash', h, formatted)
        params.append(param)
        formatted = '{}{}{}'.format(formatted[:param[0]], h,
                                    formatted[param[1]:])

    for rule in iocextract.extract_yara_rules(formatted):
        param = get_ioc_param('yara_rule', rule, formatted)
        params.append(param)
        formatted = '{}{}{}'.format(formatted[:param[0]], rule,
                                    formatted[param[1]:])

    return formatted, params
Пример #8
0
def create_group_pulse(input_text):
    """Create an OTX pulse from the IOCs found in ``input_text``.

    URLs, IP addresses, SHA256/SHA1/MD5 hashes and e-mail addresses are
    extracted with iocextract and submitted as indicators of a new public
    pulse titled ``SlackIOCs - <unix time>``. The indicator list and the
    API response are printed.

    Args:
        input_text: Text to scan for indicators.
    """
    # Create the pulse title
    unix_time = str(int(time.time()))
    pulse_title = 'SlackIOCs - ' + unix_time

    # NOTE(review): the API key is an empty placeholder in this block; it has
    # to be supplied before the request can be authenticated.
    API_KEY = ''
    otx = OTXv2(API_KEY)

    group_id = 840

    # (extractor, OTX indicator type), in submission order.
    # IPv4 and IPv6 are extracted separately: extract_ips() yields both
    # families, so labelling all of its output 'IPv4' mis-typed IPv6 hits.
    extractors = (
        (iocextract.extract_urls, 'URL'),
        (iocextract.extract_ipv4s, 'IPv4'),
        (iocextract.extract_ipv6s, 'IPv6'),
        (iocextract.extract_sha256_hashes, 'FileHash-SHA256'),
        (iocextract.extract_sha1_hashes, 'FileHash-SHA1'),
        (iocextract.extract_md5_hashes, 'FileHash-MD5'),
        (iocextract.extract_emails, 'EMAIL'),
    )

    # Create a list of indicators
    indicators = [
        {'indicator': value, 'type': indicator_type}
        for extract, indicator_type in extractors
        for value in extract(input_text)
    ]

    print('Adding ' + str(indicators))

    response = otx.create_pulse(name=pulse_title,
                                public=True,
                                indicators=indicators,
                                tags=['covid19'],
                                references=[],
                                group_ids=[group_id],
                                tlp='White')

    print('Response: ' + str(response))
Пример #9
0
    def artifacts(self, raw):
        """Build artifacts for the IOCs found in ``str(raw)``.

        URLs, IPv4 addresses, e-mail addresses and hashes are extracted and
        turned into ``url``, ``ip``, ``mail`` and ``hash`` artifacts (in that
        order) via ``self.build_artifact``.
        """
        text = str(raw)
        # Run every extractor before any artifact is built.
        found = (
            ('url', list(iocextract.extract_urls(text))),
            ('ip', list(iocextract.extract_ipv4s(text))),
            ('mail', list(iocextract.extract_emails(text))),
            ('hash', list(iocextract.extract_hashes(text))),
        )
        return [
            self.build_artifact(kind, str(value))
            for kind, values in found
            for value in values
        ]
Пример #10
0
    def test_defang_at(self):
        """Check that bracketed ``@`` variants are refanged to one e-mail.

        For each style the ``@`` in the sample is replaced; extraction with
        ``refang=True`` must return exactly one result equal to the sample.
        """
        address = "*****@*****.**"
        supported_styles = (
            # parentheses
            "(@)", "(@", "@)",
            # square brackets
            "[@]", "[@", "@]",
            # curly braces
            "{@}", "{@", "@}",
        )

        for style in supported_styles:
            mangled = address.replace("@", style)
            extracted = list(iocextract.extract_emails(mangled, refang=True))
            self.assertEqual(len(extracted), 1, "failed defang on: " + style)
            self.assertEqual(extracted[0], address)
Пример #11
0
    def each(self, target):
        """Extract IOCs from the strings of ``target`` and register them.

        The strings returned by ``_strings(target)`` are joined with spaces;
        IPs, e-mails, hashes and YARA rules are extracted from the joined
        text, values present in ``blacklist`` are dropped, and the remainder
        is stored in ``self.results['iocs']`` and passed to ``self.add_ioc``.
        Always returns ``True``.
        """
        self.results = dict()

        # One space-separated string built from all target strings.
        joined = ' '.join(_strings(target))

        # URL extraction is not part of this list (it is disabled here).
        extractors = (
            iocextract.extract_ips,
            iocextract.extract_emails,
            iocextract.extract_hashes,
            iocextract.extract_yara_rules,
        )
        found = []
        for extract in extractors:
            found.extend(extract(joined))

        self.results['iocs'] = [ioc for ioc in found if ioc not in blacklist]

        # Add observables
        for ioc in self.results['iocs']:
            self.add_ioc(ioc)  # TODO: tag
        return True
Пример #12
0
    def artifacts(self, raw):
        """Return artifacts for this result.

        When ``self.filename`` is truthy a single ``file`` artifact is
        returned. Otherwise ``str(raw)`` (with escaped double quotes
        unescaped) is scanned and ``url``, ``ip`` and ``mail`` artifacts are
        built from the distinct URLs, IPv4 addresses and e-mails found.
        """
        if self.filename:
            return [self.build_artifact("file", self.filename)]

        text = str(raw).replace('\\"', '"')
        # Sets remove duplicate matches before artifacts are built.
        url_set = set(iocextract.extract_urls(text))
        ip_set = set(iocextract.extract_ipv4s(text))
        mail_set = set(iocextract.extract_emails(text))

        built = [self.build_artifact("url", str(url)) for url in url_set]
        built.extend(self.build_artifact("ip", str(ip)) for ip in ip_set)
        built.extend(
            self.build_artifact("mail", str(mail)) for mail in mail_set)
        return built
Пример #13
0
    def check_clippy(iocs):
        """Poll the clipboard forever and submit any IOCs found in new text.

        Each pass waits for clipboard text, and if it differs from the text
        seen on the previous pass, extracts URLs, IPs, e-mails and hashes
        from it. When anything is found, the de-duplicated list is scheduled
        on ``win.submit_iocs`` through ``GLib.idle_add``. The function never
        returns.

        Args:
            iocs: Initial list of IOCs; it only affects the first pass
                because the list is reset at the end of every pass.
        """
        # NOTE(review): `clipboard` and `win` are not defined in this block;
        # presumably they come from the enclosing scope.
        # (extractor, keyword options), in the order they are applied.
        extractors = (
            (extract_urls, {'refang': True}),
            (extract_ips, {'refang': True}),
            (extract_emails, {'refang': True}),
            (extract_hashes, {}),
        )
        previous_text = ''

        while True:
            found_any = False
            text = clipboard.wait_for_text()

            # Only parse text that exists and was not parsed on the last pass.
            if text is not None and text != previous_text:
                for extract, options in extractors:
                    matches = iter_check(extract(text, **options))
                    if matches is not None:
                        # Build a new list rather than mutating the argument.
                        iocs = iocs + list(matches)
                        found_any = True

                if found_any:
                    GLib.idle_add(win.submit_iocs, list(set(iocs)))

            iocs = []
            previous_text = text
            time.sleep(1)
Пример #14
0
 def test_ip_email(self):
     """The sample must be extracted as exactly one e-mail, unchanged."""
     sample = "*****@*****.**"
     extracted = list(iocextract.extract_emails(sample))
     # Exactly one hit, and it is the whole sample.
     self.assertEqual(len(extracted), 1)
     self.assertEqual(extracted[0], sample)
Пример #15
0
 def test_xmpp(self):
     """The sample must yield exactly one extracted e-mail."""
     sample = "*****@*****.**"
     extracted = list(iocextract.extract_emails(sample))
     # Only the number of hits is checked here, not the extracted value.
     self.assertEqual(len(extracted), 1)
Пример #16
0
 def test_email(self):
     """The sample must be extracted as exactly one e-mail, unchanged."""
     sample = "*****@*****.**"
     extracted = list(iocextract.extract_emails(sample))
     # Exactly one hit, and it is the whole sample.
     self.assertEqual(len(extracted), 1)
     self.assertEqual(extracted[0], sample)
Пример #17
0
    def test_email_extract(self):
        """Check ``extract_emails`` on valid, invalid and partial samples.

        Every sample is checked as-is and after being passed through the
        ``_wrap_spaces``, ``_wrap_tabs`` and ``_wrap_newlines`` helpers.
        Valid samples must come back unchanged as the first hit, invalid
        samples must yield nothing, and partial samples must yield the
        expected leading portion as the first hit.
        """
        # Identity first, then each wrapping helper, in the checked order.
        variants = (
            lambda text: text,
            _wrap_spaces,
            _wrap_tabs,
            _wrap_newlines,
        )

        valid_samples = [
            '*****@*****.**',
            '*****@*****.**',
            '*****@*****.**',
            '*****@*****.**',
            '*****@*****.**',
            '*****@*****.**',
            '*****@*****.**',
            '*****@*****.**',
            '*****@*****.**',
            '[email protected]',
            'myuser @example[.]com',
            'myuser@ example[.]com',
            'myuser @ example[.]com',
            'myuser @ example [ . ] com',
            'myuser @ example.com',
            'myuser@example [.] com',
            'myuser@example[.]com[.]tld',
            'myuser@example(.)com[.tld',
            'myuser@example[.]com.tld',
            'myuser@example [.] com.tld',
            'myuser@example [.] com [.] tld',
            'myuser@example [.] com [.tld',
            'myuser@example  [  .  ]   com',
            'myuser@example  [  .  ]   com    [   .tld',
            'myuser@example  [[[[ [ [ [ . )]) com',
            'myuser@example  [[[[ [ [ [ dot )]) com',
            'myuser at example  [[[[ [ [ [ dot )]) com',
            'myuser at example [ dot ] com',
            'myuser at example[ dot ]com',
            'myuser at example[dot]com',
            'myuser at example [dot] com',
            'myuser [at] example dot com',
            'myuser at example dot com',
            'myuser AT example DOT com',
            'myuser[@]example.com',
            'myuser[@]example[.com',
            'myuser [ at ] example.com',
            'myuser [at] example.com',
            'myuser[at]example.com',
            'myuser[ at ]example.com',
            'myuser/AT/example/DOT/com',
        ]

        for sample in valid_samples:
            for wrap in variants:
                self.assertEqual(
                    list(iocextract.extract_emails(wrap(sample)))[0], sample)

        rejected_samples = [
            '@a.co',
            'myuser@',
            '@',
            # don't extract non-fqdn emails
            'a@a',
            'myuser @ word more words',
            'myuser @ word more words.period',
            'myuser @ words. Sentence',
            'myuser@example . com',
            'myuser@example .]com',
            'myuseratexampledotcom',
            'myuseratexample dot com',
            'myuser at exampledotcom',
        ]

        for sample in rejected_samples:
            for wrap in variants:
                self.assertEqual(
                    len(list(iocextract.extract_emails(wrap(sample)))), 0)

        # Only the leading part of these samples should be extracted.
        leading_part = 'myuser@example [.] com'
        partial_samples = [
            'myuser@example [.] com. tld',
            'myuser@example [.] com . tld',
            'myuser@example [.] com!!!???',
        ]

        for sample in partial_samples:
            for wrap in variants:
                self.assertEqual(
                    list(iocextract.extract_emails(wrap(sample)))[0],
                    leading_part)
Пример #18
0
        
        for filename in os.listdir(path):
            if(count > maximum - 1):
                break
            if(filename in skip_files):
                continue

            # Extract text from pdf
            filepath = os.path.join(path, filename)
            content = convert_pdf_txt(filepath)
            
            # Extract Indicators of Compromise from text, recording time
            extracted_files[filename] = {}
            extract_start_time = time.time()
            extracted_files[filename]["urls"] = list(iocextract.extract_urls(content, refang=True))
            extracted_files[filename]["email_addresses"] = list(iocextract.extract_emails(content, refang=True))
            extracted_files[filename]["ipv4s"] = list(iocextract.extract_ipv4s(content, refang=True))
            extracted_files[filename]["ipv6s"] = list(iocextract.extract_ipv6s(content))
            extracted_files[filename]["md5s"] = list(iocextract.extract_md5_hashes(content))
            extracted_files[filename]["sha1s"] = list(iocextract.extract_sha1_hashes(content))
            extracted_files[filename]["sha256s"] = list(iocextract.extract_sha256_hashes(content))
            extracted_files[filename]["sha512s"] = list(iocextract.extract_sha512_hashes(content))
            extracted_files[filename]["yara"] = list(iocextract.extract_yara_rules(content))
            extract_avg_numerator += time.time() - extract_start_time
            
            count += 1

        process_end_time = time.time()

        # add some meta info on process run time
        extracted_files["meta"] = {