Example #1
 def test_ip_regex_allows_backslash_escape(self):
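     # Every backslash-defanged variant below should refang to the plain dotted quad '10.10.10.10'.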
     self.assertEqual(
         list(iocextract.extract_ips('10.10.10\.10', refang=True))[0],
         '10.10.10.10')
     self.assertEqual(
         list(iocextract.extract_ips('10.10.10\\\\\\\\.10',
                                     refang=True))[0], '10.10.10.10')
     self.assertEqual(
         list(iocextract.extract_ips('10\.10\.10\.10', refang=True))[0],
         '10.10.10.10')
     self.assertEqual(
         list(iocextract.extract_ips('10\\\\\\\\\.10\\.10\.10',
                                     refang=True))[0], '10.10.10.10')
     self.assertEqual(
         list(iocextract.extract_ips('10[.]10(.10\.10', refang=True))[0],
         '10.10.10.10')
Example #2
def _sniff_text(text):
    """Check every regex for findings and return a dictionary of all findings."""
    results = {}
    if args.ioc:
        print("")
        urls = list(iocextract.extract_urls(text))
        ips = list(iocextract.extract_ips(text))
        emails = list(iocextract.extract_emails(text))
        hashes = list(iocextract.extract_hashes(text))
        rules = list(iocextract.extract_yara_rules(text))
        if urls:
            results.update({"urls": urls})
        if ips:
            results.update({"ips": ips})
        if emails:
            results.update({"emails": emails})
        if hashes:
            results.update({"hashes": hashes})
        if rules:
            results.update({"rules": rules})

    else:
        for key, value in regexList.items():
            findings = set(re.findall(value, text))
            if findings:
                results.update({key: findings})
    return results
Example #3
def extract(filein, fileout):

    # Set up extraction of text from the PDF.
    rsrcmgr = PDFResourceManager()
    retstr = StringIO()
    codec = 'utf-8'  # 'utf16','utf-8'
    laparams = LAParams()
    device = TextConverter(rsrcmgr, retstr, codec=codec, laparams=laparams)

    # Open the input file and render each page to text.
    f = open(filein, mode='rb')
    interpreter = PDFPageInterpreter(rsrcmgr, device)
    for page in PDFPage.get_pages(f):
        interpreter.process_page(page)
    f.close()
    device.close()
    text = retstr.getvalue()
    retstr.close()

    with open(fileout + ".csv", "w", newline="") as file:
        writer = csv.writer(file)
        writer.writerow(["IP", "ASN", "Country Code"])
        for ip in iocextract.extract_ips(text, refang=True):
            print(ip)
            try:
                ans = resolveIP(ip)
            except Exception:
                # Skip IPs that fail to resolve instead of writing a stale row.
                print("An error has occurred")
                continue
            writer.writerow(ans)
    return
Example #4
def extract_text_obserables(username, text):
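    """Extract IP and URL observables from a tweet's text; pastes linked from ghostbin/pastebin are expanded."""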
    observable_list = []

    user_id = '@{0}'.format(username)
    user_url = 'https://twitter.com/{0}'.format(username)

    try:
        for ip in iocextract.extract_ips(text, refang=True):
            if validate_ip(ip):
                observable_list.append(TwitterObservable(user_id, user_url, 'ip', ip))

        for url in iocextract.extract_urls(text, refang=True):
            if 'ghostbin.com' in url or 'pastebin.com' in url:
                paste_observables = extract_paste_observables(username, url)

                if len(paste_observables) > 0:
                    observable_list.extend(paste_observables)

            elif validate_url(url):
                observable_list.append(TwitterObservable(user_id, user_url, 'url', clean_url(url)))

    except Exception as e:
        logger.warning('Exception parsing text: {0}'.format(e))

    return observable_list
Example #5
 def test_ip_regex_allows_multiple_brackets(self):
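     # Mixed and repeated brackets/parentheses around dots should be stripped when refang=True.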
     self.assertEqual(
         list(iocextract.extract_ips('10.10.10.]]]10', refang=True))[0],
         '10.10.10.10')
     self.assertEqual(
         list(iocextract.extract_ips('10.10.10.)))10', refang=True))[0],
         '10.10.10.10')
     self.assertEqual(
         list(iocextract.extract_ips('10.10.10[[[.10', refang=True))[0],
         '10.10.10.10')
     self.assertEqual(
         list(
             iocextract.extract_ips('10[[[[.]]]]10[[[.]]10[.10',
                                    refang=True))[0], '10.10.10.10')
     self.assertEqual(
         list(iocextract.extract_ips('10(((.]]]]10([[.)10.)10',
                                     refang=True))[0], '10.10.10.10')
Example #6
def extract_URLs(content):

    if content is not None:
        print("\n***** Extract URLs *****\n")
        ### Identify URLs in content ###

        extractor = URLExtract()
        extractor_urls = extractor.find_urls(content)

        iocextract_urls = list(iocextract.extract_urls(content, refang=True))
        iocextract_ips = list(iocextract.extract_ips(content, refang=True))

        iocextract_ips_valid = []

        for ip in iocextract_ips:
            # Basic format check to further refine the list of potential IPs:
            #     IPv4: xxx.xxx.xxx.xxx or
            #     IPv6: xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx
            # A candidate is invalid only if it matches neither shape.
            if ip.count(".") != 3 and ip.count(":") != 7:
                print("Invalid IP address: " + str(ip))
            else:
                iocextract_ips_valid.append(ip)

        print("iocextract.extract_ips method - format validated")
        print(iocextract_ips_valid)
        print("extractor.find method")
        print(extractor_urls)
        print("iocextract.extract_urls method")
        print(iocextract_urls)

        info_to_evaluate = extractor_urls + iocextract_urls + iocextract_ips_valid

        # Occasionally, the functions above return URLs with trailing commas. Remove these.
        for index, ioc in enumerate(info_to_evaluate):
            if ioc.endswith(','):
                info_to_evaluate[index] = ioc[:-1]

        print("Removed trailing commas")
        print(info_to_evaluate)

        print("Successfully extracted URLs")

        return info_to_evaluate
Example #7
def main():

    # Parse input file
    stix_package = STIXPackage.from_xml(FILENAME)

    # Convert the STIXPackage to a Python dictionary
    stix_dict = stix_package.to_dict()

    # Extract the description from the indicator (suitable for indicator-only packages)
    description = stix_dict["indicators"][0]["description"]
    # Convert the first STIXPackage dictionary into another STIXPackage via
    # the from_dict() method.

    # Pattern for domain / email and IP addresses
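    # Matches defanged tokens such as 'example[.]com' or 'user[@]example[.]com', plus any trailing URL characters.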
    raw_iocs = re.findall(
        r'[a-zA-Z0-9-\.]*\[\.?\@?\][a-zA-Z0-9-\.\[\.\@\]]*[-a-zA-Z0-9@:%_\+.~#?&//=]*',
        description)

    print(len(raw_iocs))

    for i in range(len(raw_iocs)):
        # Replace the on9 strings
        for on9string in on9strings:
            raw_iocs[i] = raw_iocs[i].replace(on9string, on9strings[on9string])
        # Import those IOCs into the array.
        if re.match(r'.*[@]+', raw_iocs[i]):
            iocs['email'].append(raw_iocs[i])
        elif re.match(r'.*[//].*', raw_iocs[i]):
            iocs['url'].append(raw_iocs[i])
        elif re.match(r'.*[a-zA-Z]', raw_iocs[i]):
            iocs['domain'].append(raw_iocs[i])

    # Extract hashes by their plugin
    for hash_extracted in iocextract.extract_hashes(description):
        iocs['hash'].append(hash_extracted)
    # Extract Yara rules
    for yara_extracted in iocextract.extract_yara_rules(description):
        iocs['yara'].append(yara_extracted)
    # Extract IPs
    for ip_extracted in iocextract.extract_ips(description, refang=True):
        iocs['ip'].append(ip_extracted)

    for key in iocs:
        for item in iocs[key]:
            print(key + ":" + item)
Example #8
def create_group_pulse(input_text):
    # Create the pulse title
    unix_time = str(int(time.time()))
    pulse_title = 'SlackIOCs - ' + unix_time

    API_KEY = ''
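    # NOTE: a valid OTX API key must be supplied above for create_pulse to succeed.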
    otx = OTXv2(API_KEY)

    group_id = 840

    # Create a list of indicators
    indicators = []

    for url in iocextract.extract_urls(input_text):
        indicators.append({'indicator': url, 'type': 'URL'})

    for ip in iocextract.extract_ips(input_text):
        indicators.append({'indicator': ip, 'type': 'IPv4'})

    for sha256 in iocextract.extract_sha256_hashes(input_text):
        indicators.append({'indicator': sha256, 'type': 'FileHash-SHA256'})

    for sha1 in iocextract.extract_sha1_hashes(input_text):
        indicators.append({'indicator': sha1, 'type': 'FileHash-SHA1'})

    for md5 in iocextract.extract_md5_hashes(input_text):
        indicators.append({'indicator': md5, 'type': 'FileHash-MD5'})

    for email in iocextract.extract_emails(input_text):
        indicators.append({'indicator': email, 'type': 'EMAIL'})

    print('Adding ' + str(indicators))

    response = otx.create_pulse(name=pulse_title,
                                public=True,
                                indicators=indicators,
                                tags=['covid19'],
                                references=[],
                                group_ids=[group_id],
                                tlp='White')

    print('Response: ' + str(response))
Example #9
def extract(filein, fileout):

    # Set up extraction of text from the PDF.
    rsrcmgr = PDFResourceManager()
    retstr = StringIO()
    codec = 'utf-8'  # 'utf16','utf-8'
    laparams = LAParams()
    device = TextConverter(rsrcmgr, retstr, codec=codec, laparams=laparams)

    # open file
    f = open(filein, mode='rb')
    interpreter = PDFPageInterpreter(rsrcmgr, device)
    for page in PDFPage.get_pages(f):
        interpreter.process_page(page)
    f.close()
    device.close()
    text = retstr.getvalue()
    retstr.close()

    # open/create output file
    fout = open(fileout + ".txt", mode="wb")

    fout.write(b"=== IP ===\n")
    for ip in iocextract.extract_ips(text, refang=True):
        # print(ip)
        fout.write(ip.encode("latin-1") + b"\n")

    fout.write(b"=== URL ===\n")
    for url in iocextract.extract_urls(text, refang=True):
        # print(url)
        fout.write(url.encode("latin-1") + b"\n")

    fout.write(b"=== Hashes ===\n")
    for _hash in iocextract.extract_hashes(text):
        # print(_hash)
        fout.write(_hash.encode("latin-1") + b"\n")

    fout.close()
    return
Example #10
    def each(self, target):
        self.results = dict()

        # combine strings into one space-separated string
        target_strings = ' '.join(list(_strings(target)))

        # extract and add iocs
        iocs = []
        iocs.extend(list(iocextract.extract_ips(target_strings)))
        iocs.extend(list(iocextract.extract_emails(target_strings)))
        iocs.extend(list(iocextract.extract_hashes(target_strings)))
        iocs.extend(list(iocextract.extract_yara_rules(target_strings)))
        # iocs.extend(list(iocextract.extract_urls(target_strings)))
        iocs[:] = (value for value in iocs if value not in blacklist)
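        # 'blacklist' is assumed to be defined elsewhere as a collection of known-benign values.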

        # store the extracted iocs in the results dictionary
        self.results['iocs'] = iocs

        # Add observables
        for ioc in self.results['iocs']:
            self.add_ioc(ioc)  # TODO: tag
        return True
Example #11
    def check_clippy(iocs):
        last_text = ''

        while True:
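            # wait_for_text() synchronously fetches the current clipboard text (or None).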
            iocs_found = False
            urls, ips, emails, hashes = None, None, None, None
            text = clipboard.wait_for_text()

            # If there's text and it has not already been parsed
            if text is not None and text != last_text:
                urls = iter_check(extract_urls(text, refang=True))
                if urls is not None:
                    iocs = iocs + [u for u in urls]
                    iocs_found = True

                ips = iter_check(extract_ips(text, refang=True))
                if ips is not None:
                    iocs = iocs + [i for i in ips]
                    iocs_found = True
                
                emails = iter_check(extract_emails(text, refang=True))
                if emails is not None:
                    iocs = iocs + [e for e in emails]
                    iocs_found = True

                hashes = iter_check(extract_hashes(text))
                if hashes is not None:
                    iocs = iocs + [h for h in hashes]
                    iocs_found = True

                if iocs_found:
                    GLib.idle_add(win.submit_iocs, list(set(iocs)))

            iocs = []
            last_text = text
            time.sleep(1)
Example #12
 def test_ipv6_included_in_ips(self):
     content = '2001:0db8:85a3:0000:0000:8a2e:0370:7334'
     self.assertEqual(list(iocextract.extract_ips(content))[0], content)
Example #13
 def test_ipv4_included_in_ips(self):
     content = '127.0.0.1'
     self.assertEqual(list(iocextract.extract_ips(content))[0], content)
Example #14
def get_ips(content):
    array_ips = []
    for ips in iocextract.extract_ips(content):
        array_ips.append(ips)
    return array_ips
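A minimal standalone sketch of the same consumption pattern (hypothetical sample text; the outputs shown are what the refang behavior in the tests above implies):

import iocextract

sample = 'C2 at hxxp://example[.]com/gate.php and 10[.]0[.]0[.]1'

# refang=True normalizes defanged IOCs back to their plain form.
for ip in iocextract.extract_ips(sample, refang=True):
    print('IP:', ip)      # e.g. 10.0.0.1
for url in iocextract.extract_urls(sample, refang=True):
    print('URL:', url)    # e.g. http://example.com/gate.php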
Example #15
    def process_element(self,
                        content,
                        reference_link,
                        include_nonobfuscated=False):
        """Take a single source content/url and return a list of Artifacts"""

        # truncate content to a reasonable length for reference_text
        reference_text = content[:TRUNCATE_LENGTH] + (
            '...' if len(content) > TRUNCATE_LENGTH else '')

        artifact_list = []

        # collect URLs and domains
        scraped = iocextract.extract_urls(content)
        for url in scraped:
            # dump anything with ellipses, these get through the regex
            if u'\u2026' in url:
                continue

            artifact = threatingestor.artifacts.URL(
                url,
                self.name,
                reference_link=reference_link,
                reference_text=reference_text)

            # dump urls that appear to have the same domain as reference_url
            if artifact.domain() == urlparse(reference_link).netloc:
                continue

            if artifact.is_obfuscated() or include_nonobfuscated:
                # do URL collection
                artifact_list.append(artifact)

                # do domain collection in the same pass
                if artifact.is_domain():
                    artifact_list.append(
                        threatingestor.artifacts.Domain(
                            artifact.domain(),
                            self.name,
                            reference_link=reference_link,
                            reference_text=reference_text))

        # collect IPs
        scraped = iocextract.extract_ips(content)
        for ip in scraped:
            artifact = threatingestor.artifacts.IPAddress(
                ip,
                self.name,
                reference_link=reference_link,
                reference_text=reference_text)

            try:
                ipaddress = artifact.ipaddress()
                if ipaddress.is_private or ipaddress.is_loopback or ipaddress.is_reserved:
                    # don't care
                    continue

            except ValueError:
                # invalid IP
                continue

            artifact_list.append(artifact)

        # collect yara rules
        scraped = iocextract.extract_yara_rules(content)
        for rule in scraped:
            artifact = threatingestor.artifacts.YARASignature(
                rule,
                self.name,
                reference_link=reference_link,
                reference_text=reference_text)

            artifact_list.append(artifact)

        # collect hashes
        scraped = iocextract.extract_hashes(content)
        for hash_ in scraped:
            artifact = threatingestor.artifacts.Hash(
                hash_,
                self.name,
                reference_link=reference_link,
                reference_text=reference_text)

            artifact_list.append(artifact)

        # generate generic task
        title = "Manual Task: {u}".format(u=reference_link)
        description = 'URL: {u}\nTask autogenerated by ThreatIngestor from source: {s}'.format(
            s=self.name, u=reference_link)
        artifact = threatingestor.artifacts.Task(title,
                                                 self.name,
                                                 reference_link=reference_link,
                                                 reference_text=description)
        artifact_list.append(artifact)

        return artifact_list
Example #16
    def parse_indicators_from_description_string(self, description_string,
                                                 title):

        # print type(description_string)

        iocs = {
            'title': title,
            'domain': [],
            'ip': [],
            'email': [],
            'hash': [],
            'url': [],
            'yara': [],
            'other': []
        }
        on9strings = {'[.]': '.', 'hxxp': 'http', '[@]': '@'}

        # Convert the first STIXPackage dictionary into another STIXPackage via the from_dict() method.
        # Pattern for domain / email and IP addresses
        raw_iocs = re.findall(
            r'[a-zA-Z0-9-\.]*\[\.?\@?\][a-zA-Z0-9-\.\[\.\@\]]*[-a-zA-Z0-9@:%_\+.~#?&//=]*',
            description_string)

        # print(len(raw_iocs))

        for i in range(len(raw_iocs)):
            # Replace the on9 strings
            for on9string in on9strings:
                raw_iocs[i] = raw_iocs[i].replace(on9string,
                                                  on9strings[on9string])

            # Import those IOCs into the array.
            if re.match(r'.*[@]+', raw_iocs[i]):
                iocs['email'].append(raw_iocs[i])
                iocs['email'] = list(set(iocs['email']))

            elif re.match(r'.*[//].*', raw_iocs[i]):
                iocs['url'].append(raw_iocs[i])
                iocs['url'] = list(set(iocs['url']))

            elif re.match(r'.*[a-zA-Z]', raw_iocs[i]):
                if re.match("^([a-z0-9]+(-[a-z0-9]+)*\.)+[a-z]{2,}$",
                            raw_iocs[i]):
                    iocs['domain'].append(raw_iocs[i])
                    iocs['domain'] = list(set(iocs['domain']))

        # Extract hashes by their plugin
        for hash_extracted in iocextract.extract_hashes(description_string):
            iocs['hash'].append(hash_extracted)
            iocs['hash'] = list(set(iocs['hash']))

        # Extract Yara rule
        for yara_extracted in iocextract.extract_yara_rules(
                description_string):
            iocs['yara'].append(yara_extracted)
            iocs['yara'] = list(set(iocs['yara']))

        # Extract IP
        for ip_extracted in iocextract.extract_ips(description_string,
                                                   refang=True):
            # Use regex to validate the IP format
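            # (shape check only; octet values are not bounded to 0-255)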
            if re.match(r"\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b",
                        ip_extracted):
                iocs['ip'].append(ip_extracted)
                iocs['ip'] = list(set(iocs['ip']))

        # for key in iocs:
        #     for item in iocs[key]:
        #         print(key + ":" + item)

        return iocs
Example #17
    def _parse_indicators_from_stix_description(self, xml_content):
        iocs = {
            'title': '',
            'domain': [],
            'ip': [],
            'email': [],
            'hash': [],
            'url': [],
            'yara': [],
            'other': []
        }
        on9strings = {'[.]': '.', 'hxxp': 'http', '[@]': '@'}

        # Parse input file
        stix_package = STIXPackage.from_xml(xml_content)

        # Convert the STIXPackage to a Python dictionary
        stix_dict = stix_package.to_dict()

        # Extract description from the indicator (suitable for indicator only)
        # print "-" * 100
        # print stix_dict
        # print "-" * 100

        description = stix_dict["indicators"][0]["description"]

        # Extract title
        title = stix_dict["indicators"][0]["title"]
        iocs['title'] = [title]

        # Convert the first STIXPackage dictionary into another STIXPackage via the from_dict() method.
        # Pattern for domain / email and IP addresses
        raw_iocs = re.findall(
            r'[a-zA-Z0-9-\.]*\[\.?\@?\][a-zA-Z0-9-\.\[\.\@\]]*[-a-zA-Z0-9@:%_\+.~#?&//=]*',
            description)

        # print(len(raw_iocs))

        for i in range(len(raw_iocs)):
            # Replace the on9 strings
            for on9string in on9strings:
                raw_iocs[i] = raw_iocs[i].replace(on9string,
                                                  on9strings[on9string])

            # Import those IOCs into the array.
            if re.match(r'.*[@]+', raw_iocs[i]):
                iocs['email'].append(raw_iocs[i])
                iocs['email'] = list(set(iocs['email']))

            elif re.match(r'.*[//].*', raw_iocs[i]):
                iocs['url'].append(raw_iocs[i])
                iocs['url'] = list(set(iocs['url']))

            elif re.match(r'.*[a-zA-Z]', raw_iocs[i]):
                if re.match("^([a-z0-9]+(-[a-z0-9]+)*\.)+[a-z]{2,}$",
                            raw_iocs[i]):
                    iocs['domain'].append(raw_iocs[i])
                    iocs['domain'] = list(set(iocs['domain']))

        # Extract hashes by their plugin
        for hash_extracted in iocextract.extract_hashes(description):
            iocs['hash'].append(hash_extracted)
            iocs['hash'] = list(set(iocs['hash']))

        # Extract Yara rule
        for yara_extracted in iocextract.extract_yara_rules(description):
            iocs['yara'].append(yara_extracted)
            iocs['yara'] = list(set(iocs['yara']))

        # Extract IP
        for ip_extracted in iocextract.extract_ips(description, refang=True):
            # Use regex to validate the IP format
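            # (shape check only; octet values are not bounded to 0-255)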
            if re.match(r"\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b",
                        ip_extracted):
                iocs['ip'].append(ip_extracted)
                iocs['ip'] = list(set(iocs['ip']))

        # for key in iocs:
        #     for item in iocs[key]:
        #         print(key + ":" + item)

        return iocs
Example #18
    def process_element(self,
                        content,
                        reference_link,
                        include_nonobfuscated=False):
        """Take a single source content/url and return a list of Artifacts.

        This is the main work block of Source plugins, which handles
        IOC extraction and artifact creation.

        :param content: String content to extract from.
        :param reference_link: Reference link to attach to all artifacts.
        :param include_nonobfuscated: Include non-defanged URLs in output?
        """
        logger.debug(f"Processing in source '{self.name}'")

        # Truncate content to a reasonable length for reference_text.
        reference_text = content[:TRUNCATE_LENGTH] + (
            '...' if len(content) > TRUNCATE_LENGTH else '')

        # Initialize an empty list and a map of counters to track each artifact type.
        artifact_list = []
        artifact_type_count = {
            'domain': 0,
            'hash': 0,
            'ipaddress': 0,
            'task': 0,
            'url': 0,
            'yarasignature': 0,
        }

        # Collect URLs and domains.
        scraped = itertools.chain(
            iocextract.extract_unencoded_urls(content),
            # Decode encoded URLs, since we can't operate on encoded ones.
            iocextract.extract_encoded_urls(content, refang=True),
        )
        for url in scraped:
            # Dump anything with ellipses, these get through the regex.
            if u'\u2026' in url:
                continue

            artifact = threatingestor.artifacts.URL(
                url,
                self.name,
                reference_link=reference_link,
                reference_text=reference_text)

            # Dump URLs that appear to have the same domain as reference_url.
            try:
                if artifact.domain() == urlparse(reference_link).netloc:
                    continue
            except ValueError:
                # Error parsing reference_link as a URL. Ignoring.
                pass

            if artifact.is_obfuscated() or include_nonobfuscated:
                # Do URL collection.
                artifact_list.append(artifact)
                artifact_type_count['url'] += 1

                # Do domain collection in the same pass.
                # Note: domains will always be a subset of URLs. There is no
                # domain extraction.
                if artifact.is_domain():
                    artifact_list.append(
                        threatingestor.artifacts.Domain(
                            artifact.domain(),
                            self.name,
                            reference_link=reference_link,
                            reference_text=reference_text))
                    artifact_type_count['domain'] += 1

        # Collect IPs.
        scraped = iocextract.extract_ips(content)
        for ip in scraped:
            artifact = threatingestor.artifacts.IPAddress(
                ip,
                self.name,
                reference_link=reference_link,
                reference_text=reference_text)

            try:
                ipaddress = artifact.ipaddress()
                if ipaddress.is_private or ipaddress.is_loopback or ipaddress.is_reserved:
                    # Skip private, loopback, reserved IPs.
                    continue

            except ValueError:
                # Skip invalid IPs.
                continue

            artifact_list.append(artifact)
            artifact_type_count['ipaddress'] += 1

        # Collect YARA rules.
        scraped = iocextract.extract_yara_rules(content)
        for rule in scraped:
            artifact = threatingestor.artifacts.YARASignature(
                rule,
                self.name,
                reference_link=reference_link,
                reference_text=reference_text)

            artifact_list.append(artifact)
            artifact_type_count['yarasignature'] += 1

        # Collect hashes.
        scraped = iocextract.extract_hashes(content)
        for hash_ in scraped:
            artifact = threatingestor.artifacts.Hash(
                hash_,
                self.name,
                reference_link=reference_link,
                reference_text=reference_text)

            artifact_list.append(artifact)
            artifact_type_count['hash'] += 1

        # Generate generic task.
        title = f"Manual Task: {reference_link}"
        description = f"URL: {reference_link}\nTask autogenerated by ThreatIngestor from source: {self.name}"
        artifact = threatingestor.artifacts.Task(title,
                                                 self.name,
                                                 reference_link=reference_link,
                                                 reference_text=description)
        artifact_list.append(artifact)
        artifact_type_count['task'] += 1

        logger.debug(f"Found {len(artifact_list)} total artifacts")
        logger.debug(f"Type breakdown: {artifact_type_count}")
        return artifact_list