def test_refang_url(self):
    # Every defanged spelling below (stray spaces, [.], (.), [dot], hxxp,
    # unbalanced brackets, "http__") must come back as the same canonical
    # URL, both via extract_urls(refang=True) and via refang_url directly.
    expected = 'http://example.com/test'
    defanged_samples = (
        'http://example.com/test',
        'http:// example .com /test',
        'http://example[.]com/test',
        'http://example[.]com[/]test',
        'http://example(.)com(/)test',
        'http://example[dot]com/test',
        'hxxp://example.com/test',
        'example [.] com/test',
        'example(.)com/test',
        'hxxp://example[.com/test',
        'hxxp://example.]com/test',
        'hxxp://exampledot]com/test',
        'hxxp://example[dotcom/test',
        'hxxp://example.com[/test',
        'http__example.com/test',
    )

    for sample in defanged_samples:
        extracted = list(iocextract.extract_urls(sample, refang=True))
        self.assertEqual(extracted[0], expected)
        self.assertEqual(iocextract.refang_url(sample), expected)

    # A mangled scheme other than http is restored as well.
    self.assertEqual(
        iocextract.refang_url('ftx://example.com/test'),
        'ftp://example.com/test')

    # An RFC 2732 style IPv6 URL must pass through both code paths unchanged.
    ipv6_url = 'http://[2001:db8:85a3:0:0:8a2e:370:7334]:80/test'
    self.assertEqual(iocextract.refang_url(ipv6_url), ipv6_url)
    ipv6_extracted = list(iocextract.extract_urls(ipv6_url, refang=True))
    self.assertEqual(ipv6_extracted[0], ipv6_url)
def test_path_refang(self):
    # A defanged dot inside the URL *path* (not only the host) must be
    # restored, by extract_urls(refang=True) and by refang_url alike.
    expected = 'http://example.com/test.htm'
    defanged_samples = (
        'http://example.com/test[.]htm',
        'http://example[.]com/test[.]htm',
    )

    for sample in defanged_samples:
        extracted = list(iocextract.extract_urls(sample, refang=True))
        self.assertEqual(extracted[0], expected)
        self.assertEqual(iocextract.refang_url(sample), expected)
def is_ipv4(self):
    """Boolean: is the URL's network location an IPv4 address (not a domain)?

    The artifact is refanged before parsing. Only the part of the netloc
    before the first ':' is considered; square brackets are dropped and
    commas are read as dots before the value is validated.
    """
    netloc = urlparse(iocextract.refang_url(self.artifact)).netloc

    # Keep only what precedes the first colon (drops any ":port" suffix).
    host = netloc.split(':')[0]
    for old, new in (('[', ''), (']', ''), (',', '.')):
        host = host.replace(old, new)

    try:
        ipaddress.IPv4Address(host)
    except ValueError:
        # Not a syntactically valid IPv4 address.
        return False
    else:
        return True
def ioc_parse(line):
    """
    Extract IOCs (Indicators of Compromise) from a line of text, using a
    library that understands defanged formats.

    URLs, IPv4 addresses, IPv6 addresses, emails, hashes and YARA rules are
    processed in that order. For every match, ``get_ioc_param`` is called with
    the IOC kind, the matched text and the current working text; for URLs,
    IPv4 addresses and emails the refanged value is appended to its result.
    The first two items of each result are used as slice bounds to splice the
    matched text back into the working text.

    Returns a ``(formatted, params)`` tuple: the working text after all
    splices, and the list of per-match results in discovery order.
    """
    # (IOC kind, extractor, refang function or None), in processing order.
    # Extractors are invoked lazily so each one sees the text as left by the
    # previous step.
    steps = (
        ('url',
         lambda text: iocextract.extract_urls(text, strip=True),
         iocextract.refang_url),
        ('ip_address', iocextract.extract_ipv4s, iocextract.refang_ipv4),
        ('ip_address', iocextract.extract_ipv6s, None),
        ('email', iocextract.extract_emails, iocextract.refang_email),
        ('hash', iocextract.extract_hashes, None),
        ('yara_rule', iocextract.extract_yara_rules, None),
    )

    params = []
    formatted = line

    for kind, extract, refang in steps:
        for match in extract(formatted):
            # Refang first, then locate the match, mirroring the call order
            # of the hand-written loops this replaces.
            refanged = refang(match) if refang is not None else None
            entry = get_ioc_param(kind, match, formatted)
            if refang is not None:
                entry.append(refanged)
            params.append(entry)

            start, end = entry[0], entry[1]
            formatted = '{}{}{}'.format(
                formatted[:start], match, formatted[end:])

    return formatted, params
def is_ipv6(self):
    """Boolean: is the URL's network location an IPv6 address (not a domain)?

    Handles RFC 2732 bracketed IPv6 URLs with or without a port, as well as
    non-RFC URLs where the address appears without brackets.
    """
    # Parse the refanged URL rather than the raw artifact (fixes a urlparse
    # exception).
    netloc = urlparse(iocextract.refang_url(self.artifact)).netloc

    if ']:' in netloc:
        # Bracketed address followed by a port: drop everything from the
        # last ':' onwards.
        candidate = netloc.rsplit(':', 1)[0]
    else:
        candidate = netloc

    unbracketed = candidate.replace('[', '').replace(']', '')

    try:
        ipaddress.IPv6Address(unbracketed)
    except ValueError:
        # Not a syntactically valid IPv6 address.
        return False
    else:
        return True
def test_refang_removes_some_backslash_escaped_characters(self):
    # Backslash-escaped defang characters (e.g. "\(", "\)", "\.") must be
    # cleaned up by refang_url.
    #
    # The inputs are raw strings: "\(", "\)" and "\." are not valid escape
    # sequences in a normal string literal and trigger a DeprecationWarning
    # (SyntaxWarning on newer Pythons). The raw form yields exactly the same
    # characters, backslashes included.
    self.assertEqual(
        iocextract.refang_url(r'https://example\(.)com/'),
        'https://example.com/')
    self.assertEqual(
        iocextract.refang_url(r'https://example\(.\)com/test\.html'),
        'https://example.com/test.html')
def test_refang_never_excepts_from_urlparse(self):
    # refang_url must not let a ValueError escape for these bracket-laden
    # inputs; any such error is reported as a test failure.
    awkward_inputs = (
        'hxxp__test]',
        'CDATA[^h00ps://test.com/]]>',
    )

    for raw in awkward_inputs:
        try:
            iocextract.refang_url(raw)
        except ValueError as err:
            self.fail('Unhandled parsing error in refang: {e}'.format(e=err))
def _stringify(self):
    """Return the artifact as a deobfuscated (refanged) URL, always."""
    deobfuscated = iocextract.refang_url(self.artifact)
    return deobfuscated
def __unicode__(self):
    """Return the deobfuscated (refanged) URL, always."""
    deobfuscated = iocextract.refang_url(self.artifact)
    # NOTE(review): `unicode` is presumably the Python 2 builtin or a
    # compatibility alias defined elsewhere in this file - confirm.
    return unicode(deobfuscated)