def test_csv_export(webapp, monkeypatch):
    """CSV export of a report returns the expected attachment and rows.

    DNS resolution is stubbed so every row carries the same fake IP and
    no error; rows are sorted before comparison so the order in which
    the export emits them does not matter.
    """
    def fake_resolve(domain):
        return ('999.999.999.999', False)

    monkeypatch.setattr('dnstwister.tools.resolve', fake_resolve)

    hexdomain = Domain('a.com').to_hex()
    response = webapp.get('/search/{}/csv'.format(hexdomain))

    disposition = response.headers['Content-Disposition']
    assert disposition == 'attachment; filename=dnstwister_report_a.com.csv'

    expected_report = textwrap.dedent("""
        Domain,Type,Tweak,IP,Error
        a.com,Addition,aa.com,999.999.999.999,False
        a.com,Addition,ab.com,999.999.999.999,False
        a.com,Addition,ac.com,999.999.999.999,False
        a.com,Addition,ad.com,999.999.999.999,False
        a.com,Addition,ae.com,999.999.999.999,False
        a.com,Addition,af.com,999.999.999.999,False
        a.com,Addition,ag.com,999.999.999.999,False
        a.com,Addition,ah.com,999.999.999.999,False
        a.com,Addition,ai.com,999.999.999.999,False
        a.com,Addition,aj.com,999.999.999.999,False
        a.com,Addition,ak.com,999.999.999.999,False
        a.com,Addition,al.com,999.999.999.999,False
        a.com,Addition,am.com,999.999.999.999,False
        a.com,Addition,an.com,999.999.999.999,False
        a.com,Addition,ao.com,999.999.999.999,False
        a.com,Addition,ap.com,999.999.999.999,False
        a.com,Addition,aq.com,999.999.999.999,False
        a.com,Addition,ar.com,999.999.999.999,False
        a.com,Addition,as.com,999.999.999.999,False
        a.com,Addition,at.com,999.999.999.999,False
        a.com,Addition,au.com,999.999.999.999,False
        a.com,Addition,av.com,999.999.999.999,False
        a.com,Addition,aw.com,999.999.999.999,False
        a.com,Addition,ax.com,999.999.999.999,False
        a.com,Addition,ay.com,999.999.999.999,False
        a.com,Addition,az.com,999.999.999.999,False
        a.com,Bitsquatting,c.com,999.999.999.999,False
        a.com,Bitsquatting,e.com,999.999.999.999,False
        a.com,Bitsquatting,i.com,999.999.999.999,False
        a.com,Bitsquatting,q.com,999.999.999.999,False
        a.com,Original*,a.com,999.999.999.999,False
        a.com,Replacement,1.com,999.999.999.999,False
        a.com,Replacement,2.com,999.999.999.999,False
        a.com,Replacement,s.com,999.999.999.999,False
        a.com,Replacement,w.com,999.999.999.999,False
        a.com,Replacement,y.com,999.999.999.999,False
        a.com,Replacement,z.com,999.999.999.999,False
        a.com,Various,acom.com,999.999.999.999,False
        a.com,Various,wwa.com,999.999.999.999,False
        a.com,Various,www-a.com,999.999.999.999,False
        a.com,Various,wwwa.com,999.999.999.999,False
        a.com,Vowel swap,o.com,999.999.999.999,False
        a.com,Vowel swap,u.com,999.999.999.999,False
    """).strip()

    sorted_rows = sorted(response.text.strip().split('\n'))
    assert '\n'.join(sorted_rows) == expected_report
def test_links_on_report(webapp):
    """The report page must link to both the CSV and the JSON export."""
    encoded = Domain('a.com').to_hex()
    report_html = webapp.get('/search/{}'.format(encoded)).text

    csv_link = '/search/{}/csv'.format(encoded)
    json_link = '/search/{}/json'.format(encoded)

    assert csv_link in report_html
    assert json_link in report_html
def suggest_domain(search_domain):
    """Propose a plausible domain for a search string that did not parse.

    The search string is split on single spaces and handled by term count:

    * one term: ',', '/' and '-' are replaced with '.', and the result is
      returned if it parses as a domain;
    * two terms where the second is a known TLD: the terms are joined
      with '.', and the result is returned if it parses;
    * more than two terms: treated as garbage, nothing is suggested;
    * otherwise the terms are cleaned, joined with '' and '-', given a
      '.com' suffix, and one valid candidate is picked at random.

    Returns the suggested domain as a string, or None when there is no
    valid suggestion.
    """
    terms = search_domain.split(' ')
    term_count = len(terms)

    # A common typo: a comma (or similar) typed where the period between
    # the second- and top-level domains belongs.
    if term_count == 1:
        dotted = re.sub(r'[,/-]', '.', terms[0])
        if Domain.try_parse(dotted) is not None:
            return dotted

    # Domain levels separated by a space rather than a period.
    if term_count == 2 and terms[1] in tld_db.TLDS:
        dotted = '.'.join(terms)
        if Domain.try_parse(dotted) is not None:
            return dotted

    # Anything longer is almost certainly garbage.
    if term_count > 2:
        return None

    # Drop over-long words, then strip characters that cannot appear in
    # a hostname label.
    cleaned = [
        re.sub(r'[^a-zA-Z0-9\-]', '', term)
        for term in terms
        if len(term) < 30
    ]

    # Build de-duplicated candidates: every joiner combined with every TLD.
    candidates = {
        '{}.{}'.format(joiner.join(cleaned).lower(), tld)
        for tld in ('com', )
        for joiner in ('', '-')
    }

    valid = [
        candidate for candidate in candidates
        if Domain.try_parse(candidate) is not None
    ]
    if not valid:
        return None

    return random.choice(valid)
def test_dressed_check():
    """Exercise the "dressed" detection on specific domain pairs."""
    dressed_pairs = (
        ('example.com', 'www.example.com'),
        ('example.com', 'ww2.example.com'),
        ('www.example.com', 'example.com'),
        ('www.example.com', 'example.com.au'),
    )
    for first, second in dressed_pairs:
        assert parked_api.dressed(Domain(first), Domain(second))

    # A different registered name is not a dressed variant.
    is_dressed = parked_api.dressed(Domain('www.example.com'),
                                    Domain('www.examples.com'))
    assert not is_dressed
def test_unicode_basics(webapp):
    """Hit every API endpoint with a punycode-encoded Unicode domain.

    There are no explicit assertions; the test only requires that each
    request completes.
    """
    unicode_domain = Domain('xn--sterreich-z7a.icom.museum')

    for endpoint in ('fuzz', 'ip', 'parked', 'safebrowsing', 'whois'):
        url = '/api/{}/{}'.format(endpoint, unicode_domain.to_hex())
        webapp.get(url)

    # The to_hex endpoint takes the plain ASCII form rather than hex.
    webapp.get('/api/to_hex/{}'.format(unicode_domain.to_ascii()))
def test_invalid_domain_raises_exception():
    """Constructing a Domain from invalid input raises InvalidDomainException."""
    invalid_inputs = (
        1,
        '',
        '?@?!#?!?23//d/sad',
        None,
        u'a\uDFFFa.com',  # contains a lone surrogate code point
    )
    for invalid in invalid_inputs:
        with pytest.raises(InvalidDomainException):
            Domain(invalid)
def test_analyse(self):
    """Check the shape and content of the report produced by tools.analyse."""
    analysis = tools.analyse(Domain('a.com'))
    report = analysis[1]

    self.assertEqual(
        'a.com', analysis[0],
        'First item in results should be the original domain')

    self.assertEqual(['fuzzy_domains'], list(report.keys()),
                     'We only return fuzzy domains in report')

    fuzzy = report['fuzzy_domains']

    # The original domain is always reported first.
    assert fuzzy[0] == {
        'domain-name': 'a.com',
        'fuzzer': 'Original*',
        'hex': '612e636f6d'
    }

    domain_names = sorted(entry['domain-name'] for entry in fuzzy)
    assert domain_names == [
        '1.com', '2.com', 'a.com', 'aa.com', 'ab.com', 'ac.com',
        'acom.com', 'ad.com', 'ae.com', 'af.com', 'ag.com', 'ah.com',
        'ai.com', 'aj.com', 'ak.com', 'al.com', 'am.com', 'an.com',
        'ao.com', 'ap.com', 'aq.com', 'ar.com', 'as.com', 'at.com',
        'au.com', 'av.com', 'aw.com', 'ax.com', 'ay.com', 'az.com',
        'c.com', 'e.com', 'i.com', 'o.com', 'q.com', 's.com', 'u.com',
        'w.com', 'wwa.com', 'www-a.com', 'wwwa.com', 'y.com', 'z.com'
    ]
def test_parked(f_httpretty, webapp):
    """Test the parked score for a domain that redirects to another site.

    Both the root URL and the parked-check URL of www.example.com answer
    with a 302 to forsale.com, whose page body is a sales pitch.
    """
    get = f_httpretty.GET

    f_httpretty.register_uri(
        get,
        'http://www.example.com:80/',
        status=302,
        adding_headers={
            'location': 'http://forsale.com',
        },
    )
    f_httpretty.register_uri(
        get,
        'http://www.example.com:80/dnstwister_parked_check',
        status=302,
        adding_headers={
            'location': 'http://forsale.com',
        },
    )
    f_httpretty.register_uri(
        get,
        'http://forsale.com',
        body='Buy this domain right now!',
    )

    hexdomain = Domain('www.example.com').to_hex()
    result = webapp.get('/api/parked/{}'.format(hexdomain)).json

    assert result['score'] == 0.64
    assert result['score_text'] == 'Quite likely'
    assert result['redirects']
    assert result['redirects_to'] == 'forsale.com'
def test_unicode_resolve(webapp):
    """Check the IP API resolves a punycode-encoded Unicode domain."""
    hexdomain = Domain('xn--sterreich-z7a.icom.museum').to_hex()
    response = webapp.get('/api/ip/{}'.format(hexdomain))

    assert response.status_code == 200

    payload = response.json

    # The resolved address is not fixed, so take it out of the payload and
    # validate it separately from the fixed fields.
    ip_addr = payload.pop('ip')

    assert payload == {
        u'domain': u'xn--sterreich-z7a.icom.museum',
        u'domain_as_hexadecimal': u'786e2d2d7374657272656963682d7a37612e69636f6d2e6d757365756d',
        u'error': False,
        u'fuzz_url': u'http://localhost/api/fuzz/786e2d2d7374657272656963682d7a37612e69636f6d2e6d757365756d',
        u'parked_score_url': u'http://localhost/api/parked/786e2d2d7374657272656963682d7a37612e69636f6d2e6d757365756d',
        u'url': u'http://localhost/api/ip/786e2d2d7374657272656963682d7a37612e69636f6d2e6d757365756d'
    }

    # Raises socket.error if this is not a valid IP address.
    socket.inet_aton(ip_addr)
def test_dressed_redirect(f_httpretty, webapp):
    """A naked domain redirecting to a subdomain of itself is not parked.

    AKA "dressing" the domain: example.com redirects to ww2.example.com,
    and the resulting parked score must be zero.
    """
    def not_found(request, uri, headers):
        return (404, {}, 'Boom')

    def ok(request, uri, headers):
        return (200, {}, 'OK')

    get = f_httpretty.GET

    f_httpretty.register_uri(
        get,
        'http://example.com:80/',
        status=302,
        adding_headers={
            'location': 'http://ww2.example.com',
        },
    )
    f_httpretty.register_uri(
        get,
        'http://example.com/dnstwister_parked_check',
        body=not_found,
    )
    f_httpretty.register_uri(
        get,
        'http://ww2.example.com',
        body=ok,
    )

    hexdomain = Domain('example.com').to_hex()
    result = webapp.get('/api/parked/{}'.format(hexdomain)).json

    assert result['score'] == 0.0
def test_json_export_no_fuzzy(webapp, monkeypatch):
    """JSON export with the fuzzer patched out lists only the original domain.

    The fuzzer is replaced with patches.NoFuzzer and DNS resolution is
    stubbed to a fixed fake IP with no error.
    """
    def fake_resolve(domain):
        return ('999.999.999.999', False)

    monkeypatch.setattr('dnstwister.tools.dnstwist.DomainFuzzer',
                        patches.NoFuzzer)
    monkeypatch.setattr('dnstwister.tools.resolve', fake_resolve)

    # The path is a comma-separated list of hex-encoded domains.
    path = ','.join(Domain(name).to_hex() for name in ('a.com', ))
    response = webapp.get('/search/{}/json'.format(path))

    disposition = response.headers['Content-Disposition']
    assert disposition == 'attachment; filename=dnstwister_report_a.com.json'

    assert response.json == {
        u'a.com': {
            u'fuzzy_domains': [{
                u'domain-name': u'a.com',
                u'fuzzer': u'Original*',
                u'hex': u'612e636f6d',
                u'resolution': {
                    u'error': False,
                    u'ip': u'999.999.999.999'
                }
            }]
        }
    }
def try_parse_domain_from_hex(hex_encoded_ascii_domain):
    """Decode a hex-encoded ASCII domain and try to parse it.

    Returns whatever Domain.try_parse returns for the decoded text, or
    None when the input is not a hex string (ValueError or TypeError from
    bytes.fromhex) or does not decode as ASCII.
    """
    try:
        raw = bytes.fromhex(hex_encoded_ascii_domain)
        ascii_domain_text = raw.decode('ascii')
    except (ValueError, TypeError):
        # UnicodeDecodeError is a ValueError, so non-ASCII bytes land
        # here as well.
        return None
    else:
        return Domain.try_parse(ascii_domain_text)
def test_failed_export(webapp):
    """Requesting an unknown export format is rejected with HTTP 400."""
    hexdomain = Domain('a.com').to_hex()
    url = '/search/{}/xlsx'.format(hexdomain)

    response = webapp.get(url, expect_errors=True)

    assert response.status_code == 400
def test_safebrowsing_with_bad_domain(webapp):
    """The safebrowsing API flags the Google test domain.

    The domain used is malware.testing.google.test.
    """
    hexdomain = Domain('malware.testing.google.test').to_hex()
    response = webapp.get('/api/safebrowsing/{}'.format(hexdomain))

    assert response.status_code == 200
    assert response.json['issue_detected'] is True
def test_whitespace_trimmed(webapp):
    """Surrounding spaces, tabs and line breaks are removed from a search."""
    submitted = " icloudstats.net\t\r \n"
    expected_hex = Domain('icloudstats.net').to_hex()

    response = webapp.post('/search', {'domains': submitted}).follow()

    assert response.status_code == 200
    assert response.request.url == 'http://localhost/search/{}'.format(
        expected_hex)
def test_unicode_fuzzing():
    """The fuzzer accepts a Unicode domain and produces Unicode variants."""
    unicode_domain = Domain('xn--domain.com').to_unicode()

    fuzzer = dnstwister.dnstwist.DomainFuzzer(unicode_domain)
    fuzzer.fuzz()

    fuzzed_names = sorted(entry['domain-name'] for entry in fuzzer.domains)

    assert fuzzed_names == [
        u'www-\u3bd9\u3bdc\u3bd9\u3bdf.com',
        u'www\u3bd9\u3bdc\u3bd9\u3bdf.com',
        u'ww\u3bd9\u3bdc\u3bd9\u3bdf.com',
        u'\u3bd9-\u3bdc\u3bd9\u3bdf.com',
        u'\u3bd9.\u3bdc\u3bd9\u3bdf.com',
        u'\u3bd9\u3bd9\u3bdc\u3bd9\u3bdf.com',
        u'\u3bd9\u3bd9\u3bdc\u3bdf.com',
        u'\u3bd9\u3bd9\u3bdf.com',
        u'\u3bd9\u3bdc-\u3bd9\u3bdf.com',
        u'\u3bd9\u3bdc.\u3bd9\u3bdf.com',
        u'\u3bd9\u3bdc\u3bd9-\u3bdf.com',
        u'\u3bd9\u3bdc\u3bd9.com',
        u'\u3bd9\u3bdc\u3bd9.\u3bdf.com',
        u'\u3bd9\u3bdc\u3bd9\u3bd9\u3bdf.com',
        u'\u3bd9\u3bdc\u3bd9\u3bdf.com',
        u'\u3bd9\u3bdc\u3bd9\u3bdfa.com',
        u'\u3bd9\u3bdc\u3bd9\u3bdfb.com',
        u'\u3bd9\u3bdc\u3bd9\u3bdfc.com',
        u'\u3bd9\u3bdc\u3bd9\u3bdfcom.com',
        u'\u3bd9\u3bdc\u3bd9\u3bdfd.com',
        u'\u3bd9\u3bdc\u3bd9\u3bdfe.com',
        u'\u3bd9\u3bdc\u3bd9\u3bdff.com',
        u'\u3bd9\u3bdc\u3bd9\u3bdfg.com',
        u'\u3bd9\u3bdc\u3bd9\u3bdfh.com',
        u'\u3bd9\u3bdc\u3bd9\u3bdfi.com',
        u'\u3bd9\u3bdc\u3bd9\u3bdfj.com',
        u'\u3bd9\u3bdc\u3bd9\u3bdfk.com',
        u'\u3bd9\u3bdc\u3bd9\u3bdfl.com',
        u'\u3bd9\u3bdc\u3bd9\u3bdfm.com',
        u'\u3bd9\u3bdc\u3bd9\u3bdfn.com',
        u'\u3bd9\u3bdc\u3bd9\u3bdfo.com',
        u'\u3bd9\u3bdc\u3bd9\u3bdfp.com',
        u'\u3bd9\u3bdc\u3bd9\u3bdfq.com',
        u'\u3bd9\u3bdc\u3bd9\u3bdfr.com',
        u'\u3bd9\u3bdc\u3bd9\u3bdfs.com',
        u'\u3bd9\u3bdc\u3bd9\u3bdft.com',
        u'\u3bd9\u3bdc\u3bd9\u3bdfu.com',
        u'\u3bd9\u3bdc\u3bd9\u3bdfv.com',
        u'\u3bd9\u3bdc\u3bd9\u3bdfw.com',
        u'\u3bd9\u3bdc\u3bd9\u3bdfx.com',
        u'\u3bd9\u3bdc\u3bd9\u3bdfy.com',
        u'\u3bd9\u3bdc\u3bd9\u3bdfz.com',
        u'\u3bd9\u3bdc\u3bd9\u3bdf\u3bdf.com',
        u'\u3bd9\u3bdc\u3bdc\u3bd9\u3bdf.com',
        u'\u3bd9\u3bdc\u3bdf.com',
        u'\u3bd9\u3bdc\u3bdf\u3bd9.com',
        u'\u3bdc\u3bd9\u3bd9\u3bdf.com',
        u'\u3bdc\u3bd9\u3bdf.com',
    ]
def test_uses_idna_2008_encoding():
    """Domain must use IDNA 2008 encoding, not Python's built-in codec.

    Python's own 'idna' codec squashes 'straße.de' down to 'strasse.de'
    because it uses the pre-2008 IDNA encoding, so the idna module is
    needed rather than pure Python.
    """
    builtin_encoding = 'straße.de'.encode('idna').decode()
    assert builtin_encoding == 'strasse.de'

    domain_encoding = Domain('straße.de').to_ascii()
    assert domain_encoding == 'xn--strae-oqa.de'
def test_to_hex(webapp):
    """The to_hex API endpoint returns the same hex as Domain.to_hex."""
    domain = 'www.example.com'
    expected_hex = Domain(domain).to_hex()

    payload = webapp.get('/api/to_hex/{}'.format(domain)).json

    assert payload['domain_as_hexadecimal'] == expected_hex
def test_failed_resolve(webapp):
    """Resolving an unregistered domain yields no IP and no error.

    The API still answers 200; 'ip' is False because nothing resolved and
    'error' is False because the lookup itself did not fail.
    """
    hexdomain = Domain('imprettysurethatthisdomaindoesnotexist.com').to_hex()
    response = webapp.get('/api/ip/{}'.format(hexdomain))

    assert response.status_code == 200

    payload = response.json
    assert payload['ip'] is False
    assert payload['error'] is False
def domain_to_hex(domain_param):
    """Return the standard API payload for a domain plus its hex encoding.

    Aborts with HTTP 400 ('Malformed domain.') when domain_param does not
    parse as a domain; otherwise returns the JSON response built from the
    standard API values with 'domain_as_hexadecimal' added.
    """
    parsed = Domain.try_parse(domain_param)
    if parsed is None:
        flask.abort(400, 'Malformed domain.')

    encoded = parsed.to_hex()

    response_body = standard_api_values(parsed, skip='domain_to_hex')
    response_body['domain_as_hexadecimal'] = encoded

    return flask.jsonify(response_body)
def test_can_compare_domain_instance_to_valid_strings():
    """A Domain compares equal to its ASCII and Unicode string forms."""
    # Equal to the punycode form as text, as unicode-prefixed text and as
    # bytes, and to the original Unicode form.
    for equivalent in ('xn--w5a.com', u'xn--w5a.com', b'xn--w5a.com', 'ӓ.com'):
        assert Domain('ӓ.com') == equivalent

    assert Domain('ӓ.com') != 'blob'
    assert Domain('ӓ.com') != 2
    # Deliberately "!= None" rather than "is not None": this exercises the
    # comparison operator itself.
    assert Domain('ӓ.com') != None
def test_parked_query_on_broken_domain(webapp):
    """The parked API gives a neutral result for a domain that doesn't exist."""
    hexdomain = Domain(
        'there-is-little-chance-this-domain-exists-i-hope.com').to_hex()

    response = webapp.get('/api/parked/{}'.format(hexdomain))

    assert response.status_code == 200

    payload = response.json
    assert payload['score'] == 0
    assert payload['redirects'] is False
    assert payload['redirects_to'] is None
    assert payload['score_text'] == 'Unlikely'
    assert payload['dressed'] is False
def test_all_twisted_domains_are_valid_idna2008():
    """Every fuzzed domain must be constructible as a Domain.

    Some of the homoglyphs were not actually valid idna, so fuzz a
    doubled-letter domain for each ASCII lowercase letter and check every
    generated name.
    """
    for char in string.ascii_lowercase:
        fuzzer = dnstwister.dnstwist.DomainFuzzer(char * 2 + '.com')
        fuzzer.fuzz()

        candidates = (entry['domain-name'] for entry in fuzzer.domains)
        for candidate in candidates:
            try:
                Domain(candidate)
            except dnstwister.core.domain.InvalidDomainException:
                raise Exception(f'The domain {candidate} / {candidate.encode()} is not IDNA2008 compatible')
def analyse(domain):
    """Build the fuzzy-domain report for a domain.

    Returns a (domain, data) tuple where data is a dict with the single
    key 'fuzzy_domains', holding the fuzzing results with a 'hex' entry
    added to each one.
    """
    fuzzed = fuzzy_domains(domain)

    # Attach a hex-encoded form of each domain for the later IP
    # resolution. The same people who may use this app already have
    # blocking on things like www.exampl0e.com in URLs...
    for entry in fuzzed:
        entry['hex'] = Domain(entry['domain-name']).to_hex()

    return (domain, {'fuzzy_domains': fuzzed})
def test_suggestion(webapp):
    """Submitting no valid domains redirects to an error with a suggestion.

    Where a domain could be reasonably suggested (here 'example' ->
    'example.com'), its hex encoding is carried in the redirect URL.
    """
    response = webapp.post('/search', {'domains': 'example'},
                           expect_errors=True)

    assert response.status_code == 302

    suggested_hex = Domain('example.com').to_hex()
    expected_redirect = 'http://localhost/error/0?suggestion=' + suggested_hex

    assert response.headers['location'] == expected_redirect
def test_parse_domain(self):
    """Tests of the helper that decodes and validates a domain.

    tools.try_parse_domain_from_hex returns a valid domain or None. Both
    halves are covered: input that cannot be hex-decoded, and input that
    decodes cleanly but is not a valid domain.
    """
    self.assertIs(None, tools.try_parse_domain_from_hex(''),
                  'Missing hex data should return None')

    self.assertIs(None, tools.try_parse_domain_from_hex(None),
                  'Non-hex-decodable data should return None')

    self.assertIs(None, tools.try_parse_domain_from_hex('he378a -- ?'),
                  'Non-hex-decodable data should return None')

    bad_domain = '\\www.z.comasfff'
    self.assertIsNone(Domain.try_parse(bad_domain),
                      'Bad domain should be invalid')

    long_bad_domain = 'www.zsssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssszssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss.com'
    self.assertIsNone(Domain.try_parse(long_bad_domain),
                      'Long domain should be invalid')

    # hexlify() returns bytes and bytes.fromhex() only accepts str.
    # Without the decode() the helper would bail out on a TypeError and
    # this assertion would pass without ever validating the decoded
    # domain text.
    bad_domain_data = binascii.hexlify(bad_domain.encode()).decode()
    self.assertIs(
        None, tools.try_parse_domain_from_hex(bad_domain_data),
        'hex-decodable (but invalid) domain data should return None')

    domain = 'www.example.com'
    self.assertIsNotNone(Domain.try_parse(domain),
                         'Good domain should be valid')

    domain_data = binascii.hexlify(domain.encode()).decode()
    self.assertEqual('www.example.com',
                     tools.try_parse_domain_from_hex(domain_data),
                     'hex-decodable valid domain data should be returned')
def test_json_export(webapp, monkeypatch):
    """JSON export returns the expected attachment and per-domain results.

    The fuzzer is replaced with patches.SimpleFuzzer and DNS resolution
    is stubbed to a fixed fake IP with no error.
    """
    def fake_resolve(domain):
        return ('999.999.999.999', False)

    monkeypatch.setattr('dnstwister.tools.dnstwist.DomainFuzzer',
                        patches.SimpleFuzzer)
    monkeypatch.setattr('dnstwister.tools.resolve', fake_resolve)

    path = Domain('a.com').to_hex()
    response = webapp.get('/search/{}/json'.format(path))

    disposition = response.headers['Content-Disposition']
    assert disposition == 'attachment; filename=dnstwister_report_a.com.json'

    assert response.json == {
        u'a.com': {
            u'fuzzy_domains': [{
                u'domain-name': u'a.com',
                u'fuzzer': u'Original*',
                u'hex': u'612e636f6d',
                u'resolution': {
                    u'error': False,
                    u'ip': u'999.999.999.999'
                }
            }, {
                u'domain-name': u'a.co',
                u'fuzzer': u'Pretend',
                u'hex': u'612e636f',
                u'resolution': {
                    u'error': False,
                    u'ip': u'999.999.999.999'
                }
            }]
        }
    }
def __filter_domains(self):
    """Drop unparseable and duplicate entries from self.domains.

    An entry is kept when its 'domain-name' parses with Domain.try_parse
    and its Unicode form has not been seen on an earlier entry. The
    order of first occurrences is preserved, and self.domains is
    replaced with the filtered list.
    """
    seen = set()
    filtered = []

    for d in self.domains:
        d_obj = Domain.try_parse(d['domain-name'])
        if d_obj is None:
            continue

        # Convert once and reuse the result for both the membership test
        # and the insert, instead of decoding the same domain twice.
        unicode_name = d_obj.to_unicode()
        if unicode_name in seen:
            continue

        seen.add(unicode_name)
        filtered.append(d)

    self.domains = filtered
def test_post_unicode(webapp):
    """End-to-end: posting a Unicode domain lands on its punycode report."""
    unicode_domain = 'höt.com'

    expected_hex = Domain('xn--ht-fka.com').to_hex()
    assert expected_hex == '786e2d2d68742d666b612e636f6d'

    response = webapp.post('/search', {'domains': unicode_domain}).follow()

    assert response.status_code == 200
    assert response.request.url == 'http://localhost/search/{}'.format(
        expected_hex)

    # The page shows the Unicode form, and also pairs it with its
    # punycode form.
    page_text = response.text
    assert unicode_domain in page_text
    assert 'höt.com (xn--ht-fka.com)' in page_text
def test_large_domain_is_reasonable_in_performance():
    """Looooong domain names highlighted that the idna decoding is slooooow.

    This is a basic benchmark for performance, based on a bot's behaviour
    recently: fuzzing the long domain below must finish in under 10
    seconds.
    """
    # Imported locally so this test does not depend on a module-level
    # import of time.
    import time

    domain = 'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz.zzzzzzzzzzzzzzzzzzzzzzzzzppieo.com'

    # perf_counter() is monotonic, so the measured duration cannot be
    # skewed by wall-clock adjustments during the run.
    start = time.perf_counter()

    dnstwister.tools.fuzzy_domains(Domain(domain))

    duration = time.perf_counter() - start

    assert duration < 10, 'duration too long: {} secs'.format(duration)

    print('Long domain name fuzzed in: {} seconds'.format(duration))