def categorized(domain):
    """
    Provide a discretized categorization of an input domain.

    Returns a DCat object ``dcat`` whose ``dcat.cat`` is one of
    'benign', 'suspicious', 'gray' or 'notsure'.  The second DCat field
    is currently always None (not implemented yet).

    Results are memoized in the module-level ``Cached`` mapping, keyed by
    the ``domain`` argument exactly as passed in, so repeated calls for
    the same name return the same DCat object.

    Note the suspicion here is determined on a per-domain basis, without
    considering the clustering effects among a bunch of names.
    """
    # Keep the try body down to the cache lookup so that errors raised
    # while categorizing are not reported as happening "during handling"
    # of the cache-miss KeyError.
    try:
        return Cached[domain]
    except KeyError:
        pass

    ds = publicsuffix.DomainStruct(domain)

    # 'benign' determination
    if (not ds.isFQDN
            or whitelist.is_whitelisted_t2ld(ds.eTkLD[1])
            or dnsblav.is_dnsblav_service(ds)
            or alexa.in_top(ds.eTkLD[1], 50000)
            # or good WOT
            ):
        cat = 'benign'
    # 'suspicious', to be done
    # NOTE(review): membership is tested with the DomainStruct object, not
    # the raw domain string -- confirm that matches what
    # blacklisted_domains actually stores.
    elif ds in blacklisted_domains:
        cat = 'suspicious'
    # 'gray', e.g., non-top-Alexa p2p/porn sites
    # NOTE(review): the module name was garbled to `p**n` (which Python
    # parses as a power expression on undefined names); restored to
    # `porn` by analogy with p2p.is_p2p_domain -- confirm the import.
    elif p2p.is_p2p_domain(domain) or porn.is_porn_domain(domain):
        cat = 'gray'
    # 'notsure'
    else:
        cat = 'notsure'

    dcat = DCat(cat, None)  # None: to be implemented
    Cached[domain] = dcat
    return dcat
def test_is_p2p_domain(self):
    """Check p2p.is_p2p_domain on one negative and one positive sample."""
    non_p2p_name = 'mail.google.com'
    p2p_name = 'bt.332.org'

    # Negative case first, then the positive one.
    verdict = p2p.is_p2p_domain(non_p2p_name)
    self.assertFalse(verdict)

    verdict = p2p.is_p2p_domain(p2p_name)
    self.assertTrue(verdict)