def testFilter0(self):
    """With filter.domain.0 through filter.domain.4 blank, nothing matches."""
    blank_keys = (
        'filter.domain.0',
        'filter.domain.1',
        'filter.domain.2',
        'filter.domain.3',
        'filter.domain.4',
    )
    for key in blank_keys:
        testcfg.set(key, '')

    # Reset the module-level g_exdm to force a reload on the next match().
    domain_filter.g_exdm = None

    for url in ('', 'http://abc'):
        self.assertEqual(None, domain_filter.match(url))
def preparse_filter(first_block, meta):
    """
    Screen a document by its domain and by the magic header of its first block.

    The URI is read from ``meta['uri']`` (empty string when absent) and passed
    to ``domain_filter.match``; ``first_block`` is passed to
    ``magic.guess_type``.

    Returns the distill result:
      * ``(EXDOMAIN, match)`` when the domain filter gives a truthy match;
      * ``(NON_HTML, guessed)`` when the guessed type is truthy and is neither
        'text/html' nor 'text/plain';
      * ``0`` otherwise.
    """
    matched_domain = domain_filter.match(meta.get('uri', ''))
    if matched_domain:
        return EXDOMAIN, matched_domain

    content_type = magic.guess_type(first_block)
    # A falsy guess is treated the same as an accepted text type.
    if not content_type or content_type in ('text/html', 'text/plain'):
        return 0
    return NON_HTML, content_type
def testFilter1(self):
    """Check domain_filter.match() for exact-domain and suffix-domain cases.

    NOTE(review): the expectations presumably rely on a filter configuration
    (abc.com, def, .xyz.com) set up outside this method -- confirm.
    """
    domain_filter.g_exdm = None  # force reload

    # (url, expected match) pairs, evaluated in order.
    expectations = (
        ('', None),

        # exact domain match
        ('http://abc.com/', 'abc.com'),
        ('http://www.abc.com/', None),
        ('http://abc.com/index.html?a=b#c', 'abc.com'),
        ('http://*****:*****@abc.com/index.html?a=b#c', 'abc.com'),
        ('http://def/', 'def'),
        ('http://www.def.com/', None),

        # suffix domain match
        ('http://xyz.com/', None),
        ('http://www.xyz.com/', '.xyz.com'),
        ('http://www.xyz.com/index.html?a=b#c', '.xyz.com'),
        ('http://*****:*****@www.xyz.com/index.html?a=b#c', '.xyz.com'),
    )
    for url, expected in expectations:
        self.assertEqual(expected, domain_filter.match(url))