def test_has_url_any_extension(self):
    """Exercise ``has_url_any_extension`` against a table of URL/extension cases."""
    installer = 'http://www.feedreader.com/releases/FeedReader314Setup.exe'
    query_page = 'http://servis.idnes.cz/GetFile.aspx?type=idneskindle'
    plain_page = 'http://www.feedreader.com/blog'
    php_page = 'http://www.feedreader.com/testimonials.php'

    # Each row: (url, candidate extensions, whether a match is expected).
    cases = [
        (installer, ['.exe', '.pdf'], True),
        (installer, ['.pdf'], False),
        # A query string after the file name is expected not to hide '.aspx'.
        (query_page, ['.aspx'], True),
        # An empty extension list is expected never to match.
        (query_page, [], False),
        # The '.com' in the host name is expected not to count as an extension.
        (plain_page, ['.com'], False),
        (php_page, ['.php'], True),
        # An extension given without its leading dot is expected not to match.
        (php_page, ['php'], False),
    ]

    for url, extensions, expected in cases:
        check = self.assertTrue if expected else self.assertFalse
        check(has_url_any_extension(url, extensions))
def url_allowed(self, url):
    """Return True when ``url`` passes every filter configured on this object.

    The checks run in a fixed order and stop at the first failure:

    1. the scheme must be ``http``, ``https`` or ``file``;
    2. when ``self.filter_mobile`` is set, the network location must not
       start with ``m.`` or ``pda.``;
    3. when ``self.allow_res`` is set, ``_matches`` must accept the URL;
    4. when ``self.deny_res`` is set, ``_matches`` must reject the URL;
    5. when ``self.allow_domains`` is set, ``is_url_from_any_domain`` must
       accept the parsed URL;
    6. when ``self.deny_domains`` is set, ``is_url_from_any_domain`` must
       reject the parsed URL;
    7. when ``self.deny_extensions`` is set, ``has_url_any_extension`` must
       reject the parsed URL.

    An empty/falsy filter attribute disables the corresponding check.
    """
    url = to_str(url)
    parts = urlparse(url)

    if parts.scheme not in ['http', 'https', 'file']:
        return False

    # filter mobile and pda sites
    if self.filter_mobile and parts.netloc.startswith(('m.', 'pda.')):
        return False

    # Regular-expression filters operate on the full URL string.
    if self.allow_res and not _matches(url, self.allow_res):
        return False
    if self.deny_res and _matches(url, self.deny_res):
        return False

    # Domain and extension filters receive the already-parsed URL.
    if self.allow_domains and not is_url_from_any_domain(parts, self.allow_domains):
        return False
    if self.deny_domains and is_url_from_any_domain(parts, self.deny_domains):
        return False
    if self.deny_extensions and has_url_any_extension(parts, self.deny_extensions):
        return False

    return True