def test_invalid(self):
    """Every malformed or incomplete URL here must be rejected by the validator."""
    bad_urls = (
        '://stuff.com',
        '//stuff.com',
        'stuff.com',
        'www.stuff.com',
        'http://',
        'http:',
        'http',
        'ftp://[email protected]',
        'ftp://[email protected]/path?q#frag',
        'ftp://ftp.epcc.ed.ac.uk ',
        'http://a',
        'http://a.',
        'http://a.b',
        'http://a:a.b',
        'http://a/b.c/ef.gh',
        'http://999.999.999.999:123ab',
        'http://999.999.999.999:-12',
        'http://1234567890123456789012345678901234567890123456789012345678901234.com',
    )
    for candidate in bad_urls:
        self.assertFalse(validator.is_valid(candidate))
parser.add_argument('-c', '--comparator', choices=comparators.keys(), help='comparison function to define URL uniqueness (DEFAULT=alpha)') args = parser.parse_args() # read URLs from file urls = None try: urls = open(args.input).readlines() urls = [x.strip() for x in urls] except IOError: print "Error: File \"%s\" not found." % args.input sys.exit() # default comparator is alpha cmp = comparators['alpha'] if args.comparator is not None: cmp = comparators[args.comparator] # set of urls, normalized normUrls = normalizer.normalize_list(urls) # print results for i, url in enumerate(urls): normUrl = normUrls[i] print 'Source: ', url print 'Valid: ', validator.is_valid(url) print 'Canonicalized: ', normUrl print 'Source unique: ', is_unique(url, urls, cmp) print 'Canonicalized unique: ', is_unique(normUrl, normUrls, cmp) print
def test_valid(self):
    """Every well-formed URL here must be accepted by the validator."""
    good_urls = (
        'https://stuff.com',
        'http://stuff.com',
        'ftp://stuff.com',
        'ftps://stuff.com',
        'http://stuff.com/path//////',
        'http://stuff.com/path.more/file.ext',
        'http://stuff.com/path?q=wat+stuff',
        'http://stuff.com/path#frag',
        'http://stuff.com/path?q#frag',
        'ftp://ftp.epcc.ed.ac',
        'http://a.bc',
        'http://a.bcdefg',
        'http://a.bcde-fgh',
        'http://a.b.c.ef',
        'http://2001.0db8.85a3.0000.0000.8a2e.0370.7334',
        'http://zzzz.gggg.eeee.9999.1234.mmmm.aaaa.wwww',
        'http://123.1.2.3',
        'http://999.999.999.999',
        'http://999.999.999.999:99999',
        'http://localhost',
        'http://123456789012345678901234567890123456789012345678901234567890123.com',
    )
    for candidate in good_urls:
        self.assertTrue(validator.is_valid(candidate))