def test_portseparator(self):
    # Issue 754016: a ':' that introduces a port must be told apart from
    # the ':' that terminates a scheme.
    cases = [
        ("path:80", ('', '', 'path:80', '', '', '')),
        ("http:", ('http', '', '', '', '', '')),
        ("https:", ('https', '', '', '', '', '')),
        ("http://www.python.org:80",
         ('http', 'www.python.org:80', '', '', '', '')),
    ]
    for url, expected in cases:
        self.assertEqual(scurl.urlparse(url), expected)
def test_caching(self):
    # Test case for bug #1313119: the unicode form of the URL is parsed
    # first, then the plain form; the components of the second result must
    # have the same type as the plain input.
    # NOTE(review): `unicode` is a Python 2 builtin and is not defined in
    # this block -- confirm this test only runs where that name exists.
    uri = "http://example.com/doc/"
    scurl.urlparse(unicode(uri))
    parsed = scurl.urlparse(uri)
    for component in ('scheme', 'hostname', 'path'):
        self.assertEqual(type(getattr(parsed, component)), type(uri))
def test_withoutscheme(self):
    # Test urlparse without scheme.
    # Issue 754016: urlparse goes wrong with IP:port without scheme.
    # RFC 1808 specifies that netloc should start with //; urlparse expects
    # the same, otherwise it classifies that portion of the url as path.
    cases = [
        ("path", ('', '', 'path', '', '', '')),
        ("//www.python.org:80", ('', 'www.python.org:80', '', '', '', '')),
        ("http://www.python.org:80",
         ('http', 'www.python.org:80', '', '', '', '')),
    ]
    for url, expected in cases:
        self.assertEqual(scurl.urlparse(url), expected)
def test_anyscheme(self):
    # Issue 7904: s3://foo.com/stuff has netloc "foo.com"; the same must
    # hold for any other scheme name.
    cases = [
        ("s3://foo.com/stuff",
         ('s3', 'foo.com', '/stuff', '', '', '')),
        ("x-newscheme://foo.com/stuff",
         ('x-newscheme', 'foo.com', '/stuff', '', '', '')),
        ("x-newscheme://foo.com/stuff?query#fragment",
         ('x-newscheme', 'foo.com', '/stuff', '', 'query', 'fragment')),
        ("x-newscheme://foo.com/stuff?query",
         ('x-newscheme', 'foo.com', '/stuff', '', 'query', '')),
    ]
    for url, expected in cases:
        self.assertEqual(scurl.urlparse(url), expected)
def checkRoundtrips(self, url, parsed, split):
    """Assert that *url* round-trips through urlparse and urlsplit.

    *parsed* is the 6-tuple expected from ``scurl.urlparse(url)`` and
    *split* the 5-tuple expected from ``scurl.urlsplit(url)``.  For each
    of the two functions the result is compared with the expectation,
    reassembled and compared with *url*, and then re-parsed from
    ``geturl()`` to check that every attribute is unchanged.
    """
    parse_fields = ('scheme', 'netloc', 'path', 'params', 'query',
                    'fragment')
    split_fields = ('scheme', 'netloc', 'path', 'query', 'fragment')
    netloc_fields = ('username', 'password', 'hostname', 'port')

    # --- urlparse() / urlunparse() ---
    first = scurl.urlparse(url)
    self.assertEqual(first, parsed)
    as_tuple = tuple(getattr(first, name) for name in parse_fields)
    self.assertEqual(as_tuple, parsed)
    # put it back together and it should be the same
    rebuilt = scurl.urlunparse(first)
    self.assertEqual(rebuilt, url)
    self.assertEqual(rebuilt, first.geturl())
    # the result of geturl() is a fixpoint; we can always parse it
    # again to get the same result:
    again = scurl.urlparse(first.geturl())
    self.assertEqual(again.geturl(), first.geturl())
    self.assertEqual(again, first)
    for name in parse_fields + netloc_fields:
        self.assertEqual(getattr(again, name), getattr(first, name))

    # --- urlsplit() / urlunsplit() ---
    first = scurl.urlsplit(url)
    self.assertEqual(first, split)
    as_tuple = tuple(getattr(first, name) for name in split_fields)
    self.assertEqual(as_tuple, split)
    rebuilt = scurl.urlunsplit(first)
    self.assertEqual(rebuilt, url)
    self.assertEqual(rebuilt, first.geturl())
    # check the fixpoint property of re-parsing the result of geturl()
    again = scurl.urlsplit(first.geturl())
    self.assertEqual(again.geturl(), first.geturl())
    self.assertEqual(again, first)
    for name in split_fields + netloc_fields:
        self.assertEqual(getattr(again, name), getattr(first, name))
def test_issue14072(self):
    # 'tel:' URLs whose number starts with '+' (with or without dashes)
    # must yield scheme 'tel' and the number as path, for both urlsplit
    # and urlparse.
    cases = [
        ('tel:+31-641044153', '+31-641044153'),
        ('tel:+31641044153', '+31641044153'),
    ]
    for parse in (scurl.urlsplit, scurl.urlparse):
        for url, number in cases:
            result = parse(url)
            self.assertEqual(result.scheme, 'tel')
            self.assertEqual(result.path, number)
def main():
    """Time urlparse, urlsplit and urljoin over the Chromium URL list.

    The URL file is read five times per function; only the call itself is
    inside the timed region.  With ``--encode`` each line is converted to
    bytes before being passed in.  The accumulated time divided by five is
    printed for each function.
    """
    parser = argparse.ArgumentParser(
        description='Measure the time of urlsplit and urljoin')
    parser.add_argument('--encode', action='store_true',
                        help='encode the urls (default: False)')
    encode = parser.parse_args().encode

    urls_path = 'benchmarks/urls/chromiumUrls.txt'
    rounds = 5

    urlparse_time = 0
    for _ in range(rounds):
        with open(urls_path) as url_file:
            for line in url_file:
                target = line.encode() if encode else line
                began = timer()
                urlparse(target)
                finished = timer()
                urlparse_time += finished - began
    print("the urlparse time is", urlparse_time / 5, "seconds")

    urlsplit_time = 0
    for _ in range(rounds):
        with open(urls_path) as url_file:
            for line in url_file:
                target = line.encode() if encode else line
                began = timer()
                urlsplit(target)
                finished = timer()
                urlsplit_time += finished - began
    print("the urlsplit time is", urlsplit_time / 5, "seconds")

    urljoin_time = 0
    # The relative reference joined onto every URL; bytes when --encode.
    relative = "/asd".encode() if encode else "/asd"
    for _ in range(rounds):
        with open(urls_path) as url_file:
            for line in url_file:
                target = line.encode() if encode else line
                began = timer()
                urljoin(target, relative)
                finished = timer()
                urljoin_time += finished - began
    print("the urljoin time is", urljoin_time / 5, "seconds")
def test_attributes_bad_port(self):
    """Check handling of non-integer ports."""
    bad_url = "http://www.example.net:foo"
    for parse in (scurl.urlsplit, scurl.urlparse):
        result = parse(bad_url)
        # The netloc is kept verbatim; only reading .port must fail.
        self.assertEqual(result.netloc, "www.example.net:foo")
        with self.assertRaises(ValueError):
            result.port
def test_port_casting_failure_message(self):
    # Reading .port on a URL whose port part is not numeric must raise a
    # ValueError whose message matches "Port could not be", for both
    # urlparse and urlsplit results.
    expected_message = "Port could not be"
    bad_url = 'http://Server=sde; Service=sde:oracle'
    for parse in (scurl.urlparse, scurl.urlsplit):
        result = parse(bad_url)
        with self.assertRaisesRegex(ValueError, expected_message):
            result.port
def test_telurl_params(self):
    # For 'tel:' URLs, everything after the first ';' is expected in
    # .params and the number itself in .path.
    cases = [
        ('tel:123-4;phone-context=+1-650-516',
         '123-4', 'phone-context=+1-650-516'),
        ('tel:+1-201-555-0123',
         '+1-201-555-0123', ''),
        ('tel:7042;phone-context=example.com',
         '7042', 'phone-context=example.com'),
        ('tel:863-1234;phone-context=+1-914-555',
         '863-1234', 'phone-context=+1-914-555'),
    ]
    for url, path, params in cases:
        result = scurl.urlparse(url)
        self.assertEqual(result.scheme, 'tel')
        self.assertEqual(result.path, path)
        self.assertEqual(result.params, params)
def test_unparse_parse(self):
    # Splitting/parsing a URL and reassembling it must give the input back.
    urls = (
        'Python',
        './Python',
        'x-newscheme://foo.com/stuff',
        'x://y',
        'x:/y',
        'x:/',
        '/',
    )
    for original in urls:
        via_split = scurl.urlunsplit(scurl.urlsplit(original))
        self.assertEqual(via_split, original)
        via_parse = scurl.urlunparse(scurl.urlparse(original))
        self.assertEqual(via_parse, original)
def test_attributes_without_netloc(self):
    # This example is straight from RFC 3261.  It looks like it
    # should allow the username, hostname, and port to be filled
    # in, but doesn't.  Since it's a URI and doesn't use the
    # scheme://netloc syntax, the netloc and related attributes
    # should be left empty.
    #
    # The literal had been mangled by e-mail obfuscation
    # ("[email protected]"); it is restored to the RFC 3261 SIP URI.
    uri = "sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"
    p = scurl.urlsplit(uri)
    self.assertEqual(p.netloc, "")
    self.assertEqual(p.username, None)
    self.assertEqual(p.password, None)
    self.assertEqual(p.hostname, None)
    self.assertEqual(p.port, None)
    self.assertEqual(p.geturl(), uri)

    p = scurl.urlparse(uri)
    self.assertEqual(p.netloc, "")
    self.assertEqual(p.username, None)
    self.assertEqual(p.password, None)
    self.assertEqual(p.hostname, None)
    self.assertEqual(p.port, None)
    self.assertEqual(p.geturl(), uri)

    # You guessed it, repeating the test with bytes input
    uri = b"sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"
    p = scurl.urlsplit(uri)
    self.assertEqual(p.netloc, b"")
    self.assertEqual(p.username, None)
    self.assertEqual(p.password, None)
    self.assertEqual(p.hostname, None)
    self.assertEqual(p.port, None)
    self.assertEqual(p.geturl(), uri)

    p = scurl.urlparse(uri)
    self.assertEqual(p.netloc, b"")
    self.assertEqual(p.username, None)
    self.assertEqual(p.password, None)
    self.assertEqual(p.hostname, None)
    self.assertEqual(p.port, None)
    self.assertEqual(p.geturl(), uri)
def test_portseparator(self):
    # Issue 754016: a ':' that introduces a port must be told apart from
    # the ':' that terminates a scheme.
    str_cases = [
        ("path:80", ('', '', 'path:80', '', '', '')),
        ("http:", ('http', '', '', '', '', '')),
        ("https:", ('https', '', '', '', '', '')),
        ("http://www.python.org:80",
         ('http', 'www.python.org:80', '', '', '', '')),
    ]
    # As usual, the same inputs are checked as bytes as well.
    bytes_cases = [
        (url.encode('ascii'),
         tuple(part.encode('ascii') for part in expected))
        for url, expected in str_cases
    ]
    for url, expected in str_cases + bytes_cases:
        self.assertEqual(scurl.urlparse(url), expected)
def test_mixed_types_rejected(self):
    # Several functions that process either strings or ASCII encoded bytes
    # accept multiple arguments.  Check they reject mixed type input.
    mixed_calls = [
        (scurl.urlparse, ("www.python.org", b"http")),
        (scurl.urlparse, (b"www.python.org", "http")),
        (scurl.urlsplit, ("www.python.org", b"http")),
        (scurl.urlsplit, (b"www.python.org", "http")),
        (scurl.urlunparse, ((b"http", "www.python.org", "", "", "", ""),)),
        (scurl.urlunparse, (("http", b"www.python.org", "", "", "", ""),)),
        (scurl.urlunsplit, ((b"http", "www.python.org", "", "", ""),)),
        (scurl.urlunsplit, (("http", b"www.python.org", "", "", ""),)),
        (scurl.urljoin, ("http://python.org", b"http://python.org")),
        (scurl.urljoin, (b"http://python.org", "http://python.org")),
    ]
    for func, args in mixed_calls:
        with self.assertRaisesRegex(TypeError, "Cannot mix str"):
            func(*args)
def test_RFC2732(self):
    # Bracketed IPv6 literals (RFC 2732): check the hostname/port derived
    # from valid URLs, then that malformed brackets raise ValueError.
    valid_cases = [
        # explicit port
        ('http://Test.python.org:5432/foo/', 'test.python.org', 5432),
        ('http://12.34.56.78:5432/foo/', '12.34.56.78', 5432),
        ('http://[::1]:5432/foo/', '::1', 5432),
        ('http://[dead:beef::1]:5432/foo/', 'dead:beef::1', 5432),
        ('http://[dead:beef::]:5432/foo/', 'dead:beef::', 5432),
        ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:5432/foo/',
         'dead:beef:cafe:5417:affe:8fa3:deaf:feed', 5432),
        ('http://[::12.34.56.78]:5432/foo/', '::12.34.56.78', 5432),
        ('http://[::ffff:12.34.56.78]:5432/foo/',
         '::ffff:12.34.56.78', 5432),
        # no port
        ('http://Test.python.org/foo/', 'test.python.org', None),
        ('http://12.34.56.78/foo/', '12.34.56.78', None),
        ('http://[::1]/foo/', '::1', None),
        ('http://[dead:beef::1]/foo/', 'dead:beef::1', None),
        ('http://[dead:beef::]/foo/', 'dead:beef::', None),
        ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]/foo/',
         'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None),
        ('http://[::12.34.56.78]/foo/', '::12.34.56.78', None),
        ('http://[::ffff:12.34.56.78]/foo/',
         '::ffff:12.34.56.78', None),
        # trailing ':' with an empty port
        ('http://Test.python.org:/foo/', 'test.python.org', None),
        ('http://12.34.56.78:/foo/', '12.34.56.78', None),
        ('http://[::1]:/foo/', '::1', None),
        ('http://[dead:beef::1]:/foo/', 'dead:beef::1', None),
        ('http://[dead:beef::]:/foo/', 'dead:beef::', None),
        ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:/foo/',
         'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None),
        ('http://[::12.34.56.78]:/foo/', '::12.34.56.78', None),
        ('http://[::ffff:12.34.56.78]:/foo/',
         '::ffff:12.34.56.78', None),
    ]
    for url, hostname, port in valid_cases:
        result = scurl.urlparse(url)
        self.assertEqual((result.hostname, result.port), (hostname, port))

    invalid_urls = [
        'http://::12.34.56.78]/',
        'http://[::1/foo/',
        'ftp://[::1/foo/bad]/bad',
        'http://[::1/foo/bad]/bad',
        'http://[::ffff:12.34.56.78',
    ]
    for invalid_url in invalid_urls:
        with self.assertRaises(ValueError):
            scurl.urlparse(invalid_url)
def test_RFC2732(self):
    # Bracketed IPv6 literals (RFC 2732): check the hostname/port derived
    # from each URL, first with str input and then with the same data
    # encoded to ASCII bytes.
    text_cases = [
        # explicit port
        ('http://Test.python.org:5432/foo/', 'test.python.org', 5432),
        ('http://12.34.56.78:5432/foo/', '12.34.56.78', 5432),
        ('http://[::1]:5432/foo/', '::1', 5432),
        ('http://[dead:beef::1]:5432/foo/', 'dead:beef::1', 5432),
        ('http://[dead:beef::]:5432/foo/', 'dead:beef::', 5432),
        ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:5432/foo/',
         'dead:beef:cafe:5417:affe:8fa3:deaf:feed', 5432),
        ('http://[::12.34.56.78]:5432/foo/', '::12.34.56.78', 5432),
        ('http://[::ffff:12.34.56.78]:5432/foo/',
         '::ffff:12.34.56.78', 5432),
        # no port
        ('http://Test.python.org/foo/', 'test.python.org', None),
        ('http://12.34.56.78/foo/', '12.34.56.78', None),
        ('http://[::1]/foo/', '::1', None),
        ('http://[dead:beef::1]/foo/', 'dead:beef::1', None),
        ('http://[dead:beef::]/foo/', 'dead:beef::', None),
        ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]/foo/',
         'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None),
        ('http://[::12.34.56.78]/foo/', '::12.34.56.78', None),
        ('http://[::ffff:12.34.56.78]/foo/',
         '::ffff:12.34.56.78', None),
        # trailing ':' with an empty port
        ('http://Test.python.org:/foo/', 'test.python.org', None),
        ('http://12.34.56.78:/foo/', '12.34.56.78', None),
        ('http://[::1]:/foo/', '::1', None),
        ('http://[dead:beef::1]:/foo/', 'dead:beef::1', None),
        ('http://[dead:beef::]:/foo/', 'dead:beef::', None),
        ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:/foo/',
         'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None),
        ('http://[::12.34.56.78]:/foo/', '::12.34.56.78', None),
        ('http://[::ffff:12.34.56.78]:/foo/',
         '::ffff:12.34.56.78', None),
    ]
    # URL and hostname become bytes; the port stays an int or None.
    binary_cases = [
        (url.encode('ascii'), hostname.encode('ascii'), port)
        for url, hostname, port in text_cases
    ]
    for url, hostname, port in text_cases + binary_cases:
        result = scurl.urlparse(url)
        self.assertEqual((result.hostname, result.port), (hostname, port))
def test_unparse_parse(self):
    # Splitting/parsing a URL and reassembling it must give the input
    # back, for str input and for the same URLs encoded to ASCII bytes.
    text_urls = [
        'Python',
        './Python',
        'x-newscheme://foo.com/stuff',
        'x://y',
        'x:/y',
        'x:/',
        '/',
    ]
    binary_urls = [url.encode('ascii') for url in text_urls]
    for original in text_urls + binary_urls:
        via_split = scurl.urlunsplit(scurl.urlsplit(original))
        self.assertEqual(via_split, original)
        via_parse = scurl.urlunparse(scurl.urlparse(original))
        self.assertEqual(via_parse, original)
def test_noslash(self):
    # Issue 1637: http://foo.com?query is legal -- a query may follow the
    # netloc directly, with an empty path in between.
    url = "http://example.com?blahblah=/foo"
    expected = ('http', 'example.com', '', '', 'blahblah=/foo', '')
    self.assertEqual(scurl.urlparse(url), expected)
def run_urlparse(urls):
    """Call scurl.urlparse on every item of *urls*, discarding the results."""
    for candidate in urls:
        scurl.urlparse(candidate)
def test_RFC2368(self):
    # Issue 11467: path that starts with a number is not parsed correctly.
    #
    # The address literals had been mangled by e-mail obfuscation, so the
    # input and the expected path no longer matched each other; they are
    # restored to a mailto address whose local part begins with digits.
    self.assertEqual(scurl.urlparse('mailto:1337@example.org'),
                     ('mailto', '', '1337@example.org', '', '', ''))