def test_caching(self):
    """Check component types after parsing the same URL as ``unicode`` first.

    Regression test for bug #1313119: the URL is parsed once in its
    ``unicode`` form, then again in its original form, and the components of
    the second result must have the type of the original string.
    """
    uri = "http://example.com/doc/"
    # Parse the unicode spelling first; its result is deliberately discarded.
    # Presumably this primes an internal cache -- the test name suggests so.
    urlparse.urlparse(unicode(uri))
    parsed = urlparse.urlparse(uri)
    expected_type = type(uri)
    for attribute in ("scheme", "hostname", "path"):
        self.assertEqual(type(getattr(parsed, attribute)), expected_type)
def test_caching(self):
    """Check component types after parsing a ``str()`` copy of the URL first.

    Regression test for bug #1313119: the URL is parsed once via ``str(uri)``,
    then again in its original form, and the components of the second result
    must have the type of the original string.
    """
    uri = "http://example.com/doc/"
    # Parse the converted spelling first; its result is deliberately discarded.
    # Presumably this primes an internal cache -- the test name suggests so.
    urlparse.urlparse(str(uri))
    parsed = urlparse.urlparse(uri)
    expected_type = type(uri)
    for attribute in ("scheme", "hostname", "path"):
        self.assertEqual(type(getattr(parsed, attribute)), expected_type)
def test_portseparator(self):
    """Check how ``urlparse`` treats ':' as port versus scheme separator.

    Covers the changes made for issue 754016.
    """
    cases = [
        ("path:80", ('', '', 'path:80', '', '', '')),
        ("http:", ('http', '', '', '', '', '')),
        ("https:", ('https', '', '', '', '', '')),
        ("http://www.python.org:80",
         ('http', 'www.python.org:80', '', '', '', '')),
    ]
    for url, expected in cases:
        self.assertEqual(urlparse.urlparse(url), expected)
def test_anyscheme(self):
    """Check that unregistered schemes are split like well-known ones.

    Issue 7904: ``s3://foo.com/stuff`` has netloc ``foo.com``.
    """
    cases = [
        ("s3://foo.com/stuff",
         ('s3', 'foo.com', '/stuff', '', '', '')),
        ("x-newscheme://foo.com/stuff",
         ('x-newscheme', 'foo.com', '/stuff', '', '', '')),
        ("x-newscheme://foo.com/stuff?query#fragment",
         ('x-newscheme', 'foo.com', '/stuff', '', 'query', 'fragment')),
        ("x-newscheme://foo.com/stuff?query",
         ('x-newscheme', 'foo.com', '/stuff', '', 'query', '')),
    ]
    for url, expected in cases:
        self.assertEqual(urlparse.urlparse(url), expected)
def test_withoutscheme(self):
    """Check ``urlparse`` on inputs that carry no scheme.

    Issue 754016: urlparse went wrong with ``IP:port`` without a scheme.
    RFC 1808 specifies that the netloc must be introduced by ``//``;
    urlparse expects the same and otherwise treats that portion of the URL
    as the path.
    """
    cases = (
        ("path", ('', '', 'path', '', '', '')),
        ("//www.python.org:80", ('', 'www.python.org:80', '', '', '', '')),
        ("http://www.python.org:80",
         ('http', 'www.python.org:80', '', '', '', '')),
    )
    for url, expected in cases:
        self.assertEqual(urlparse.urlparse(url), expected)
def test_withoutscheme(self):
    """Check ``urlparse`` on inputs that carry no scheme.

    Issue 754016: urlparse went wrong with ``IP:port`` without a scheme.
    RFC 1808 specifies that the netloc must be introduced by ``//``;
    urlparse expects the same and otherwise treats that portion of the URL
    as the path.
    """
    expected_by_url = [
        ("path", ('', '', 'path', '', '', '')),
        ("//www.python.org:80", ('', 'www.python.org:80', '', '', '', '')),
        ("http://www.python.org:80",
         ('http', 'www.python.org:80', '', '', '', '')),
    ]
    for url, expected in expected_by_url:
        actual = urlparse.urlparse(url)
        self.assertEqual(actual, expected)
def test_portseparator(self):
    """Check how ``urlparse`` treats ':' as port versus scheme separator.

    Covers the changes made for issue 754016.
    """
    expected_by_url = (
        ("path:80", ('', '', 'path:80', '', '', '')),
        ("http:", ('http', '', '', '', '', '')),
        ("https:", ('https', '', '', '', '', '')),
        ("http://www.python.org:80",
         ('http', 'www.python.org:80', '', '', '', '')),
    )
    for url, expected in expected_by_url:
        actual = urlparse.urlparse(url)
        self.assertEqual(actual, expected)

# def test_main():
#     test_support.run_unittest(UrlParseTestCase)
# if __name__ == "__main__":
#     test_main()
def checkRoundtrips(self, url, parsed, split):
    """Assert that ``url`` survives parse/unparse round trips unchanged.

    The same checks run twice: once with ``urlparse``/``urlunparse`` against
    ``parsed`` (a 6-tuple) and once with ``urlsplit``/``urlunsplit`` against
    ``split`` (a 5-tuple).

    Args:
        url: The URL string under test.
        parsed: Expected ``urlparse`` result as
            ``(scheme, netloc, path, params, query, fragment)``.
        split: Expected ``urlsplit`` result as
            ``(scheme, netloc, path, query, fragment)``.
    """
    parse_fields = ("scheme", "netloc", "path", "params", "query", "fragment")
    split_fields = ("scheme", "netloc", "path", "query", "fragment")
    # Attributes exposed on the result object in addition to the tuple fields.
    extra_attrs = ("username", "password", "hostname", "port")

    variants = (
        (urlparse.urlparse, urlparse.urlunparse, parse_fields, parsed),
        (urlparse.urlsplit, urlparse.urlunsplit, split_fields, split),
    )
    for parse, unparse, fields, expected in variants:
        first = parse(url)
        self.assertEqual(first, expected)
        by_attribute = tuple(getattr(first, name) for name in fields)
        self.assertEqual(by_attribute, expected)

        # Putting it back together must reproduce the input exactly.
        rebuilt = unparse(first)
        self.assertEqual(rebuilt, url)
        self.assertEqual(rebuilt, first.geturl())

        # geturl() is a fixpoint: re-parsing it yields the same result again.
        again = parse(first.geturl())
        self.assertEqual(again.geturl(), first.geturl())
        self.assertEqual(again, first)
        for name in fields + extra_attrs:
            self.assertEqual(getattr(again, name), getattr(first, name))
def parse_url_archive(self, url):
    """Split an arquivo.pt replay URL into its parts and store them.

    After the ``http://arquivo.pt/noFrame/replay/`` prefix is removed, the
    first path segment is read as a ``%Y%m%d%H%M%S`` timestamp and the rest
    as the original page URL. A document with the domain, both URLs and the
    parsed date is inserted into ``db["processed_urls"]``.

    URLs containing a double quote are skipped and nothing is stored.
    Any exception is handed to ``self.retry`` and the result is raised
    (presumably a Celery bound task -- confirm against the decorator).

    Args:
        url: The full replay URL.

    Returns:
        None.
    """
    try:
        if '"' in url:
            return
        stripped = url.replace("http://arquivo.pt/noFrame/replay/", "")
        # Everything before the first "/" is the timestamp; the remainder
        # (empty when there is no "/") is the archived page's own URL.
        datetime_s, _, original_url = stripped.partition("/")
        domain = urlparse(original_url).netloc
        pubdate = datetime.strptime(datetime_s, '%Y%m%d%H%M%S')
        db["processed_urls"].insert({
            "domain": domain,
            "url": url,
            "original_url": original_url,
            "pubdate": pubdate,
        })
    except Exception as exc:
        raise self.retry(exc=exc)
def test_issue14072(self):
    """Check scheme and path of ``tel:`` URLs whose path starts with '+'.

    Both ``urlsplit`` and ``urlparse`` are exercised, with and without a
    hyphen in the number.
    """
    samples = (
        ('tel:+31-641044153', '+31-641044153'),
        ('tel:+31641044153', '+31641044153'),
    )
    # urlsplit first, then the same assertions for urlparse.
    for parse in (urlparse.urlsplit, urlparse.urlparse):
        for url, expected_path in samples:
            result = parse(url)
            self.assertEqual(result.scheme, 'tel')
            self.assertEqual(result.path, expected_path)
def test_attributes_bad_port(self):
    """Check that a non-integer port is kept in netloc but rejected by ``.port``."""
    url = "http://www.example.net:foo"
    for parse in (urlparse.urlsplit, urlparse.urlparse):
        result = parse(url)
        self.assertEqual(result.netloc, "www.example.net:foo")
        # Reading the ``port`` attribute is what must raise.
        self.assertRaises(ValueError, getattr, result, "port")
def test_telurl_params(self):
    """Check that ``urlparse`` separates ``;params`` from the path of ``tel:`` URLs."""
    cases = [
        # (url, expected path, expected params)
        ('tel:123-4;phone-context=+1-650-516',
         '123-4', 'phone-context=+1-650-516'),
        ('tel:+1-201-555-0123',
         '+1-201-555-0123', ''),
        ('tel:7042;phone-context=example.com',
         '7042', 'phone-context=example.com'),
        ('tel:863-1234;phone-context=+1-914-555',
         '863-1234', 'phone-context=+1-914-555'),
    ]
    for url, path, params in cases:
        result = urlparse.urlparse(url)
        self.assertEqual(result.scheme, 'tel')
        self.assertEqual(result.path, path)
        self.assertEqual(result.params, params)
def test_unparse_parse(self):
    """Check that unsplit(split(u)) and unparse(parse(u)) both give back ``u``."""
    samples = (
        'Python',
        './Python',
        'x-newscheme://foo.com/stuff',
        'x://y',
        'x:/y',
        'x:/',
        '/',
    )
    round_trips = (
        (urlparse.urlsplit, urlparse.urlunsplit),
        (urlparse.urlparse, urlparse.urlunparse),
    )
    for u in samples:
        for take_apart, reassemble in round_trips:
            self.assertEqual(reassemble(take_apart(u)), u)
def test_RFC2732(self):
    """Check ``hostname``/``port`` for bracketed IPv6 literals and plain hosts.

    Every host is tried with an explicit port, with no port, and with a
    trailing ':' but no port number. Malformed bracket usage must raise
    ``ValueError``.
    """
    # (host as written in the URL, expected ``hostname`` attribute)
    hosts = [
        ('Test.python.org', 'test.python.org'),
        ('12.34.56.78', '12.34.56.78'),
        ('[::1]', '::1'),
        ('[dead:beef::1]', 'dead:beef::1'),
        ('[dead:beef::]', 'dead:beef::'),
        ('[dead:beef:cafe:5417:affe:8FA3:deaf:feed]',
         'dead:beef:cafe:5417:affe:8fa3:deaf:feed'),
        ('[::12.34.56.78]', '::12.34.56.78'),
        ('[::ffff:12.34.56.78]', '::ffff:12.34.56.78'),
    ]
    # (text appended after the host, expected ``port`` attribute)
    port_variants = [
        (':5432', 5432),
        ('', None),
        (':', None),
    ]
    for suffix, port in port_variants:
        for host, hostname in hosts:
            url = 'http://' + host + suffix + '/foo/'
            urlparsed = urlparse.urlparse(url)
            self.assertEqual((urlparsed.hostname, urlparsed.port),
                             (hostname, port))

    invalid_urls = (
        'http://::12.34.56.78]/',
        'http://[::1/foo/',
        'ftp://[::1/foo/bad]/bad',
        'http://[::1/foo/bad]/bad',
        'http://[::ffff:12.34.56.78',
    )
    for invalid_url in invalid_urls:
        self.assertRaises(ValueError, urlparse.urlparse, invalid_url)
def test_attributes_without_netloc(self):
    """Check that a URI without ``scheme://netloc`` syntax has empty netloc attributes.

    The URI is modelled on an RFC 3261 example. It looks as though the
    username, hostname and port could be filled in, but because it does not
    use the ``scheme://netloc`` syntax, the netloc and the attributes
    derived from it must stay empty.
    """
    uri = "sip:[email protected];maddr=239.255.255.1;ttl=15"
    netloc_attrs = ("username", "password", "hostname", "port")
    for parse in (urlparse.urlsplit, urlparse.urlparse):
        result = parse(uri)
        self.assertEqual(result.netloc, "")
        for name in netloc_attrs:
            self.assertEqual(getattr(result, name), None)
        self.assertEqual(result.geturl(), uri)
def test_unparse_parse(self):
    """Check that unsplit(split(u)) and unparse(parse(u)) both give back ``u``."""
    samples = [
        'Python',
        './Python',
        'x-newscheme://foo.com/stuff',
        'x://y',
        'x:/y',
        'x:/',
        '/',
    ]
    for u in samples:
        via_split = urlparse.urlunsplit(urlparse.urlsplit(u))
        self.assertEqual(via_split, u)
        via_parse = urlparse.urlunparse(urlparse.urlparse(u))
        self.assertEqual(via_parse, u)
def test_noslash(self):
    """Check that a query may directly follow the host with no '/' in between.

    Issue 1637: ``http://foo.com?query`` is legal.
    """
    url = "http://example.com?blahblah=/foo"
    expected = ('http', 'example.com', '', '', 'blahblah=/foo', '')
    actual = urlparse.urlparse(url)
    self.assertEqual(actual, expected)
def test_RFC2368(self):
    """Check that a ``mailto:`` path starting with a digit is parsed as the path.

    Issue 11467: a path that starts with a number was not parsed correctly.

    The address literals had been replaced by two different scrubbed
    placeholders, so the input and the expected path could never match.
    They are restored to a single address whose local part begins with a
    digit, which is the case the issue describes.
    """
    address = '1337@example.org'
    self.assertEqual(urlparse.urlparse('mailto:' + address),
                     ('mailto', '', address, '', '', ''))