Exemplo n.º 1
0
    def test_caching(self):
        # Test case for bug #1313119: parse the unicode form of the URI
        # first, then the plain form, and check that every inspected field
        # of the second result has the same type as the plain input.
        uri = "http://example.com/doc/"
        urlparse.urlparse(unicode(uri))

        parsed = urlparse.urlparse(uri)
        expected_type = type(uri)
        for field in ("scheme", "hostname", "path"):
            self.assertEqual(type(getattr(parsed, field)), expected_type)
Exemplo n.º 2
0
    def test_caching(self):
        # Test case for bug #1313119
        uri = "http://example.com/doc/"
        unicode_uri = str(uri)

        urlparse.urlparse(unicode_uri)
        p = urlparse.urlparse(uri)
        self.assertEqual(type(p.scheme), type(uri))
        self.assertEqual(type(p.hostname), type(uri))
        self.assertEqual(type(p.path), type(uri))
Exemplo n.º 3
0
 def test_portseparator(self):
     # Issue 754016 makes changes for port separator ':' from scheme separator.
     # Each URL is paired with the 6-tuple urlparse() is expected to return.
     expectations = (
         ("path:80", ('', '', 'path:80', '', '', '')),
         ("http:", ('http', '', '', '', '', '')),
         ("https:", ('https', '', '', '', '', '')),
         ("http://www.python.org:80",
          ('http', 'www.python.org:80', '', '', '', '')),
     )
     for url, expected in expectations:
         self.assertEqual(urlparse.urlparse(url), expected)
Exemplo n.º 4
0
 def test_anyscheme(self):
     # Issue 7904: s3://foo.com/stuff has netloc "foo.com".
     self.assertEqual(urlparse.urlparse("s3://foo.com/stuff"),
                      ('s3','foo.com','/stuff','','',''))
     self.assertEqual(urlparse.urlparse("x-newscheme://foo.com/stuff"),
                      ('x-newscheme','foo.com','/stuff','','',''))
     self.assertEqual(urlparse.urlparse("x-newscheme://foo.com/stuff?query#fragment"),
                      ('x-newscheme','foo.com','/stuff','','query','fragment'))
     self.assertEqual(urlparse.urlparse("x-newscheme://foo.com/stuff?query"),
                      ('x-newscheme','foo.com','/stuff','','query',''))
Exemplo n.º 5
0
 def test_withoutscheme(self):
     # Test urlparse without scheme
     # Issue 754016: urlparse goes wrong with IP:port without scheme
     # RFC 1808 specifies that netloc should start with //, urlparse expects
     # the same, otherwise it classifies the portion of url as path.
     self.assertEqual(urlparse.urlparse("path"),
             ('','','path','','',''))
     self.assertEqual(urlparse.urlparse("//www.python.org:80"),
             ('','www.python.org:80','','','',''))
     self.assertEqual(urlparse.urlparse("http://www.python.org:80"),
             ('http','www.python.org:80','','','',''))
Exemplo n.º 6
0
 def test_withoutscheme(self):
     # Test urlparse without scheme
     # Issue 754016: urlparse goes wrong with IP:port without scheme
     # RFC 1808 specifies that netloc should start with //, urlparse expects
     # the same, otherwise it classifies the portion of url as path.
     self.assertEqual(urlparse.urlparse("path"),
                      ('', '', 'path', '', '', ''))
     self.assertEqual(urlparse.urlparse("//www.python.org:80"),
                      ('', 'www.python.org:80', '', '', '', ''))
     self.assertEqual(urlparse.urlparse("http://www.python.org:80"),
                      ('http', 'www.python.org:80', '', '', '', ''))
Exemplo n.º 7
0
    def test_portseparator(self):
        # Issue 754016 makes changes for port separator ':' from scheme
        # separator.  Check each URL against its expected 6-tuple in turn.
        no_scheme = urlparse.urlparse("path:80")
        self.assertEqual(no_scheme, ('', '', 'path:80', '', '', ''))

        # A bare "<scheme>:" yields only the scheme.
        for scheme in ('http', 'https'):
            self.assertEqual(urlparse.urlparse(scheme + ":"),
                             (scheme, '', '', '', '', ''))

        with_port = urlparse.urlparse("http://www.python.org:80")
        self.assertEqual(with_port,
                         ('http', 'www.python.org:80', '', '', '', ''))

# def test_main():
#     test_support.run_unittest(UrlParseTestCase)

# if __name__ == "__main__":
#     test_main()
Exemplo n.º 8
0
    def checkRoundtrips(self, url, parsed, split):
        """Assert that *url* survives parse/unparse round trips.

        ``parsed`` is the 6-tuple expected from ``urlparse.urlparse(url)``
        and ``split`` is the 5-tuple expected from
        ``urlparse.urlsplit(url)``.  For each function the result must equal
        the expected tuple, reassemble to *url*, and re-parse from
        ``geturl()`` to an equal result, attribute by attribute.
        """
        parse_fields = ("scheme", "netloc", "path", "params", "query",
                        "fragment")
        split_fields = ("scheme", "netloc", "path", "query", "fragment")
        netloc_fields = ("username", "password", "hostname", "port")

        # --- urlparse() / urlunparse() ---
        parse_result = urlparse.urlparse(url)
        self.assertEqual(parse_result, parsed)
        by_attribute = tuple(getattr(parse_result, f) for f in parse_fields)
        self.assertEqual(by_attribute, parsed)
        # Putting it back together must give the original URL.
        rebuilt = urlparse.urlunparse(parse_result)
        self.assertEqual(rebuilt, url)
        self.assertEqual(rebuilt, parse_result.geturl())

        # geturl() is a fixpoint: parsing it again gives the same result.
        reparsed = urlparse.urlparse(parse_result.geturl())
        self.assertEqual(reparsed.geturl(), parse_result.geturl())
        self.assertEqual(reparsed, parse_result)
        for field in parse_fields + netloc_fields:
            self.assertEqual(getattr(reparsed, field),
                             getattr(parse_result, field))

        # --- urlsplit() / urlunsplit() ---
        split_result = urlparse.urlsplit(url)
        self.assertEqual(split_result, split)
        by_attribute = tuple(getattr(split_result, f) for f in split_fields)
        self.assertEqual(by_attribute, split)
        rebuilt = urlparse.urlunsplit(split_result)
        self.assertEqual(rebuilt, url)
        self.assertEqual(rebuilt, split_result.geturl())

        # Same fixpoint property for the split form.
        resplit = urlparse.urlsplit(split_result.geturl())
        self.assertEqual(resplit.geturl(), split_result.geturl())
        self.assertEqual(resplit, split_result)
        for field in split_fields + netloc_fields:
            self.assertEqual(getattr(resplit, field),
                             getattr(split_result, field))
Exemplo n.º 9
0
    def checkRoundtrips(self, url, parsed, split):
        """Verify the parse -> unparse -> parse round trip for *url*.

        The check runs first with ``urlparse``/``urlunparse`` against the
        expected tuple ``parsed`` and then with ``urlsplit``/``urlunsplit``
        against the expected tuple ``split``.
        """
        netloc_attrs = ("username", "password", "hostname", "port")
        checks = (
            (urlparse.urlparse, urlparse.urlunparse, parsed,
             ("scheme", "netloc", "path", "params", "query", "fragment")),
            (urlparse.urlsplit, urlparse.urlunsplit, split,
             ("scheme", "netloc", "path", "query", "fragment")),
        )
        for parse, unparse, expected, names in checks:
            result = parse(url)
            self.assertEqual(result, expected)
            self.assertEqual(tuple(getattr(result, n) for n in names),
                             expected)

            # put it back together and it should be the same
            regenerated = unparse(result)
            self.assertEqual(regenerated, url)
            self.assertEqual(regenerated, result.geturl())

            # the result of geturl() is a fixpoint; we can always parse it
            # again to get the same result
            again = parse(result.geturl())
            self.assertEqual(again.geturl(), result.geturl())
            self.assertEqual(again, result)
            for n in names + netloc_attrs:
                self.assertEqual(getattr(again, n), getattr(result, n))
Exemplo n.º 10
0
def parse_url_archive(self, url):
    """Insert a record describing an arquivo.pt replay URL into ``db``.

    Every occurrence of the replay prefix is removed from *url*.  The text
    before the first ``/`` of the remainder is parsed as a
    ``%Y%m%d%H%M%S`` timestamp and the text after it is kept as the
    original URL.  A document with the domain, both URLs and the timestamp
    is inserted into ``db["processed_urls"]``.

    URLs containing a double quote are skipped (the function returns
    ``None`` without inserting).  Any exception raised along the way is
    passed to ``self.retry`` and the object it returns is raised.
    NOTE(review): ``self.retry`` looks like a bound task-queue API
    (e.g. Celery) -- confirm against the decorator, which is not in view.
    """
    replay_prefix = "http://arquivo.pt/noFrame/replay/"
    try:
        if '"' in url:
            return

        remainder = url.replace(replay_prefix, "")
        # "<timestamp>/<original url>": cut at the first slash only.
        timestamp, _, original_url = remainder.partition("/")

        domain = urlparse(original_url).netloc
        pubdate = datetime.strptime(timestamp, '%Y%m%d%H%M%S')

        db["processed_urls"].insert({
            "domain": domain,
            "url": url,
            "original_url": original_url,
            "pubdate": pubdate,
        })

    except Exception as exc:
        raise self.retry(exc=exc)
Exemplo n.º 11
0
    def test_issue14072(self):
        p1 = urlparse.urlsplit('tel:+31-641044153')
        self.assertEqual(p1.scheme, 'tel')
        self.assertEqual(p1.path, '+31-641044153')

        p2 = urlparse.urlsplit('tel:+31641044153')
        self.assertEqual(p2.scheme, 'tel')
        self.assertEqual(p2.path, '+31641044153')

        # Assert for urlparse
        p1 = urlparse.urlparse('tel:+31-641044153')
        self.assertEqual(p1.scheme, 'tel')
        self.assertEqual(p1.path, '+31-641044153')

        p2 = urlparse.urlparse('tel:+31641044153')
        self.assertEqual(p2.scheme, 'tel')
        self.assertEqual(p2.path, '+31641044153')
Exemplo n.º 12
0
    def test_issue14072(self):
        p1 = urlparse.urlsplit('tel:+31-641044153')
        self.assertEqual(p1.scheme, 'tel')
        self.assertEqual(p1.path, '+31-641044153')

        p2 = urlparse.urlsplit('tel:+31641044153')
        self.assertEqual(p2.scheme, 'tel')
        self.assertEqual(p2.path, '+31641044153')

        # Assert for urlparse
        p1 = urlparse.urlparse('tel:+31-641044153')
        self.assertEqual(p1.scheme, 'tel')
        self.assertEqual(p1.path, '+31-641044153')

        p2 = urlparse.urlparse('tel:+31641044153')
        self.assertEqual(p2.scheme, 'tel')
        self.assertEqual(p2.path, '+31641044153')
Exemplo n.º 13
0
    def test_attributes_bad_port(self):
        """Check handling of non-integer ports."""
        p = urlparse.urlsplit("http://www.example.net:foo")
        self.assertEqual(p.netloc, "www.example.net:foo")
        self.assertRaises(ValueError, lambda: p.port)

        p = urlparse.urlparse("http://www.example.net:foo")
        self.assertEqual(p.netloc, "www.example.net:foo")
        self.assertRaises(ValueError, lambda: p.port)
Exemplo n.º 14
0
    def test_attributes_bad_port(self):
        """Check handling of non-integer ports."""
        p = urlparse.urlsplit("http://www.example.net:foo")
        self.assertEqual(p.netloc, "www.example.net:foo")
        self.assertRaises(ValueError, lambda: p.port)

        p = urlparse.urlparse("http://www.example.net:foo")
        self.assertEqual(p.netloc, "www.example.net:foo")
        self.assertRaises(ValueError, lambda: p.port)
Exemplo n.º 15
0
    def test_telurl_params(self):
        p1 = urlparse.urlparse('tel:123-4;phone-context=+1-650-516')
        self.assertEqual(p1.scheme, 'tel')
        self.assertEqual(p1.path, '123-4')
        self.assertEqual(p1.params, 'phone-context=+1-650-516')

        p1 = urlparse.urlparse('tel:+1-201-555-0123')
        self.assertEqual(p1.scheme, 'tel')
        self.assertEqual(p1.path, '+1-201-555-0123')
        self.assertEqual(p1.params, '')

        p1 = urlparse.urlparse('tel:7042;phone-context=example.com')
        self.assertEqual(p1.scheme, 'tel')
        self.assertEqual(p1.path, '7042')
        self.assertEqual(p1.params, 'phone-context=example.com')

        p1 = urlparse.urlparse('tel:863-1234;phone-context=+1-914-555')
        self.assertEqual(p1.scheme, 'tel')
        self.assertEqual(p1.path, '863-1234')
        self.assertEqual(p1.params, 'phone-context=+1-914-555')
Exemplo n.º 16
0
    def test_telurl_params(self):
        p1 = urlparse.urlparse('tel:123-4;phone-context=+1-650-516')
        self.assertEqual(p1.scheme, 'tel')
        self.assertEqual(p1.path, '123-4')
        self.assertEqual(p1.params, 'phone-context=+1-650-516')

        p1 = urlparse.urlparse('tel:+1-201-555-0123')
        self.assertEqual(p1.scheme, 'tel')
        self.assertEqual(p1.path, '+1-201-555-0123')
        self.assertEqual(p1.params, '')

        p1 = urlparse.urlparse('tel:7042;phone-context=example.com')
        self.assertEqual(p1.scheme, 'tel')
        self.assertEqual(p1.path, '7042')
        self.assertEqual(p1.params, 'phone-context=example.com')

        p1 = urlparse.urlparse('tel:863-1234;phone-context=+1-914-555')
        self.assertEqual(p1.scheme, 'tel')
        self.assertEqual(p1.path, '863-1234')
        self.assertEqual(p1.params, 'phone-context=+1-914-555')
Exemplo n.º 17
0
 def test_unparse_parse(self):
     for u in [
             'Python',
             './Python',
             'x-newscheme://foo.com/stuff',
             'x://y',
             'x:/y',
             'x:/',
             '/',
     ]:
         self.assertEqual(urlparse.urlunsplit(urlparse.urlsplit(u)), u)
         self.assertEqual(urlparse.urlunparse(urlparse.urlparse(u)), u)
Exemplo n.º 18
0
    def test_RFC2732(self):
        for url, hostname, port in [
            ('http://Test.python.org:5432/foo/', 'test.python.org', 5432),
            ('http://12.34.56.78:5432/foo/', '12.34.56.78', 5432),
            ('http://[::1]:5432/foo/', '::1', 5432),
            ('http://[dead:beef::1]:5432/foo/', 'dead:beef::1', 5432),
            ('http://[dead:beef::]:5432/foo/', 'dead:beef::', 5432),
            ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:5432/foo/',
             'dead:beef:cafe:5417:affe:8fa3:deaf:feed', 5432),
            ('http://[::12.34.56.78]:5432/foo/', '::12.34.56.78', 5432),
            ('http://[::ffff:12.34.56.78]:5432/foo/',
             '::ffff:12.34.56.78', 5432),
            ('http://Test.python.org/foo/', 'test.python.org', None),
            ('http://12.34.56.78/foo/', '12.34.56.78', None),
            ('http://[::1]/foo/', '::1', None),
            ('http://[dead:beef::1]/foo/', 'dead:beef::1', None),
            ('http://[dead:beef::]/foo/', 'dead:beef::', None),
            ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]/foo/',
             'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None),
            ('http://[::12.34.56.78]/foo/', '::12.34.56.78', None),
            ('http://[::ffff:12.34.56.78]/foo/',
             '::ffff:12.34.56.78', None),
            ('http://Test.python.org:/foo/', 'test.python.org', None),
            ('http://12.34.56.78:/foo/', '12.34.56.78', None),
            ('http://[::1]:/foo/', '::1', None),
            ('http://[dead:beef::1]:/foo/', 'dead:beef::1', None),
            ('http://[dead:beef::]:/foo/', 'dead:beef::', None),
            ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:/foo/',
             'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None),
            ('http://[::12.34.56.78]:/foo/', '::12.34.56.78', None),
            ('http://[::ffff:12.34.56.78]:/foo/',
             '::ffff:12.34.56.78', None),
            ]:
            urlparsed = urlparse.urlparse(url)
            self.assertEqual((urlparsed.hostname, urlparsed.port) , (hostname, port))

        for invalid_url in [
                'http://::12.34.56.78]/',
                'http://[::1/foo/',
                'ftp://[::1/foo/bad]/bad',
                'http://[::1/foo/bad]/bad',
                'http://[::ffff:12.34.56.78']:
            self.assertRaises(ValueError, urlparse.urlparse, invalid_url)
Exemplo n.º 19
0
    def test_attributes_without_netloc(self):
        # This example is straight from RFC 3261.  It looks like it
        # should allow the username, hostname, and port to be filled
        # in, but doesn't.  Since it's a URI and doesn't use the
        # scheme://netloc syntax, the netloc and related attributes
        # should be left empty.
        uri = "sip:[email protected];maddr=239.255.255.1;ttl=15"
        p = urlparse.urlsplit(uri)
        self.assertEqual(p.netloc, "")
        self.assertEqual(p.username, None)
        self.assertEqual(p.password, None)
        self.assertEqual(p.hostname, None)
        self.assertEqual(p.port, None)
        self.assertEqual(p.geturl(), uri)

        p = urlparse.urlparse(uri)
        self.assertEqual(p.netloc, "")
        self.assertEqual(p.username, None)
        self.assertEqual(p.password, None)
        self.assertEqual(p.hostname, None)
        self.assertEqual(p.port, None)
        self.assertEqual(p.geturl(), uri)
Exemplo n.º 20
0
    def test_attributes_without_netloc(self):
        # This example is straight from RFC 3261.  It looks like it
        # should allow the username, hostname, and port to be filled
        # in, but doesn't.  Since it's a URI and doesn't use the
        # scheme://netloc syntax, the netloc and related attributes
        # should be left empty.
        uri = "sip:[email protected];maddr=239.255.255.1;ttl=15"
        p = urlparse.urlsplit(uri)
        self.assertEqual(p.netloc, "")
        self.assertEqual(p.username, None)
        self.assertEqual(p.password, None)
        self.assertEqual(p.hostname, None)
        self.assertEqual(p.port, None)
        self.assertEqual(p.geturl(), uri)

        p = urlparse.urlparse(uri)
        self.assertEqual(p.netloc, "")
        self.assertEqual(p.username, None)
        self.assertEqual(p.password, None)
        self.assertEqual(p.hostname, None)
        self.assertEqual(p.port, None)
        self.assertEqual(p.geturl(), uri)
Exemplo n.º 21
0
 def test_unparse_parse(self):
     for u in ['Python', './Python','x-newscheme://foo.com/stuff','x://y','x:/y','x:/','/',]:
         self.assertEqual(urlparse.urlunsplit(urlparse.urlsplit(u)), u)
         self.assertEqual(urlparse.urlunparse(urlparse.urlparse(u)), u)
Exemplo n.º 22
0
 def test_noslash(self):
     # Issue 1637: http://foo.com?query is legal
     self.assertEqual(urlparse.urlparse("http://example.com?blahblah=/foo"),
                      ('http', 'example.com', '', '', 'blahblah=/foo', ''))
Exemplo n.º 23
0
 def test_RFC2368(self):
     # Issue 11467: path that starts with a number is not parsed correctly
     self.assertEqual(urlparse.urlparse('mailto:[email protected]'),
                      ('mailto', '', '*****@*****.**', '', '', ''))
Exemplo n.º 24
0
 def test_RFC2368(self):
     # Issue 11467: path that starts with a number is not parsed correctly
     self.assertEqual(urlparse.urlparse('mailto:[email protected]'),
             ('mailto', '', '*****@*****.**', '', '', ''))
Exemplo n.º 25
0
 def test_noslash(self):
     # Issue 1637: http://foo.com?query is legal
     self.assertEqual(urlparse.urlparse("http://example.com?blahblah=/foo"),
                      ('http', 'example.com', '', '', 'blahblah=/foo', ''))