def test_normalize_base_url():
    """Check that ``normalize_url`` resolves relative URLs against ``base_url``.

    Each case pairs a relative reference with the absolute URL expected when
    it is normalized with the feed URL passed as ``base_url``.
    """
    feed_url = 'http://blog.example.com/feed.xml'
    expectations = (
        # Absolute path.
        ('/post/123.html', 'http://blog.example.com/post/123.html'),
        # Path without a leading slash.
        ('post/123.html', 'http://blog.example.com/post/123.html'),
        # Bare root path.
        ('/', 'http://blog.example.com/'),
    )
    for relative, resolved in expectations:
        assert normalize_url(relative, base_url=feed_url) == resolved
def test_normalize_url():
    """Check ``normalize_url`` against a table mapping input to expected output.

    NOTE(review): a later function in this file is also named
    ``test_normalize_url``. That later definition rebinds the name, so this
    version appears never to be collected by pytest. Its cases also appear in
    the later table - confirm whether this copy is still needed.
    """
    # NOTE(review): a few inputs (e.g. 'http://example.comblog') look as if a
    # separator character may have been lost - confirm against upstream.
    expected_by_input = {
        # Inputs expected to pass through unchanged (None becomes '').
        None: '',
        'hello world': 'hello world',
        '你好世界': '你好世界',
        '2fd1ca54895': '2fd1ca54895',
        # Missing or partial scheme is expected to become http://.
        'www.example.com': 'http://www.example.com',
        '://www.example.com': 'http://www.example.com',
        # Path separator clean-up.
        'http://example.comblog': 'http://example.com/blog',
        'http://example.com//blog': 'http://example.com/blog',
        'http://example.com%5Cblog': 'http://example.com/blog',
        'http://example.com%5Cblog/hello': 'http://example.com/blog/hello',
        # Percent-encoded colon in the scheme separator.
        'http%3A//www.example.com': 'http://www.example.com',
        # Default ports are expected to be dropped.
        'http://www.example.com:80': 'http://www.example.com',
        'https://www.example.com:443': 'https://www.example.com',
        # Two URLs glued together: the last one is expected.
        'http://www.example.comhttp://www.example.com/hello':
            'http://www.example.com/hello',
        # Whitespace in the path is expected to be percent-encoded.
        'http://www.example.com/white space':
            'http://www.example.com/white%20space',
    }
    for raw, wanted in expected_by_input.items():
        actual = normalize_url(raw)
        assert actual == wanted, (
            f'url={raw!r} normalize={actual!r} expect={wanted!r}'
        )
def test_normalize_url():
    """Table-driven check of ``normalize_url`` without a ``base_url``.

    Every entry is an ``(input, expected)`` pair; on the first mismatch the
    assertion message reports the input, the actual result and the expectation.
    """
    # NOTE(review): a few inputs (e.g. 'http://example.comblog') look as if a
    # separator character may have been lost - confirm against upstream.
    table = (
        # Inputs expected to pass through unchanged (None becomes '').
        (None, ''),
        ('hello world', 'hello world'),
        ('你好世界', '你好世界'),
        ('2fd1ca54895', '2fd1ca54895'),
        # Missing or partial scheme is expected to become http://.
        ('www.example.com', 'http://www.example.com'),
        ('://www.example.com', 'http://www.example.com'),
        # Path separator clean-up.
        ('http://example.comblog', 'http://example.com/blog'),
        ('http://example.com//blog', 'http://example.com/blog'),
        ('http://example.com%5Cblog', 'http://example.com/blog'),
        ('http://example.com%5Cblog/hello', 'http://example.com/blog/hello'),
        # Percent-encoded colon in the scheme separator.
        ('http%3A//www.example.com', 'http://www.example.com'),
        # Default ports are expected to be dropped.
        ('http://www.example.com:80', 'http://www.example.com'),
        ('https://www.example.com:443', 'https://www.example.com'),
        # Hosts containing bracket characters are expected back unchanged.
        (
            'http://example%5B.]com/x.php?age=23',
            'http://example%5B.]com/x.php?age=23',
        ),
        ('http://example%5B.]com', 'http://example%5B.]com'),
        # IPv6 literals: only the scheme's own default port is dropped.
        (
            'http://[2001:db8:85a3:8d3:1319:8a2e:370:7348]:80/',
            'http://[2001:db8:85a3:8d3:1319:8a2e:370:7348]/',
        ),
        (
            'http://[2001:db8:85a3:8d3:1319:8a2e:370:7348]:443/',
            'http://[2001:db8:85a3:8d3:1319:8a2e:370:7348]:443/',
        ),
        (
            'https://[2001:db8:85a3:8d3:1319:8a2e:370:7348]:443/',
            'https://[2001:db8:85a3:8d3:1319:8a2e:370:7348]/',
        ),
        # Two URLs glued together: the last one is expected.
        (
            'http://www.example.comhttp://www.example.com/hello',
            'http://www.example.com/hello',
        ),
        # Whitespace in the path is expected to be percent-encoded.
        (
            'http://www.example.com/white space',
            'http://www.example.com/white%20space',
        ),
        # Already well-formed URLs are expected back unchanged.
        ('https://www.example.com.cn/test', 'https://www.example.com.cn/test'),
        (
            'https://www.bmpi.dev/dev/guide-to-serverless',
            'https://www.bmpi.dev/dev/guide-to-serverless',
        ),
    )
    for given, wanted in table:
        got = normalize_url(given)
        assert got == wanted, (
            f'url={given!r} normalize={got!r} expect={wanted!r}'
        )
def test_normalize_invalid_url():
    """Run ``normalize_url`` over every line of ``test_normalize_url.txt``.

    Lines starting with ``urn:`` or ``magnet:`` must come back unchanged.
    For every other line, ``validate_url`` applied to the normalized value
    must return that same value.
    """
    passthrough_prefixes = ('urn:', 'magnet:')
    fixture_lines = _read_text('test_normalize_url.txt').strip().splitlines()
    for line in fixture_lines:
        normalized = normalize_url(line)
        if line.startswith(passthrough_prefixes):
            assert normalized == line
            continue
        assert validate_url(normalized) == normalized
def test_normalize_quote():
    """Check that percent-encoded paths keep their encoding when resolved.

    Each path is normalized with the feed URL as ``base_url``; the result must
    equal the site origin followed by the path exactly as given.
    """
    origin = 'http://blog.example.com'
    feed_url = 'http://blog.example.com/feed.xml'
    quoted_paths = (
        '/post/2019-01-10-%E5%AF%BB%E6%89%BE-sourcetree-%E6%9B%BF%E4%BB%A3%E5%93%81/',
        '/notes/%E8%9A%81%E9%98%85%E6%80%A7%E8%83%BD%E4%BC%98%E5%8C%96%E8%AE%B0%E5%BD%95',
    )
    for quoted in quoted_paths:
        assert normalize_url(quoted, base_url=feed_url) == origin + quoted