def test_int_to_ip(self):
    # Each fixture in self.ip_pairs is an (ip, ip_int) pair; converting the
    # integer with util.int_to_ip must give back the paired ip value.
    for expected_ip, int_value in self.ip_pairs:
        converted = util.int_to_ip(int_value)
        eq_(converted, expected_ip)
def canonical(self):
    """Convert URL to its canonical form.

    Works on ``self.url`` and performs, in order:

    1. Strip surrounding whitespace, drop embedded CR/LF/TAB characters and
       cut off the fragment (everything from the first ``#``).
    2. Percent-decode repeatedly until stable, then re-escape exactly once.
    3. Prepend ``http://`` when the URL has no scheme.
    4. Normalize the path (resolve ``.``/``..``, collapse duplicate
       slashes) while keeping a trailing slash if one was present.
    5. Normalize the host: strip leading/trailing dots, collapse runs of
       dots, lowercase, and turn an all-digit or dot-less ``0x...`` host
       into the value returned by ``util.int_to_ip``.

    Returns:
        The canonical URL string ``scheme://host[:port]path[?query]``.
        The query is kept (possibly empty) whenever the URL contained a
        ``?``; it is omitted otherwise.

    Raises:
        ValueError: propagated from ``url_parts.port`` for an invalid port,
            or from ``int(host, 16)`` when a dot-less host starts with
            ``0x`` but is not valid hexadecimal.
    """
    def full_unescape(url):
        """Undo escaping of special characters in url.

        Decoding is repeated until a pass leaves the string unchanged, so
        multiply-encoded input (e.g. ``%2541`` -> ``%41`` -> ``A``) is fully
        decoded. Implemented as a loop rather than recursion so that deeply
        nested encodings in untrusted URLs cannot exhaust the call stack.
        """
        while True:
            unescaped_url = urllib.parse.unquote(url)
            if unescaped_url == url:
                return unescaped_url
            url = unescaped_url

    def quote(unsafe_string):
        """Return url safe representation of input with special characters escaped."""
        # Punctuation listed here is left as-is; '%' and '#' are deliberately
        # absent, so they are always percent-escaped.
        safe_chars = '!"$&\'()*+,-./:;<=>?@[\\]^_`{|}~'
        return urllib.parse.quote(unsafe_string, safe=safe_chars)

    url = self.url.strip()
    url = url.replace('\n', '').replace('\r', '').replace('\t', '')
    # Drop the fragment before any decoding so an escaped '#' is not
    # mistaken for a fragment separator later on.
    url = url.split('#', 1)[0]
    url = quote(full_unescape(url))
    url_parts = urllib.parse.urlsplit(url)
    log.log(TRACE, 'url parts are %s', url_parts)
    if not url_parts[0]:
        # No scheme present: assume http and parse again.
        url = 'http://%s' % url
        url_parts = urllib.parse.urlsplit(url)
    protocol = url_parts.scheme
    # NOTE(review): url_parts.hostname is None when the URL has no network
    # location; unquote() would then fail. Confirm callers never pass such
    # URLs.
    host = full_unescape(url_parts.hostname)
    path = full_unescape(url_parts.path)
    log.log(TRACE, "url host is '%s' and url path is '%s'", host, path)

    query = url_parts.query
    if not query and '?' not in url:
        # Distinguish "no query at all" (None) from an empty query ("?").
        query = None

    if not path:
        path = '/'
    # normpath() drops a trailing slash, so remember it and restore it below.
    has_trailing_slash = (path[-1] == '/')
    log.log(TRACE, 'url hash trailing slash: %s', has_trailing_slash)
    # normpath() keeps a leading '//' intact; the replace() collapses it.
    path = posixpath.normpath(path).replace('//', '/')
    if has_trailing_slash and path[-1] != '/':
        path = path + '/'

    port = url_parts.port
    host = host.strip('.')
    host = re.sub(r'\.+', '.', host).lower()
    if host.isdigit():
        # Host given as a single decimal integer.
        log.debug("Host is digit: %s", host)
        host = util.int_to_ip(int(host))
        log.debug("after conversion host is now %s", host)
    if host.startswith('0x') and '.' not in host:
        # Host given as a single hexadecimal integer.
        log.debug("Host is hex: %s", host)
        host = util.int_to_ip(int(host, 16))
        log.debug("after conversion host is now %s", host)

    quoted_path = quote(path)
    quoted_host = quote(host)
    if port is not None:
        quoted_host = '%s:%s' % (quoted_host, port)
    canonical_url = '%s://%s%s' % (protocol, quoted_host, quoted_path)
    if query is not None:
        canonical_url = '%s?%s' % (canonical_url, query)
    log.log(TRACE, 'returning canonical url %s', canonical_url)
    return canonical_url