def urlsplit(url, scheme='', allow_fragments=True): """Parse a URL into 5 components: <scheme>://<netloc>/<path>?<query>#<fragment> Return a 5-tuple: (scheme, netloc, path, query, fragment). Note that we don't break the components up in smaller bits (e.g. netloc is a single string) and we don't expand % escapes.""" allow_fragments = bool(allow_fragments) key = url, scheme, allow_fragments, type(url), type(scheme) cached = _parse_cache.get(key, None) if cached: return cached if len(_parse_cache) >= MAX_CACHE_SIZE: # avoid runaway growth clear_cache() netloc = query = fragment = '' i = url.find(':') if i > 0: if url[:i] == 'http': # optimize the common case scheme = url[:i].lower() url = url[i + 1:] if url[:2] == '//': netloc, url = _splitnetloc(url, 2) if (('[' in netloc and ']' not in netloc) or (']' in netloc and '[' not in netloc)): raise ValueError("Invalid IPv6 URL") if allow_fragments and '#' in url: url, fragment = url.split('#', 1) if '?' in url: url, query = url.split('?', 1) v = SplitResult(scheme, netloc, url, query, fragment) _parse_cache[key] = v return v for c in url[:i]: if c not in scheme_chars: break else: # make sure "url" is not actually a port number (in which case # "scheme" is really part of the path) rest = url[i + 1:] if not rest or any(c not in '0123456789' for c in rest): # not a port number scheme, url = url[:i].lower(), rest if url[:2] == '//': netloc, url = _splitnetloc(url, 2) if (('[' in netloc and ']' not in netloc) or (']' in netloc and '[' not in netloc)): raise ValueError("Invalid IPv6 URL") if allow_fragments and '#' in url: url, fragment = url.split('#', 1) if '?' in url: url, query = url.split('?', 1) v = SplitResult(scheme, netloc, url, query, fragment) _parse_cache[key] = v return v
def _urlsplit(url, scheme='', allow_fragments=True): """Parse a URL into 5 components: <scheme>://<netloc>/<path>?<query>#<fragment> Return a 5-tuple: (scheme, netloc, path, query, fragment). Note that we don't break the components up in smaller bits (e.g. netloc is a single string) and we don't expand % escapes.""" url, scheme, _coerce_result = _coerce_args(url, scheme) netloc = query = fragment = '' i = url.find(':') if i > 0: for c in url[:i]: if c not in scheme_chars: break else: scheme, url = url[:i].lower(), url[i + 1:] if url[:2] == '//': netloc, url = _splitnetloc(url, 2) if (('[' in netloc and ']' not in netloc) or (']' in netloc and '[' not in netloc)): raise ValueError("Invalid IPv6 URL") if allow_fragments and '#' in url: url, fragment = url.split('#', 1) if '?' in url: url, query = url.split('?', 1) v = SplitResult(scheme, netloc, url, query, fragment) return _coerce_result(v)
def _urlsplit(url, scheme="", allow_fragments=True): """Parse a URL into 5 components: <scheme>://<netloc>/<path>?<query>#<fragment> Return a 5-tuple: (scheme, netloc, path, query, fragment). Note that we don't break the components up in smaller bits (e.g. netloc is a single string) and we don't expand % escapes.""" url, scheme, _coerce_result = _coerce_args(url, scheme) allow_fragments = bool(allow_fragments) netloc = query = fragment = "" i = url.find(":") if i > 0: for c in url[:i]: if c not in scheme_chars: break else: scheme, url = url[:i].lower(), url[i + 1:] if url[:2] == "//": netloc, url = _splitnetloc(url, 2) if ("[" in netloc and "]" not in netloc) or ("]" in netloc and "[" not in netloc): raise ValueError("Invalid IPv6 URL") if allow_fragments and "#" in url: url, fragment = url.split("#", 1) if "?" in url: url, query = url.split("?", 1) v = SplitResult(scheme, netloc, url, query, fragment) return _coerce_result(v)
def _urlsplit(url, scheme='', allow_fragments=True): """Parse a URL into 5 components: <scheme>://<netloc>/<path>?<query>#<fragment> Return a 5-tuple: (scheme, netloc, path, query, fragment). Note that we don't break the components up in smaller bits (e.g. netloc is a single string) and we don't expand % escapes.""" url, scheme, _coerce_result = _coerce_args(url, scheme) allow_fragments = bool(allow_fragments) netloc = query = fragment = '' i = url.find(':') if i > 0: for c in url[:i]: if c not in scheme_chars: break else: scheme, url = url[:i].lower(), url[i + 1:] if url[:2] == '//': netloc, url = _splitnetloc(url, 2) if (('[' in netloc and ']' not in netloc) or (']' in netloc and '[' not in netloc)): raise ValueError("Invalid IPv6 URL") if allow_fragments and '#' in url: url, fragment = url.split('#', 1) if '?' in url: url, query = url.split('?', 1) v = SplitResult(scheme, netloc, url, query, fragment) return _coerce_result(v)
def _urlsplit_netloc(url: str) -> Tuple[str, str]: netloc, url = _splitnetloc(url, 2) if ( ('[' in netloc and ']' not in netloc) or # noqa: W504, W503 (']' in netloc and '[' not in netloc) ): raise ValueError("Invalid IPv6 URL") return netloc, url
def _urlsplit(url, scheme='', allow_fragments=True): """Templating safe version of urllib.parse.urlsplit Ignores '?' and '#' inside {{}} templating tags. Caching disabled. """ url, scheme, _coerce_result = _coerce_args(url, scheme) allow_fragments = bool(allow_fragments) netloc = query = fragment = '' i = url.find(':') if i > 0: for c in url[:i]: if c not in scheme_chars: # pragma: no cover break # https://github.com/nedbat/coveragepy/issues/198 else: scheme, url = url[:i].lower(), url[i + 1:] if url[:2] == '//': netloc, url = _splitnetloc(url, 2) if (('[' in netloc and ']' not in netloc) or # noqa: W504, W503 (']' in netloc and '[' not in netloc)): raise ValueError("Invalid IPv6 URL") if allow_fragments and '#' in url: result = re.split(r'#(?![^{{}}]*}})', url, maxsplit=1) url = result[0] if len(result) > 1: fragment = result[1] if '?' in url: result = re.split(r'\?(?![^{{}}]*}})', url, maxsplit=1) url = result[0] if len(result) > 1: query = result[1] v = SplitResult(scheme, netloc, url, query, fragment) return _coerce_result(v)