def url_unparse(components):
    """The reverse operation to :meth:`url_parse`.  This accepts arbitrary
    as well as :class:`URL` tuples and returns a URL as a string.

    :param components: the parsed URL as tuple which should be converted
                       into a URL string.
    """
    scheme, netloc, path, query, fragment = \
        normalize_string_tuple(components)
    s = make_literal_wrapper(scheme)
    url = s('')

    # We generally treat file:///x and file:/x the same which is also
    # what browsers seem to do.  This also allows us to ignore a schema
    # register for netloc utilization or having to differentiate between
    # empty and missing netloc.
    if netloc or (scheme and path.startswith(s('/'))):
        if path and path[:1] != s('/'):
            path = s('/') + path
        url = s('//') + (netloc or s('')) + path
    elif path:
        url += path
    if scheme:
        url = scheme + s(':') + url
    if query:
        url = url + s('?') + query
    if fragment:
        url = url + s('#') + fragment
    return url
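# A minimal usage sketch (not part of the original module), assuming the
# normalize_string_tuple() helper and the url_parse() shown further down
# are importable: url_unparse() reassembles the five-tuple into a URL
# string, and round-trips what url_parse() produces:
#
#     >>> url_unparse(('http', 'example.com', '/path', 'q=1', 'frag'))
#     'http://example.com/path?q=1#frag'
#     >>> url_unparse(url_parse('http://example.com/path?q=1#frag'))
#     'http://example.com/path?q=1#frag'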
def __init__(self, stream=None, filename=None, name=None,
             content_type=None, content_length=None, headers=None):
    self.name = name
    self.stream = stream or _empty_stream

    # if no filename is provided we can attempt to get the filename
    # from the stream object passed.  There we have to be careful to
    # skip things like <fdopen>, <stderr> etc.  Python marks these
    # special filenames with angular brackets.
    if filename is None:
        filename = getattr(stream, 'name', None)
        s = make_literal_wrapper(filename)
        if filename and filename[0] == s('<') and filename[-1] == s('>'):
            filename = None

        # On Python 3 we want to make sure the filename is always unicode.
        # This might not be if the name attribute is bytes due to the
        # file being opened from the bytes API.
        if isinstance(filename, bytes):
            filename = filename.decode(sys.getfilesystemencoding(),
                                       'replace')

    self.filename = filename
    if headers is None:
        headers = Headers()
    self.headers = headers
    if content_type is not None:
        headers['Content-Type'] = content_type
    if content_length is not None:
        headers['Content-Length'] = str(content_length)
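# A hedged usage sketch: this initializer matches the signature of
# werkzeug.datastructures.FileStorage (the class name is an assumption,
# since only the method is shown here). Constructing one would then
# presumably look like:
#
#     >>> from io import BytesIO
#     >>> fs = FileStorage(stream=BytesIO(b'hello'),
#     ...                  filename='greeting.txt',
#     ...                  content_type='text/plain')
#     >>> fs.filename
#     'greeting.txt'
#     >>> fs.headers['Content-Type']
#     'text/plain'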
def __init__(self, path='/', base_url=None, query_string=None,
             method='GET', input_stream=None, content_type=None,
             content_length=None, errors_stream=None, multithread=False,
             multiprocess=False, run_once=False, headers=None, data=None,
             environ_base=None, environ_overrides=None, charset='utf-8'):
    path_s = make_literal_wrapper(path)
    if query_string is None and path_s('?') in path:
        path, query_string = path.split(path_s('?'), 1)
    self.charset = charset
    self.path = iri_to_uri(path)
    if base_url is not None:
        base_url = url_fix(iri_to_uri(base_url, charset), charset)
    self.base_url = base_url
    if isinstance(query_string, (bytes, text_type)):
        self.query_string = query_string
    else:
        if query_string is None:
            query_string = MultiDict()
        elif not isinstance(query_string, MultiDict):
            query_string = MultiDict(query_string)
        self.args = query_string
    self.method = method
    if headers is None:
        headers = Headers()
    elif not isinstance(headers, Headers):
        headers = Headers(headers)
    self.headers = headers
    if content_type is not None:
        self.content_type = content_type
    if errors_stream is None:
        errors_stream = sys.stderr
    self.errors_stream = errors_stream
    self.multithread = multithread
    self.multiprocess = multiprocess
    self.run_once = run_once
    self.environ_base = environ_base
    self.environ_overrides = environ_overrides
    self.input_stream = input_stream
    self.content_length = content_length
    self.closed = False

    if data:
        if input_stream is not None:
            raise TypeError('can\'t provide input stream and data')
        if hasattr(data, 'read'):
            data = data.read()
        if isinstance(data, text_type):
            data = data.encode(self.charset)
        if isinstance(data, bytes):
            self.input_stream = BytesIO(data)
            if self.content_length is None:
                self.content_length = len(data)
        else:
            for key, value in _iter_data(data):
                if isinstance(value, (tuple, dict)) or \
                        hasattr(value, 'read'):
                    self._add_file_from_data(key, value)
                else:
                    self.form.setlistdefault(key).append(value)
def __init__(self, path='/', base_url=None, query_string=None,
             method='GET', input_stream=None, content_type=None,
             content_length=None, errors_stream=None, multithread=False,
             multiprocess=False, run_once=False, headers=None, data=None,
             environ_base=None, environ_overrides=None, charset='utf-8'):
    path_s = make_literal_wrapper(path)
    if query_string is None and path_s('?') in path:
        path, query_string = path.split(path_s('?'), 1)
    self.charset = charset
    self.path = iri_to_uri(path)
    if base_url is not None:
        base_url = url_fix(iri_to_uri(base_url, charset), charset)
    self.base_url = base_url
    if isinstance(query_string, (bytes, str)):
        self.query_string = query_string
    else:
        if query_string is None:
            query_string = MultiDict()
        elif not isinstance(query_string, MultiDict):
            query_string = MultiDict(query_string)
        self.args = query_string
    self.method = method
    if headers is None:
        headers = Headers()
    elif not isinstance(headers, Headers):
        headers = Headers(headers)
    self.headers = headers
    if content_type is not None:
        self.content_type = content_type
    if errors_stream is None:
        errors_stream = sys.stderr
    self.errors_stream = errors_stream
    self.multithread = multithread
    self.multiprocess = multiprocess
    self.run_once = run_once
    self.environ_base = environ_base
    self.environ_overrides = environ_overrides
    self.input_stream = input_stream
    self.content_length = content_length
    self.closed = False

    if data:
        if input_stream is not None:
            raise TypeError("can't provide input stream and data")
        if isinstance(data, str):
            data = data.encode(self.charset)
        if isinstance(data, bytes):
            self.input_stream = BytesIO(data)
            if self.content_length is None:
                self.content_length = len(data)
        else:
            for key, value in _iter_data(data):
                if (
                    isinstance(value, (tuple, dict))
                    or hasattr(value, 'read')
                ):
                    self._add_file_from_data(key, value)
                else:
                    self.form.setlistdefault(key).append(value)
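# A hedged usage sketch covering either variant of the initializer above:
# the signature matches werkzeug.test.EnvironBuilder (the class name is an
# assumption). A query string embedded in `path` is split off
# automatically:
#
#     >>> builder = EnvironBuilder(path='/search?q=werkzeug', method='GET')
#     >>> builder.path
#     '/search'
#     >>> builder.query_string
#     'q=werkzeug'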
def _url_decode_impl(pair_iter, charset, decode_keys, include_empty, errors):
    for pair in pair_iter:
        if not pair:
            continue
        s = make_literal_wrapper(pair)
        equal = s('=')
        if equal in pair:
            key, value = pair.split(equal, 1)
        else:
            if not include_empty:
                continue
            key = pair
            value = s('')
        key = url_unquote_plus(key, charset, errors)
        if charset is not None and PY2 and not decode_keys:
            key = try_coerce_native(key)
        yield key, url_unquote_plus(value, charset, errors)
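# A minimal sketch of how this internal generator behaves on Python 3,
# assuming the url_unquote_plus() helper from the same module: empty pairs
# are skipped, percent-escapes and plus signs are decoded, and keys without
# an '=' only survive when include_empty is set:
#
#     >>> list(_url_decode_impl(['a=1', '', 'b=2%20c'], 'utf-8',
#     ...                       True, False, 'replace'))
#     [('a', '1'), ('b', '2 c')]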
def url_parse(url, scheme=None, allow_fragments=True):
    """Parses a URL from a string into a :class:`URL` tuple.  If the URL
    is lacking a scheme it can be provided as second argument.  Otherwise,
    it is ignored.  Optionally fragments can be stripped from the URL
    by setting `allow_fragments` to `False`.

    The inverse of this function is :func:`url_unparse`.

    :param url: the URL to parse.
    :param scheme: the default scheme to use if the URL is schemeless.
    :param allow_fragments: if set to `False` a fragment will be removed
                            from the URL.
    """
    s = make_literal_wrapper(url)
    is_text_based = isinstance(url, text_type)

    if scheme is None:
        scheme = s('')
    netloc = query = fragment = s('')
    i = url.find(s(':'))
    if i > 0 and _scheme_re.match(to_native(url[:i], errors='replace')):
        # make sure "iri" is not actually a port number (in which case
        # "scheme" is really part of the path)
        rest = url[i + 1:]
        if not rest or any(c not in s('0123456789') for c in rest):
            # not a port number
            scheme, url = url[:i].lower(), rest

    if url[:2] == s('//'):
        delim = len(url)
        for c in s('/?#'):
            wdelim = url.find(c, 2)
            if wdelim >= 0:
                delim = min(delim, wdelim)
        netloc, url = url[2:delim], url[delim:]
        if (s('[') in netloc and s(']') not in netloc) or \
                (s(']') in netloc and s('[') not in netloc):
            raise ValueError('Invalid IPv6 URL')

    if allow_fragments and s('#') in url:
        url, fragment = url.split(s('#'), 1)
    if s('?') in url:
        url, query = url.split(s('?'), 1)

    result_type = URL if is_text_based else BytesURL
    return result_type(scheme, netloc, url, query, fragment)
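# A minimal usage sketch, assuming the URL/BytesURL named-tuple result
# types from the same module: text input yields a URL tuple, and the
# default scheme is applied when the input has none of its own:
#
#     >>> url_parse('http://example.com/path?q=1#frag')
#     URL(scheme='http', netloc='example.com', path='/path', query='q=1', fragment='frag')
#     >>> url_parse('//example.com/path', scheme='http')
#     URL(scheme='http', netloc='example.com', path='/path', query='', fragment='')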
def make_line_iter(stream, limit=None, buffer_size=10 * 1024):
    """Safely iterates line-based over an input stream.  If the input
    stream is not a :class:`LimitedStream` the `limit` parameter is
    mandatory.

    This uses the stream's :meth:`~file.read` method internally as opposed
    to the :meth:`~file.readline` method that is unsafe and can only be
    used in violation of the WSGI specification.  The same problem applies
    to the `__iter__` function of the input stream which calls
    :meth:`~file.readline` without arguments.

    If you need line-by-line processing it's strongly recommended to
    iterate over the input stream using this helper function.

    .. versionchanged:: 0.8
       This function now ensures that the limit was reached.

    .. versionadded:: 0.9
       added support for iterators as input stream.

    :param stream: the stream or iterable to iterate over.
    :param limit: the limit in bytes for the stream.  (Usually the content
                  length.)  Not necessary if the `stream` is a
                  :class:`LimitedStream`.
    :param buffer_size: The optional buffer size.
    """
    _iter = _make_chunk_iter(stream, limit, buffer_size)

    first_item = next(_iter, '')
    if not first_item:
        return

    s = make_literal_wrapper(first_item)
    empty = s('')
    cr = s('\r')
    lf = s('\n')
    crlf = s('\r\n')

    _iter = chain((first_item,), _iter)

    def _iter_basic_lines():
        _join = empty.join
        buffer = []
        while 1:
            new_data = next(_iter, '')
            if not new_data:
                break
            new_buf = []
            for item in chain(buffer, new_data.splitlines(True)):
                new_buf.append(item)
                if item and item[-1:] in crlf:
                    yield _join(new_buf)
                    new_buf = []
            buffer = new_buf
        if buffer:
            yield _join(buffer)

    # This hackery is necessary to merge 'foo\r' and '\n' into one item
    # of 'foo\r\n' if we were unlucky and we hit a chunk boundary.
    previous = empty
    for item in _iter_basic_lines():
        if item == lf and previous[-1:] == cr:
            previous += item
            item = empty
        if previous:
            yield previous
        previous = item
    if previous:
        yield previous
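# A minimal usage sketch, assuming the _make_chunk_iter() helper from the
# same module: lines are yielded with their original line endings intact,
# and a trailing partial line is yielded as-is:
#
#     >>> from io import BytesIO
#     >>> list(make_line_iter(BytesIO(b'a\nb\r\nc'), limit=6))
#     [b'a\n', b'b\r\n', b'c']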
def make_line_iter(stream, limit=None, buffer_size=10 * 1024,
                   cap_at_buffer=False):
    """Safely iterates line-based over an input stream.  If the input
    stream is not a :class:`LimitedStream` the `limit` parameter is
    mandatory.

    This uses the stream's :meth:`~file.read` method internally as opposed
    to the :meth:`~file.readline` method that is unsafe and can only be
    used in violation of the WSGI specification.  The same problem applies
    to the `__iter__` function of the input stream which calls
    :meth:`~file.readline` without arguments.

    If you need line-by-line processing it's strongly recommended to
    iterate over the input stream using this helper function.

    .. versionchanged:: 0.8
       This function now ensures that the limit was reached.

    .. versionadded:: 0.9
       added support for iterators as input stream.

    .. versionadded:: 0.11.10
       added support for the `cap_at_buffer` parameter.

    :param stream: the stream or iterable to iterate over.
    :param limit: the limit in bytes for the stream.  (Usually the content
                  length.)  Not necessary if the `stream` is a
                  :class:`LimitedStream`.
    :param buffer_size: The optional buffer size.
    :param cap_at_buffer: if this is set chunks are split if they are
                          longer than the buffer size.  Internally this is
                          implemented such that the buffer size might be
                          exceeded by a factor of two, however.
    """
    _iter = _make_chunk_iter(stream, limit, buffer_size)

    first_item = next(_iter, '')
    if not first_item:
        return

    s = make_literal_wrapper(first_item)
    empty = s('')
    cr = s('\r')
    lf = s('\n')
    crlf = s('\r\n')

    _iter = chain((first_item,), _iter)

    def _iter_basic_lines():
        _join = empty.join
        buffer = []
        while 1:
            new_data = next(_iter, '')
            if not new_data:
                break
            new_buf = []
            buf_size = 0
            for item in chain(buffer, new_data.splitlines(True)):
                new_buf.append(item)
                buf_size += len(item)
                if item and item[-1:] in crlf:
                    yield _join(new_buf)
                    new_buf = []
                elif cap_at_buffer and buf_size >= buffer_size:
                    rv = _join(new_buf)
                    while len(rv) >= buffer_size:
                        yield rv[:buffer_size]
                        rv = rv[buffer_size:]
                    new_buf = [rv]
            buffer = new_buf
        if buffer:
            yield _join(buffer)

    # This hackery is necessary to merge 'foo\r' and '\n' into one item
    # of 'foo\r\n' if we were unlucky and we hit a chunk boundary.
    previous = empty
    for item in _iter_basic_lines():
        if item == lf and previous[-1:] == cr:
            previous += item
            item = empty
        if previous:
            yield previous
        previous = item
    if previous:
        yield previous
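# A hedged sketch of the cap_at_buffer behaviour, under the same
# assumptions as the previous example (and assuming _make_chunk_iter()
# reads the stream in buffer_size chunks): an overlong line is split into
# buffer_size pieces instead of being buffered whole:
#
#     >>> from io import BytesIO
#     >>> list(make_line_iter(BytesIO(b'x' * 12), limit=12,
#     ...                     buffer_size=5, cap_at_buffer=True))
#     [b'xxxxx', b'xxxxx', b'xx']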
def url_join(base, url, allow_fragments=True):
    """Join a base URL and a possibly relative URL to form an absolute
    interpretation of the latter.

    :param base: the base URL for the join operation.
    :param url: the URL to join.
    :param allow_fragments: indicates whether fragments should be allowed.
    """
    if isinstance(base, tuple):
        base = url_unparse(base)
    if isinstance(url, tuple):
        url = url_unparse(url)

    base, url = normalize_string_tuple((base, url))
    s = make_literal_wrapper(base)

    if not base:
        return url
    if not url:
        return base

    bscheme, bnetloc, bpath, bquery, bfragment = \
        url_parse(base, allow_fragments=allow_fragments)
    scheme, netloc, path, query, fragment = \
        url_parse(url, bscheme, allow_fragments)
    if scheme != bscheme:
        return url
    if netloc:
        return url_unparse((scheme, netloc, path, query, fragment))
    netloc = bnetloc

    if path[:1] == s('/'):
        segments = path.split(s('/'))
    elif not path:
        segments = bpath.split(s('/'))
        if not query:
            query = bquery
    else:
        segments = bpath.split(s('/'))[:-1] + path.split(s('/'))

    # If the rightmost part is "./" we want to keep the slash but
    # remove the dot.
    if segments[-1] == s('.'):
        segments[-1] = s('')

    # Resolve ".." and "."
    segments = [segment for segment in segments if segment != s('.')]
    while 1:
        i = 1
        n = len(segments) - 1
        while i < n:
            if segments[i] == s('..') and \
                    segments[i - 1] not in (s(''), s('..')):
                del segments[i - 1:i + 1]
                break
            i += 1
        else:
            break

    # Remove trailing ".." if the URL is absolute
    unwanted_marker = [s(''), s('..')]
    while segments[:2] == unwanted_marker:
        del segments[1]

    path = s('/').join(segments)
    return url_unparse((scheme, netloc, path, query, fragment))
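# A minimal usage sketch, assuming the url_parse()/url_unparse() functions
# above: relative references are resolved against the base, including '..'
# segments:
#
#     >>> url_join('http://example.com/a/b', 'c')
#     'http://example.com/a/c'
#     >>> url_join('http://example.com/a/b/', '../c')
#     'http://example.com/a/c'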