def __call__(
    self,
    environ: WSGIEnvironment,
    start_response: Callable,
) -> Callable:
    """Modify the WSGI environ based on the various ``X-Forwarded-*``
    headers before calling the wrapped application. The original
    values are stored as a dict under ``werkzeug.proxy_fix.orig``.

    Each header is passed through ``self._get_real_value`` together
    with the matching ``self.x_*`` setting, and only a truthy result
    overrides the environ:

    - ``X-Forwarded-For`` sets ``REMOTE_ADDR``.
    - ``X-Forwarded-Proto`` sets ``wsgi.url_scheme``.
    - ``X-Forwarded-Host`` sets ``HTTP_HOST`` and ``SERVER_NAME``, and
      ``SERVER_PORT`` if the value carries a port.
    - ``X-Forwarded-Port`` sets ``SERVER_PORT`` and replaces the port
      of ``HTTP_HOST`` when ``HTTP_HOST`` is set.
    - ``X-Forwarded-Prefix`` sets ``SCRIPT_NAME``.

    Bracketed IPv6 literals are handled: ``[::1]`` is treated as a
    host without a port and ``[::1]:8080`` is split at the last colon.

    :param environ: the WSGI environment; it is modified in place.
    :param start_response: passed through to the wrapped application.
    :return: whatever the wrapped application returns.
    """
    environ_get = environ.get

    # Snapshot the values first, before any of them is overwritten below.
    environ["werkzeug.proxy_fix.orig"] = {
        "REMOTE_ADDR": environ_get("REMOTE_ADDR"),
        "wsgi.url_scheme": environ_get("wsgi.url_scheme"),
        "HTTP_HOST": environ_get("HTTP_HOST"),
        "SERVER_NAME": environ_get("SERVER_NAME"),
        "SERVER_PORT": environ_get("SERVER_PORT"),
        "SCRIPT_NAME": environ_get("SCRIPT_NAME"),
    }

    x_for = self._get_real_value(self.x_for, environ_get("HTTP_X_FORWARDED_FOR"))
    if x_for:
        environ["REMOTE_ADDR"] = x_for

    x_proto = self._get_real_value(
        self.x_proto, environ_get("HTTP_X_FORWARDED_PROTO")
    )
    if x_proto:
        environ["wsgi.url_scheme"] = x_proto

    x_host = self._get_real_value(self.x_host, environ_get("HTTP_X_FORWARDED_HOST"))
    if x_host:
        environ["HTTP_HOST"] = environ["SERVER_NAME"] = x_host
        # A trailing "]" means a bracketed IPv6 literal without a port:
        # its colons belong to the address and must not be split on.
        if ":" in x_host and not x_host.endswith("]"):
            environ["SERVER_NAME"], environ["SERVER_PORT"] = x_host.rsplit(":", 1)

    x_port = self._get_real_value(self.x_port, environ_get("HTTP_X_FORWARDED_PORT"))
    if x_port:
        host = environ_get("HTTP_HOST")
        if host:
            # Drop an existing port, again leaving IPv6 literals intact.
            if ":" in host and not host.endswith("]"):
                host = host.rsplit(":", 1)[0]
            environ["HTTP_HOST"] = f"{host}:{x_port}"
        environ["SERVER_PORT"] = x_port

    x_prefix = self._get_real_value(
        self.x_prefix, environ_get("HTTP_X_FORWARDED_PREFIX")
    )
    if x_prefix:
        environ["SCRIPT_NAME"] = x_prefix

    return self.app(environ, start_response)
def pop_path_info(
    environ: WSGIEnvironment,
    charset: Optional[str] = "utf-8",
    errors: str = "replace",
) -> Optional[Union[str, bytes]]:
    """Removes and returns the next segment of `PATH_INFO`, pushing it onto
    `SCRIPT_NAME`.  Returns `None` if there is nothing left on `PATH_INFO`.

    If the `charset` is set to `None` bytes are returned.

    If there are empty segments (``'/foo//bar'``) these are ignored but
    properly pushed to the `SCRIPT_NAME`:

    >>> env = {'SCRIPT_NAME': '/foo', 'PATH_INFO': '/a/b'}
    >>> pop_path_info(env)
    'a'
    >>> env['SCRIPT_NAME']
    '/foo/a'
    >>> pop_path_info(env)
    'b'
    >>> env['SCRIPT_NAME']
    '/foo/a/b'

    .. versionadded:: 0.5

    .. versionchanged:: 0.9
       The path is now decoded and a charset and encoding
       parameter can be provided.

    :param environ: the WSGI environment that is modified.
    :param charset: The ``encoding`` parameter passed to
        :func:`bytes.decode`, or ``None`` to get the raw bytes.
    :param errors: The ``errors`` parameter passed to
        :func:`bytes.decode`.
    :return: the popped segment, or ``None`` when `PATH_INFO` is missing
        or empty.
    """
    path = environ.get("PATH_INFO")
    if not path:
        return None

    script_name = environ.get("SCRIPT_NAME", "")

    # Shift every leading slash over to SCRIPT_NAME so that empty
    # segments are preserved there instead of being lost.
    stripped = path.lstrip("/")
    script_name += "/" * (len(path) - len(stripped))

    # ``separator`` is empty when ``stripped`` was the last segment.
    segment, separator, remainder = stripped.partition("/")
    environ["PATH_INFO"] = f"/{remainder}" if separator else ""
    environ["SCRIPT_NAME"] = script_name + segment

    return _to_str(segment.encode("latin1"), charset, errors, allow_none_charset=True)
def get_current_url(
    environ: WSGIEnvironment,
    root_only: bool = False,
    strip_querystring: bool = False,
    host_only: bool = False,
    trusted_hosts: Optional[List[str]] = None,
) -> str:
    """Rebuild the URL of the current request, or a prefix of it, as an
    IRI.  Example:

    >>> from werkzeug.test import create_environ
    >>> env = create_environ("/?param=foo", "http://localhost/script")
    >>> get_current_url(env)
    'http://localhost/script/?param=foo'
    >>> get_current_url(env, root_only=True)
    'http://localhost/script/'
    >>> get_current_url(env, host_only=True)
    'http://localhost/'
    >>> get_current_url(env, strip_querystring=True)
    'http://localhost/script/'

    The host is obtained through :func:`get_host`, which receives
    ``trusted_hosts`` and can thereby verify that the host is in a list
    of trusted hosts.  If the host is not in there it will raise a
    :exc:`~werkzeug.exceptions.SecurityError`.

    Note that the string returned might contain unicode characters as the
    representation is an IRI not an URI.  If you need an ASCII only
    representation you can use the :func:`~werkzeug.urls.iri_to_uri`
    function:

    >>> from werkzeug.urls import iri_to_uri
    >>> iri_to_uri(get_current_url(env))
    'http://localhost/script/?param=foo'

    :param environ: the WSGI environment to get the current URL from.
    :param root_only: set `True` if you only want the root URL
        (scheme, host and ``SCRIPT_NAME``).
    :param strip_querystring: set to `True` if you don't want the
        querystring.
    :param host_only: set to `True` if only the scheme and host should
        be returned.
    :param trusted_hosts: a list of trusted hosts, see
        :func:`host_is_trusted` for more information.
    """
    origin = "".join(
        (environ["wsgi.url_scheme"], "://", get_host(environ, trusted_hosts))
    )
    if host_only:
        return uri_to_iri(f"{origin}/")

    # SCRIPT_NAME always ends up followed by exactly one slash.
    script_root = url_quote(environ.get("SCRIPT_NAME", "").encode("latin1"))
    pieces = [origin, script_root.rstrip("/"), "/"]

    if not root_only:
        raw_path = environ.get("PATH_INFO", "").encode("latin1")
        pieces.append(url_quote(raw_path.lstrip(b"/")))

        # The query string only makes sense together with the path.
        if not strip_querystring:
            query = get_query_string(environ)
            if query:
                pieces.append(f"?{query}")

    return uri_to_iri("".join(pieces))
def inject_wsgi(self, environ: WSGIEnvironment) -> None:
    """Write the cookies of this jar into ``environ`` as the ``Cookie``
    request header.

    Iterating ``self`` must yield objects with ``name`` and ``value``
    attributes.  They are serialized as ``name=value`` pairs joined by
    ``"; "`` and stored under ``HTTP_COOKIE``.  If there are no cookies,
    any existing ``HTTP_COOKIE`` entry is removed.

    :param environ: the WSGI environment to modify in place.
    """
    # Every pair contains at least "=", so the joined header is empty
    # exactly when there are no cookies at all.
    header = "; ".join(f"{cookie.name}={cookie.value}" for cookie in self)

    if header:
        environ["HTTP_COOKIE"] = header
    else:
        environ.pop("HTTP_COOKIE", None)
def check_environ(
    self,
    environ: WSGIEnvironment,
) -> None:
    """Emit a :class:`WSGIWarning` for each problem found in ``environ``.

    The following is checked:

    - ``environ`` is exactly a :class:`dict`, not a subclass or another
      mapping type.
    - the required keys are present.
    - ``wsgi.version``, when present, equals ``(1, 0)``.
    - ``SCRIPT_NAME`` and ``PATH_INFO``, when non-empty, start with a
      slash.

    Nothing is raised for a malformed environ; every finding is reported
    through :func:`warn` only.

    :param environ: the WSGI environment to inspect.
    """
    # An exact type check is intended here; subclasses are reported too.
    if type(environ) is not dict:
        warn(
            "WSGI environment is not a standard Python dict.",
            WSGIWarning,
            stacklevel=4,
        )

    for key in (
        "REQUEST_METHOD",
        "SERVER_NAME",
        "SERVER_PORT",
        "wsgi.version",
        "wsgi.input",
        "wsgi.errors",
        "wsgi.multithread",
        "wsgi.multiprocess",
        "wsgi.run_once",
    ):
        if key not in environ:
            warn(
                f"Required environment key {key!r} not found",
                WSGIWarning,
                stacklevel=3,
            )

    # A missing ``wsgi.version`` was already reported by the loop above;
    # indexing it unconditionally would turn that warning into a KeyError.
    if "wsgi.version" in environ and environ["wsgi.version"] != (1, 0):
        warn("Environ is not a WSGI 1.0 environ.", WSGIWarning, stacklevel=3)

    script_name = environ.get("SCRIPT_NAME", "")
    path_info = environ.get("PATH_INFO", "")

    if script_name and script_name[0] != "/":
        warn(
            f"'SCRIPT_NAME' does not start with a slash: {script_name!r}",
            WSGIWarning,
            stacklevel=3,
        )

    if path_info and path_info[0] != "/":
        warn(
            f"'PATH_INFO' does not start with a slash: {path_info!r}",
            WSGIWarning,
            stacklevel=3,
        )
def peek_path_info(
    environ: WSGIEnvironment,
    charset: Optional[str] = "utf-8",
    errors: str = "replace",
) -> Optional[Union[str, bytes]]:
    """Returns the next segment on the `PATH_INFO` or `None` if there
    is none.  Works like :func:`pop_path_info` without modifying the
    environment:

    >>> env = {'SCRIPT_NAME': '/foo', 'PATH_INFO': '/a/b'}
    >>> peek_path_info(env)
    'a'
    >>> peek_path_info(env)
    'a'

    If the `charset` is set to `None` bytes are returned.

    .. versionadded:: 0.5

    .. versionchanged:: 0.9
       The path is now decoded and a charset and encoding
       parameter can be provided.

    :param environ: the WSGI environment that is checked.
    :param charset: The ``encoding`` used to decode the segment, or
        ``None`` to get the raw bytes.
    :param errors: The ``errors`` parameter used when decoding.
    :return: the next segment, or ``None`` when `PATH_INFO` is missing
        or empty.
    """
    path = environ.get("PATH_INFO")

    # ``str.split`` never returns an empty list, so the "nothing left"
    # case has to be detected on the path itself.
    if not path:
        return None

    segment = path.lstrip("/").split("/", 1)[0]
    return _to_str(segment.encode("latin1"), charset, errors, allow_none_charset=True)
def get_content_length(environ: WSGIEnvironment) -> Optional[int]:
    """Returns the content length from the WSGI environment as
    integer.  If it's not available or chunked transfer encoding is used,
    ``None`` is returned.

    A string value must consist of ASCII digits only, optionally
    surrounded by whitespace.  A leading ``-`` is tolerated and the
    result clamped to ``0``.  Anything else, such as ``"+5"`` or
    ``"1_000"``, is treated as unavailable.

    .. versionadded:: 0.9

    :param environ: the WSGI environ to fetch the content length from.
    :return: the non-negative content length, or ``None``.
    """
    transfer_encoding = environ.get("HTTP_TRANSFER_ENCODING", "")
    # Transfer-coding names are case-insensitive (RFC 7230, section 4).
    if (
        isinstance(transfer_encoding, str)
        and transfer_encoding.strip().lower() == "chunked"
    ):
        return None

    content_length = environ.get("CONTENT_LENGTH")
    if content_length is None:
        return None

    if isinstance(content_length, str):
        content_length = content_length.strip()
        # ``int()`` would also accept signs, underscores and other
        # spellings that are not a valid ``Content-Length`` (1*DIGIT).
        if content_length.startswith("-"):
            digits = content_length[1:]
        else:
            digits = content_length
        # Stripping all ASCII digits must leave nothing behind.
        if not digits or digits.strip("0123456789"):
            return None

    try:
        return max(0, int(content_length))
    except (ValueError, TypeError):
        return None
def append_slash_redirect(
    environ: WSGIEnvironment,
    code: int = 301,
) -> "Response":
    """Redirects to the same URL but with a slash appended.  The behavior
    of this function is undefined if the path ends with a slash already.

    The redirect target is a relative reference made of the last segment
    of ``PATH_INFO`` plus ``/``.  A relative reference is resolved
    against the current URL, so repeating the leading segments would
    duplicate them (``/foo/bar`` would end up at ``/foo/foo/bar/``).

    :param environ: the WSGI environment for the request that triggers
                    the redirect.
    :param code: the status code for the redirect.
    :return: the value returned by :func:`redirect` for the new location.
    """
    # Only the part after the last slash is needed for a relative target.
    tail = environ["PATH_INFO"].rpartition("/")[2]
    # An empty tail means the path is empty or already ends with a
    # slash; "./" keeps the target inside the current directory.
    new_path = f"{tail}/" if tail else "./"

    query_string = environ.get("QUERY_STRING")
    if query_string:
        new_path = f"{new_path}?{query_string}"

    return redirect(new_path, code)
def parse_from_environ(
    self, environ: WSGIEnvironment
) -> Tuple[BytesIO, Type[dict], Type[dict]]:
    """Parse the request described by ``environ`` as form data.

    The content type, its options, the content length and the input
    stream are read from the environment and handed to :meth:`parse`.

    :param environ: the WSGI environment to be used for parsing.
    :return: A tuple in the form ``(stream, form, files)``.
    """
    raw_content_type = environ.get("CONTENT_TYPE", "")
    length = get_content_length(environ)
    mimetype, options = parse_options_header(raw_content_type)
    stream = get_input_stream(environ)
    return self.parse(stream, mimetype, length, options)
def get_query_string(environ: WSGIEnvironment) -> str:
    """Return the ``QUERY_STRING`` of the WSGI environment, quoted.

    The value is converted back to bytes with ``latin1`` (the WSGI
    decoding dance) and then passed through :func:`url_quote`.  The
    string returned will be restricted to ASCII characters.

    :param environ: WSGI environment to get the query string from.

    .. versionadded:: 0.9
    """
    raw_query = environ.get("QUERY_STRING", "")
    query_bytes = raw_query.encode("latin1")
    # QUERY_STRING really should be ASCII safe, but some browsers send
    # unicode anyway.  Quote whatever is not in the safe set below.
    return url_quote(query_bytes, safe=":&%=+$!*'(),")
def get_script_name(
    environ: WSGIEnvironment,
    charset: Optional[str] = "utf-8",
    errors: str = "replace",
) -> Union[str, bytes]:
    """Fetch ``SCRIPT_NAME`` from the WSGI environment and decode it,
    unless `charset` is ``None``.

    A missing ``SCRIPT_NAME`` is treated as an empty string.

    :param environ: WSGI environment to get the path from.
    :param charset: The charset for the path, or ``None`` if no decoding
        should be performed.
    :param errors: The decoding error handling.

    .. versionadded:: 0.9
    """
    raw_script_name = environ.get("SCRIPT_NAME", "")
    # Convert back to bytes with latin1 before decoding with ``charset``.
    script_bytes = raw_script_name.encode("latin1")
    return _to_str(script_bytes, charset, errors, allow_none_charset=True)
def _process_range_request(
    self,
    environ: WSGIEnvironment,
    complete_length: Optional[int] = None,
    accept_ranges: Optional[Union[str, bool]] = None,
) -> bool:
    """Handle Range Request related headers (RFC7233).  If `Accept-Ranges`
    header is valid, and Range Request is processable, we set the headers
    as described by the RFC, and wrap the underlying response in a
    RangeWrapper.

    The request is not treated as a range request when ``accept_ranges``
    or ``complete_length`` is ``None``, when ``complete_length`` is ``0``,
    or when ``self._is_range_request_processable(environ)`` is false.

    On success this sets the ``Content-Length`` and ``Accept-Ranges``
    headers, ``content_range`` and status code 206 on ``self``.

    :param environ: the WSGI environment of the request.
    :param complete_length: the full length of the resource, if known.
    :param accept_ranges: the value to send as ``Accept-Ranges``.
    :return: ``True`` if Range Request can be fulfilled, ``False``
        otherwise.

    :raises: :class:`~werkzeug.exceptions.RequestedRangeNotSatisfiable`
             if `Range` header could not be parsed or satisfied.
    """
    if (
        accept_ranges is None
        or complete_length is None
        # An empty resource is served as a normal (empty) response
        # instead of being run through range processing.
        or complete_length == 0
        or not self._is_range_request_processable(environ)
    ):
        return False

    # Only needed from here on, so the non-range path skips the import.
    from ..exceptions import RequestedRangeNotSatisfiable

    parsed_range = parse_range_header(environ.get("HTTP_RANGE"))
    if parsed_range is None:
        raise RequestedRangeNotSatisfiable(complete_length)

    range_tuple = parsed_range.range_for_length(complete_length)
    content_range_header = parsed_range.to_content_range_header(complete_length)
    if range_tuple is None or content_range_header is None:
        raise RequestedRangeNotSatisfiable(complete_length)

    # ``range_tuple`` is (start, stop); the difference is the byte count.
    content_length = range_tuple[1] - range_tuple[0]
    self.headers["Content-Length"] = content_length
    self.headers["Accept-Ranges"] = accept_ranges
    self.content_range = content_range_header
    self.status_code = 206
    self._wrap_response(range_tuple[0], content_length)
    return True
def wrap_file(
    environ: WSGIEnvironment,
    file: Union[FileIO, BufferedReader],
    buffer_size: int = 8192,
) -> "FileWrapper":
    """Wrap ``file`` with the WSGI server's file wrapper if the environ
    provides one under ``wsgi.file_wrapper``, otherwise with the generic
    :class:`FileWrapper`.

    .. versionadded:: 0.5

    If the file wrapper from the WSGI server is used it's important to not
    iterate over it from inside the application but to pass it through
    unchanged.  If you want to pass out a file wrapper inside a response
    object you have to set :attr:`~BaseResponse.direct_passthrough` to `True`.

    More information about file wrappers is available in :pep:`333`.

    :param environ: the WSGI environment that may supply
        ``wsgi.file_wrapper``.
    :param file: a :class:`file`-like object with a :meth:`~file.read` method.
    :param buffer_size: number of bytes for one iteration.
    """
    wrapper_factory = environ.get("wsgi.file_wrapper", FileWrapper)
    return wrapper_factory(file, buffer_size)
def get_input_stream(
    environ: WSGIEnvironment, safe_fallback: bool = True
) -> Union[BytesIO, "LimitedStream"]:
    """Return the input stream of the WSGI environment, wrapped in the
    most sensible way possible.  The stream returned is not the raw WSGI
    stream in most cases but one that is safe to read from without
    taking into account the content length.

    The result is chosen as follows:

    - ``wsgi.input_terminated`` is set: the raw stream, unchanged.
    - a content length is known: a :class:`LimitedStream` over the raw
      stream, limited to that length.
    - otherwise: an empty stream if ``safe_fallback`` is true, else the
      raw stream.

    If the WSGI server supports chunked or infinite streams, it should
    set the ``wsgi.input_terminated`` value in the WSGI environ to
    indicate that.

    .. versionadded:: 0.9

    :param environ: the WSGI environ to fetch the stream from.
    :param safe_fallback: use an empty stream as a safe fallback when the
        content length is not set. Disabling this allows infinite streams,
        which can be a denial-of-service risk.
    """
    raw_stream = environ["wsgi.input"]
    length = get_content_length(environ)

    # WSGI extension: the server guarantees the input ends by itself, so
    # it can be read to the end without any limit.
    if environ.get("wsgi.input_terminated"):
        return raw_stream

    if length is not None:
        return LimitedStream(raw_stream, length)

    # Without a content length the raw stream could be infinite, whether
    # by malice or not, so it is only returned when explicitly allowed.
    if safe_fallback:
        return io.BytesIO()

    return raw_stream
def is_resource_modified(
    environ: WSGIEnvironment,
    etag: Optional[str] = None,
    data: Optional[Union[bytes, str]] = None,
    last_modified: Optional[datetime] = None,
    ignore_if_range: bool = True,
) -> bool:
    """Convenience helper for evaluating conditional requests.

    The date check runs first.  If an etag is available, the etag checks
    run afterwards and their outcome replaces the date-based one.

    :param environ: the WSGI environment of the request to be checked.
    :param etag: the etag for the response for comparison.
    :param data: or alternatively the data of the response to automatically
                 generate an etag using :func:`generate_etag`.
    :param last_modified: an optional date of the last modification.
    :param ignore_if_range: If `False`, `If-Range` header will be taken into
                            account.
    :return: `True` if the resource was modified, otherwise `False`.
    :raises TypeError: if both ``etag`` and ``data`` are given.

    .. versionchanged:: 1.0.0
        The check is run for methods other than ``GET`` and ``HEAD``.
    """
    if data is not None:
        if etag is not None:
            raise TypeError("both data and etag given")
        etag = generate_etag(data)  # type: ignore

    if isinstance(last_modified, str):
        last_modified = parse_date(last_modified)

    # The HTTP spec does not transmit microseconds, so drop them here as
    # well to avoid false positives.  See issue #39
    if last_modified is not None:
        last_modified = last_modified.replace(microsecond=0)

    # https://tools.ietf.org/html/rfc7233#section-3.2
    # A server MUST ignore an If-Range header field received in a request
    # that does not contain a Range header field.
    if_range = None
    if not ignore_if_range and "HTTP_RANGE" in environ:
        if_range = parse_if_range_header(environ.get("HTTP_IF_RANGE"))

    # A date carried by If-Range takes the place of If-Modified-Since.
    if if_range is not None and if_range.date is not None:
        modified_since = if_range.date
    else:
        modified_since = parse_date(environ.get("HTTP_IF_MODIFIED_SINCE"))

    unmodified = bool(
        modified_since and last_modified and last_modified <= modified_since
    )

    if etag:
        etag, _ = unquote_etag(etag)

        if if_range is not None and if_range.etag is not None:
            unmodified = parse_etags(if_range.etag).contains(etag)
        else:
            # https://tools.ietf.org/html/rfc7232#section-3.2
            # "A recipient MUST use the weak comparison function when
            # comparing entity-tags for If-None-Match"
            none_match_tags = parse_etags(environ.get("HTTP_IF_NONE_MATCH"))
            if none_match_tags:
                unmodified = none_match_tags.contains_weak(etag)

            # https://tools.ietf.org/html/rfc7232#section-3.1
            # "Origin server MUST use the strong comparison function when
            # comparing entity-tags for If-Match"
            match_tags = parse_etags(environ.get("HTTP_IF_MATCH"))
            if match_tags:
                unmodified = not match_tags.is_strong(etag)

    return not unmodified