def dump_cookie(key, value='', max_age=None, expires=None, path='/',
                domain=None, secure=None, httponly=False,
                sync_expires=True):
    """Creates a new Set-Cookie header without the ``Set-Cookie`` prefix
    The parameters are the same as in the cookie Morsel object in the
    Python standard library but it accepts unicode data, too.

    :param max_age: should be a number of seconds, or `None` (default) if
                    the cookie should last only as long as the client's
                    browser session.  Additionally `timedelta` objects
                    are accepted, too.
    :param expires: should be a `datetime` object or unix timestamp.
    :param path: limits the cookie to a given path, per default it will
                 span the whole domain.
    :param domain: Use this if you want to set a cross-domain cookie. For
                   example, ``domain=".example.com"`` will set a cookie
                   that is readable by the domain ``www.example.com``,
                   ``foo.example.com`` etc.  Otherwise, a cookie will only
                   be readable by the domain that set it.
    :param secure: The cookie will only be available via HTTPS
    :param httponly: disallow JavaScript to access the cookie.  This is an
                     extension to the cookie standard and probably not
                     supported by all browsers.
    :param charset: the encoding for unicode values.
    :param sync_expires: automatically set expires if max_age is defined
                         but expires not.
    """
    if not isinstance(key, (bytes, text_type)):
        raise TypeError('invalid key %r' % key)
    if not isinstance(value, (bytes, text_type)):
        raise TypeError('invalid value %r' % value)

    key, value = to_native(key, _cookie_charset), \
        to_native(value, _cookie_charset)

    value = quote_header_value(value)
    morsel = _ExtendedMorsel(key, value)

    if isinstance(max_age, timedelta):
        # note: microseconds are deliberately dropped here
        max_age = (max_age.days * 60 * 60 * 24) + max_age.seconds
    if expires is not None:
        if not isinstance(expires, string_types):
            expires = cookie_date(expires)
        morsel['expires'] = expires
    elif max_age is not None and sync_expires:
        morsel['expires'] = cookie_date(time() + max_age)

    if domain and ':' in domain:
        # The port part of the domain should NOT be used.  Strip it.
        domain = domain.split(':', 1)[0]

    if domain:
        # Typo fix in the message text: "localy" -> "locally",
        # "supportted" -> "supported".
        assert '.' in domain, (
            "Setting \"domain\" for a cookie on a server running locally (ex: "
            "localhost) is not supported by complying browsers. You should "
            "have something like: \"127.0.0.1 localhost dev.localhost\" on "
            "your hosts file and then point your server to run on "
            "\"dev.localhost\" and also set \"domain\" for \"dev.localhost\""
        )

    for k, v in (('path', path), ('domain', domain), ('secure', secure),
                 ('max-age', max_age), ('httponly', httponly)):
        if v is not None and v is not False:
            morsel[k] = str(v)

    return to_unicode(morsel.output(header='').lstrip(), _cookie_charset)
def sourcelines(self): """The sourcecode of the file as list of unicode strings.""" # get sourcecode from loader or file source = None if self.loader is not None: try: if hasattr(self.loader, 'get_source'): source = self.loader.get_source(self.module) elif hasattr(self.loader, 'get_source_by_code'): source = self.loader.get_source_by_code(self.code) except Exception: # we munch the exception so that we don't cause troubles # if the loader is broken. pass if source is None: try: f = open(to_native(self.filename, get_filesystem_encoding()), mode='rb') except IOError: return [] try: source = f.read() finally: f.close() # already unicode? return right away if isinstance(source, text_type): return source.splitlines() # yes. it should be ascii, but we don't want to reject too many # characters in the debugger if something breaks charset = 'utf-8' if source.startswith(UTF8_COOKIE): source = source[3:] else: for idx, match in enumerate(_line_re.finditer(source)): match = _coding_re.search(match.group()) if match is not None: charset = match.group(1) break if idx > 1: break # on broken cookies we fall back to utf-8 too charset = to_native(charset) try: codecs.lookup(charset) except LookupError: charset = 'utf-8' return source.decode(charset, 'replace').splitlines()
def iri_to_uri(iri, charset='utf-8', errors='strict'):
    r"""Convert any unicode based IRI to an acceptable ASCII URI.  Werkzeug
    always uses utf-8 URLs internally because this is what browsers and HTTP
    do as well.  In some places where it accepts an URL it also accepts a
    unicode IRI and converts it into a URI.

    Examples for IRI versus URI:

    >>> iri_to_uri(u'http://☃.net/')
    'http://xn--n3h.net/'
    >>> iri_to_uri(u'http://üser:pässword@☃.net/påth')
    'http://%C3%BCser:p%C3%[email protected]/p%C3%A5th'

    .. versionadded:: 0.6

    :param iri: The IRI to convert.
    :param charset: The charset for the URI.
    """
    # A URL tuple is also accepted; flatten it back to a string first.
    if isinstance(iri, tuple):
        iri = url_unparse(iri)

    parts = url_parse(to_unicode(iri, charset, errors))
    # IDNA-encode the host part, percent-encode everything else.
    quoted = (
        parts.scheme,
        parts.encode_netloc().decode('ascii'),
        url_quote(parts.path, charset, errors, '/:~+%'),
        url_quote(parts.query, charset, errors, '%&[]:;$*()+,!?*/='),
        url_quote(parts.fragment, charset, errors, '=%&[]:;$()+,!?*/'),
    )
    return to_native(url_unparse(quoted))
def test_shared_data_middleware(self):
    # Serve files from a directory, a second mount of the same directory,
    # a package resource and a temp dir; verify 200 hits and 404 fallthrough.
    def null_application(environ, start_response):
        # fallback app: anything the middleware does not serve is a 404
        start_response('404 NOT FOUND', [('Content-Type', 'text/plain')])
        yield b'NOT FOUND'

    test_dir = get_temporary_directory()
    # non-ASCII filename to exercise filesystem encoding handling
    with open(path.join(test_dir, to_native(u'äöü', 'utf-8')), 'w') as test_file:
        test_file.write(u'FOUND')

    app = wsgi.SharedDataMiddleware(null_application, {
        '/': path.join(path.dirname(__file__), 'res'),
        '/sources': path.join(path.dirname(__file__), 'res'),
        '/pkg': ('werkzeug.debug', 'shared'),
        '/foo': test_dir
    })

    for p in '/test.txt', '/sources/test.txt', '/foo/äöü':
        app_iter, status, headers = run_wsgi_app(app, create_environ(p))
        self.assert_equal(status, '200 OK')
        with closing(app_iter) as app_iter:
            data = b''.join(app_iter).strip()
        self.assert_equal(data, b'FOUND')

    # package-resource mount
    app_iter, status, headers = run_wsgi_app(
        app, create_environ('/pkg/debugger.js'))
    with closing(app_iter) as app_iter:
        contents = b''.join(app_iter)
    self.assert_in(b'$(function() {', contents)

    # unknown path falls through to the wrapped application
    app_iter, status, headers = run_wsgi_app(
        app, create_environ('/missing'))
    self.assert_equal(status, '404 NOT FOUND')
    self.assert_equal(b''.join(app_iter).strip(), b'NOT FOUND')
def url_encode(obj, charset='utf-8', encode_keys=False, sort=False, key=None,
               separator=b'&'):
    """URL encode a dict/`MultiDict`.  `None` values do not appear in the
    result string.  By default only values are encoded into the target
    charset; set `encode_keys` to ``True`` to support unicode keys as well.
    With `sort` set to `True` the items are ordered by `key` or the default
    sort order.

    .. versionadded:: 0.5
        `sort`, `key`, and `separator` were added.

    :param obj: the object to encode into a query string.
    :param charset: the charset of the query string.
    :param encode_keys: set to `True` if you have unicode keys. (Ignored on
                        Python 3.x)
    :param sort: set to `True` if you want parameters to be sorted by `key`.
    :param separator: the separator to be used for the pairs.
    :param key: an optional function to be used for sorting.  For more
                details check out the :func:`sorted` documentation.
    """
    sep = to_native(separator, 'ascii')
    pairs = _url_encode_impl(obj, charset, encode_keys, sort, key)
    return sep.join(pairs)
def parse_multipart_headers(iterable):
    """Parses multipart headers from an iterable that yields lines (including
    the trailing newline symbol).  The iterable has to be newline terminated.
    The iterable will stop at the line where the headers ended so it can be
    further consumed.

    :param iterable: iterable of strings that are newline terminated
    """
    headers = []
    for raw in iterable:
        raw = to_native(raw)
        raw, terminated = _line_parse(raw)
        if not terminated:
            raise ValueError("unexpected end of line in multipart header")
        if not raw:
            # blank line terminates the header section
            break
        if headers and raw[0] in " \t":
            # continuation line: fold it into the previous header value
            name, val = headers[-1]
            headers[-1] = (name, val + "\n " + raw[1:])
            continue
        pieces = raw.split(":", 1)
        if len(pieces) == 2:
            headers.append((pieces[0].strip(), pieces[1].strip()))
    # the Headers instance takes ownership of the list; it was never
    # shared, so no copy is needed.
    return Headers(headers)
def test_shared_data_middleware(tmpdir):
    # Same scenario as the TestCase variant, but run once with a list map
    # and once with a dict map to cover both exports-argument forms.
    def null_application(environ, start_response):
        # fallback app for anything the middleware does not serve
        start_response('404 NOT FOUND', [('Content-Type', 'text/plain')])
        yield b'NOT FOUND'

    test_dir = str(tmpdir)
    # non-ASCII filename to exercise filesystem encoding handling
    with open(path.join(test_dir, to_native(u'äöü', 'utf-8')), 'w') as test_file:
        test_file.write(u'FOUND')

    for t in [list, dict]:
        app = wsgi.SharedDataMiddleware(null_application, t([
            ('/', path.join(path.dirname(__file__), 'res')),
            ('/sources', path.join(path.dirname(__file__), 'res')),
            ('/pkg', ('werkzeug.debug', 'shared')),
            ('/foo', test_dir)
        ]))

        for p in '/test.txt', '/sources/test.txt', '/foo/äöü':
            app_iter, status, headers = run_wsgi_app(app, create_environ(p))
            assert status == '200 OK'
            with closing(app_iter) as app_iter:
                data = b''.join(app_iter).strip()
            assert data == b'FOUND'

        # package-resource mount
        app_iter, status, headers = run_wsgi_app(
            app, create_environ('/pkg/debugger.js'))
        with closing(app_iter) as app_iter:
            contents = b''.join(app_iter)
        assert b'$(function() {' in contents

        # unknown path falls through to the wrapped application
        app_iter, status, headers = run_wsgi_app(
            app, create_environ('/missing'))
        assert status == '404 NOT FOUND'
        assert b''.join(app_iter).strip() == b'NOT FOUND'
def parse_multipart_headers(iterable):
    """Parses multipart headers from an iterable that yields lines (including
    the trailing newline symbol).  The iterable has to be newline terminated.
    The iterable will stop at the line where the headers ended so it can be
    further consumed.

    :param iterable: iterable of strings that are newline terminated
    """
    collected = []
    for raw_line in iterable:
        raw_line = to_native(raw_line)
        stripped, had_newline = _line_parse(raw_line)
        if not had_newline:
            raise ValueError('unexpected end of line in multipart header')
        if not stripped:
            # a blank line ends the header section
            break
        is_continuation = stripped[0] in ' \t'
        if is_continuation and collected:
            # fold the continuation into the previous header's value
            last_key, last_value = collected[-1]
            collected[-1] = (last_key, last_value + '\n ' + stripped[1:])
        else:
            split = stripped.split(':', 1)
            if len(split) == 2:
                collected.append((split[0].strip(), split[1].strip()))
    # Headers takes over the list; it was never shared, so no copy needed.
    return Headers(collected)
def url_fix(s, charset='utf-8'):
    r"""Sometimes you get an URL by a user that just isn't a real URL because
    it contains unsafe characters like ' ' and so on.  This function can fix
    some of the problems in a similar way browsers handle data entered by the
    user:

    >>> url_fix(u'http://de.wikipedia.org/wiki/Elf (Begriffskl\xe4rung)')
    'http://de.wikipedia.org/wiki/Elf%20(Begriffskl%C3%A4rung)'

    :param s: the string with the URL to fix.
    :param charset: The target charset for the URL if the url was
                    given as unicode string.
    """
    # Work in text space and treat backslashes (invalid in URLs anyway)
    # as forward slashes, mirroring Chrome's behavior.
    fixed = to_unicode(s, charset, 'replace').replace('\\', '/')

    # Repair malformed windows file URLs such as ``file://c:/...``.
    if fixed.startswith('file://') and fixed[7:8].isalpha() \
            and fixed[8:10] in (':/', '|/'):
        fixed = 'file:///' + fixed[7:]

    url = url_parse(fixed)
    return to_native(url_unparse((
        url.scheme,
        url.encode_netloc(),
        url_quote(url.path, charset, safe='/%+$!*\'(),'),
        url_quote_plus(url.query, charset, safe=':&%=+$!*\'(),'),
        url_quote_plus(url.fragment, charset, safe=':&%=+$!*\'(),'),
    )))
def url_fix(s, charset='utf-8'): r"""Sometimes you get an URL by a user that just isn't a real URL because it contains unsafe characters like ' ' and so on. This function can fix some of the problems in a similar way browsers handle data entered by the user: >>> url_fix(u'http://de.wikipedia.org/wiki/Elf (Begriffskl\xe4rung)') 'http://de.wikipedia.org/wiki/Elf%20(Begriffskl%C3%A4rung)' :param s: the string with the URL to fix. :param charset: The target charset for the URL if the url was given as unicode string. """ # First step is to switch to unicode processing and to convert # backslashes (which are invalid in URLs anyways) to slashes. This is # consistent with what Chrome does. s = to_unicode(s, charset, 'replace').replace('\\', '/') # For the specific case that we look like a malformed windows URL # we want to fix this up manually: if s.startswith('file://') and s[7:8].isalpha() and s[8:10] in (':/', '|/'): s = 'file:///' + s[7:] url = url_parse(s) path = url_quote(url.path, charset, safe='/%+$!*\'(),') qs = url_quote_plus(url.query, charset, safe=':&%=+$!*\'(),') anchor = url_quote_plus(url.fragment, charset, safe=':&%=+$!*\'(),') return to_native(url_unparse((url.scheme, url.encode_netloc(), path, qs, anchor)))
def test_shared_data_middleware(tmpdir):
    # Serve static files, a package resource and verify 404 fallthrough
    # (dict export map, double-quoted style of this suite).
    def null_application(environ, start_response):
        # fallback app for anything the middleware does not serve
        start_response("404 NOT FOUND", [("Content-Type", "text/plain")])
        yield b"NOT FOUND"

    test_dir = str(tmpdir)
    # non-ASCII filename to exercise filesystem encoding handling
    with open(path.join(test_dir, to_native(u"äöü", "utf-8")), "w") as test_file:
        test_file.write(u"FOUND")

    app = wsgi.SharedDataMiddleware(
        null_application,
        {
            "/": path.join(path.dirname(__file__), "res"),
            "/sources": path.join(path.dirname(__file__), "res"),
            "/pkg": ("werkzeug.debug", "shared"),
            "/foo": test_dir,
        },
    )

    for p in "/test.txt", "/sources/test.txt", "/foo/äöü":
        app_iter, status, headers = run_wsgi_app(app, create_environ(p))
        assert status == "200 OK"
        with closing(app_iter) as app_iter:
            data = b"".join(app_iter).strip()
        assert data == b"FOUND"

    # package-resource mount
    app_iter, status, headers = run_wsgi_app(app, create_environ("/pkg/debugger.js"))
    with closing(app_iter) as app_iter:
        contents = b"".join(app_iter)
    assert b"$(function() {" in contents

    # unknown path falls through to the wrapped application
    app_iter, status, headers = run_wsgi_app(app, create_environ("/missing"))
    assert status == "404 NOT FOUND"
    assert b"".join(app_iter).strip() == b"NOT FOUND"
def url_quote(string, charset='utf-8', errors='strict', safe='/:', unsafe=''):
    """URL encode a single string with a given encoding.

    :param s: the string to quote.
    :param charset: the charset to be used.
    :param safe: an optional sequence of safe characters.
    :param unsafe: an optional sequence of unsafe characters.

    .. versionadded:: 0.9.2
       The `unsafe` parameter was added.
    """
    # coerce arbitrary objects to text, then work on raw bytes
    if not isinstance(string, (text_type, bytes, bytearray)):
        string = text_type(string)
    if isinstance(string, text_type):
        string = string.encode(charset, errors)
    if isinstance(safe, text_type):
        safe = safe.encode(charset, errors)
    if isinstance(unsafe, text_type):
        unsafe = unsafe.encode(charset, errors)

    # bytes that survive unescaped: caller's safe set plus the always-safe
    # alphabet, minus anything explicitly marked unsafe
    keep = frozenset(bytearray(safe) + _always_safe) \
        - frozenset(bytearray(unsafe))

    encoded = bytearray()
    for byte in bytearray(string):
        if byte in keep:
            encoded.append(byte)
        else:
            encoded.extend(('%%%02X' % byte).encode('ascii'))
    return to_native(bytes(encoded))
def url_encode_stream(obj, stream=None, charset='utf-8', encode_keys=False,
                      sort=False, key=None, separator=b'&'):
    """Like :meth:`url_encode` but writes the results to a stream object.  If
    the stream is `None` a generator over all encoded pairs is returned.

    .. versionadded:: 0.8

    :param obj: the object to encode into a query string.
    :param stream: a stream to write the encoded object into or `None` if
                   an iterator over the encoded pairs should be returned.  In
                   that case the separator argument is ignored.
    :param charset: the charset of the query string.
    :param encode_keys: set to `True` if you have unicode keys. (Ignored on
                        Python 3.x)
    :param sort: set to `True` if you want parameters to be sorted by `key`.
    :param separator: the separator to be used for the pairs.
    :param key: an optional function to be used for sorting.  For more
                details check out the :func:`sorted` documentation.
    """
    sep = to_native(separator, 'ascii')
    encoded = _url_encode_impl(obj, charset, encode_keys, sort, key)
    if stream is None:
        # lazy mode: hand the generator straight back to the caller
        return encoded
    for pos, piece in enumerate(encoded):
        # separator goes before every pair except the first
        if pos:
            stream.write(sep)
        stream.write(piece)
def test_shared_data_middleware(tmpdir):
    # Serve static files, a package resource and verify 404 fallthrough
    # (dict export map, single-quoted style of this suite).
    def null_application(environ, start_response):
        # fallback app for anything the middleware does not serve
        start_response('404 NOT FOUND', [('Content-Type', 'text/plain')])
        yield b'NOT FOUND'

    test_dir = str(tmpdir)
    # non-ASCII filename to exercise filesystem encoding handling
    with open(path.join(test_dir, to_native(u'äöü', 'utf-8')), 'w') as test_file:
        test_file.write(u'FOUND')

    app = wsgi.SharedDataMiddleware(
        null_application, {
            '/': path.join(path.dirname(__file__), 'res'),
            '/sources': path.join(path.dirname(__file__), 'res'),
            '/pkg': ('werkzeug.debug', 'shared'),
            '/foo': test_dir
        })

    for p in '/test.txt', '/sources/test.txt', '/foo/äöü':
        app_iter, status, headers = run_wsgi_app(app, create_environ(p))
        assert status == '200 OK'
        with closing(app_iter) as app_iter:
            data = b''.join(app_iter).strip()
        assert data == b'FOUND'

    # package-resource mount
    app_iter, status, headers = run_wsgi_app(
        app, create_environ('/pkg/debugger.js'))
    with closing(app_iter) as app_iter:
        contents = b''.join(app_iter)
    assert b'$(function() {' in contents

    # unknown path falls through to the wrapped application
    app_iter, status, headers = run_wsgi_app(app, create_environ('/missing'))
    assert status == '404 NOT FOUND'
    assert b''.join(app_iter).strip() == b'NOT FOUND'
def url_quote(string, charset='utf-8', errors='strict', safe='/:', unsafe=''):
    """URL encode a single string with a given encoding.

    :param s: the string to quote.
    :param charset: the charset to be used.
    :param safe: an optional sequence of safe characters.
    :param unsafe: an optional sequence of unsafe characters.

    .. versionadded:: 0.9.2
       The `unsafe` parameter was added.
    """
    # coerce arbitrary objects to text, then work on raw bytes
    if not isinstance(string, (text_type, bytes, bytearray)):
        string = text_type(string)
    if isinstance(string, text_type):
        string = string.encode(charset, errors)
    if isinstance(safe, text_type):
        safe = safe.encode(charset, errors)
    if isinstance(unsafe, text_type):
        unsafe = unsafe.encode(charset, errors)
    # safe set = caller's safe chars + always-safe alphabet, minus unsafe
    safe = frozenset(bytearray(safe) + _always_safe) - frozenset(
        bytearray(unsafe))
    rv = bytearray()
    for char in bytearray(string):
        if char in safe:
            rv.append(char)
        else:
            # _bytetohex: presumably a precomputed '%XX' escape per byte
            # value — confirm at its definition site
            rv.extend(_bytetohex[char])
    return to_native(bytes(rv))
def ascii_host(self):
    """Works exactly like :attr:`host` but will return a result that
    is restricted to ASCII.  If it finds a netloc that is not ASCII
    it will attempt to idna decode it.  This is useful for socket
    operations when the URL might include internationalized characters.
    """
    host = self.host
    # only text hosts need the IDNA round-trip
    if host is not None and isinstance(host, text_type):
        host = _encode_idna(host)
    return to_native(host, 'ascii', 'ignore')
def port(self):
    """The port in the URL as an integer if it was present, `None`
    otherwise.  This does not fill in default ports.
    """
    try:
        # _split_host()[1] may be None or garbage; both raise below
        port = int(to_native(self._split_host()[1]))
    except (ValueError, TypeError):
        return None
    # out-of-range ports are treated as absent
    if 0 <= port <= 65535:
        return port
def __repr__(self):
    """Debug representation that never raises, even when the request
    wraps an invalid WSGI environ (e.g. in a debug session)."""
    parts = []
    try:
        parts.append("'%s'" % to_native(self.url, self.url_charset))
        parts.append('[%s]' % self.method)
    except Exception:
        # broken environ: fall back to a placeholder instead of blowing up
        parts.append('(invalid WSGI environ)')
    return '<%s %s>' % (self.__class__.__name__, ' '.join(parts))
def unserialize(cls, string, secret_key):
    """Load the secure cookie from a serialized string.

    :param string: the cookie value to unserialize.
    :param secret_key: the secret key used to serialize the cookie.
    :return: a new :class:`SecureCookie`.
    """
    if isinstance(string, text_type):
        string = string.encode('utf-8', 'replace')
    if isinstance(secret_key, text_type):
        secret_key = secret_key.encode('utf-8', 'replace')
    try:
        # wire format: "<base64 mac>?<k=v&k=v...>"
        base64_hash, data = string.split(b'?', 1)
    except (ValueError, IndexError):
        # malformed cookie: signal failure with an empty tuple
        items = ()
    else:
        # items: dict while parsing succeeds, None on parse failure,
        # () on verification failure
        items = {}
        mac = hmac(secret_key, None, cls.hash_method)
        for item in data.split(b'&'):
            mac.update(b'|' + item)
            if not b'=' in item:
                items = None
                break
            key, value = item.split(b'=', 1)
            # try to make the key a string
            key = url_unquote_plus(key.decode('ascii'))
            try:
                key = to_native(key)
            except UnicodeError:
                pass
            items[key] = value

        # no parsing error and the mac looks okay, we can now
        # securely unpickle our cookie.
        try:
            client_hash = base64.b64decode(base64_hash)
        except TypeError:
            items = client_hash = None
        if items is not None and safe_str_cmp(client_hash, mac.digest()):
            try:
                for key, value in iteritems(items):
                    items[key] = cls.unquote(value)
            except UnquoteError:
                items = ()
            else:
                # honour the embedded expiry timestamp, then drop it
                if '_expires' in items:
                    if time() > items['_expires']:
                        items = ()
                    else:
                        del items['_expires']
        else:
            # signature mismatch: discard the payload entirely
            items = ()
    # the False flag marks the cookie as not modified
    return cls(items, secret_key, False)
def handler(event, context):
    """
    Lambda event handler, invokes the WSGI wrapper and handles command invocation
    """
    if "_serverless-wsgi" in event:
        # command-invocation path: run code/commands and return their output
        import shlex
        import subprocess
        from werkzeug._compat import StringIO, to_native

        # capture stdout/stderr so command output can be returned
        native_stdout = sys.stdout
        native_stderr = sys.stderr
        output_buffer = StringIO()

        try:
            sys.stdout = output_buffer
            sys.stderr = output_buffer

            meta = event["_serverless-wsgi"]
            if meta.get("command") == "exec":
                # Evaluate Python code
                # SECURITY: exec of event-supplied data — only safe because
                # invocation is restricted to the deployer's own tooling
                exec(meta.get("data", ""))
            elif meta.get("command") == "command":
                # Run shell commands
                # SECURITY: shell=True with event-supplied data — same caveat
                result = subprocess.check_output(
                    meta.get("data", ""), shell=True,
                    stderr=subprocess.STDOUT)
                output_buffer.write(to_native(result))
            elif meta.get("command") == "manage":
                # Run Django management commands
                from django.core import management
                management.call_command(*shlex.split(meta.get("data", "")))
            elif meta.get("command") == "flask":
                # Run Flask CLI commands
                from flask.cli import ScriptInfo
                wsgi_app.cli.main(
                    shlex.split(meta.get("data", "")),
                    standalone_mode=False,
                    obj=ScriptInfo(create_app=_create_app),
                )
            else:
                raise Exception("Unknown command: {}".format(
                    meta.get("command")))
        except:  # noqa
            # report the full traceback to the invoker instead of failing
            return traceback.format_exc()
        finally:
            # always restore the real stdout/stderr
            sys.stdout = native_stdout
            sys.stderr = native_stderr

        return output_buffer.getvalue()
    else:
        # normal path: proxy the event through the WSGI application
        return serverless_wsgi.handle_request(wsgi_app, event, context)
def __init__(self, servers=None, default_timeout=300, key_prefix=None):
    """Set up the memcached client.

    :param servers: a list/tuple of server addresses, or an already
                    initialized memcache client, or `None` for the
                    local default server.
    :param default_timeout: default key timeout in seconds.
    :param key_prefix: optional prefix prepended to all keys.
    """
    BaseCache.__init__(self, default_timeout)
    if servers is not None and not isinstance(servers, (list, tuple)):
        # NOTE: servers is actually an already initialized memcache
        # client.
        self._client = servers
    else:
        addresses = ['127.0.0.1:11211'] if servers is None else servers
        self._client = self.import_preferred_memcache_lib(addresses)
        if self._client is None:
            raise RuntimeError('no memcache module found')
    self.key_prefix = to_native(key_prefix)
def test_shared_data_middleware(tmpdir):
    # Full coverage variant: list and dict export maps, Content-Type
    # check on .txt hits, and several 404 fallthrough paths.
    def null_application(environ, start_response):
        # fallback app for anything the middleware does not serve
        start_response("404 NOT FOUND", [("Content-Type", "text/plain")])
        yield b"NOT FOUND"

    test_dir = str(tmpdir)
    # non-ASCII filename to exercise filesystem encoding handling
    with open(os.path.join(test_dir, to_native(u"äöü", "utf-8")), "w") as test_file:
        test_file.write(u"FOUND")

    for t in [list, dict]:
        app = SharedDataMiddleware(
            null_application,
            t(
                [
                    ("/", os.path.join(os.path.dirname(__file__), "..", "res")),
                    ("/sources", os.path.join(os.path.dirname(__file__), "..", "res")),
                    ("/pkg", ("werkzeug.debug", "shared")),
                    ("/foo", test_dir),
                ]
            ),
        )

        for p in "/test.txt", "/sources/test.txt", "/foo/äöü":
            app_iter, status, headers = run_wsgi_app(app, create_environ(p))
            assert status == "200 OK"

            if p.endswith(".txt"):
                # text files must be served with an explicit charset
                content_type = next(v for k, v in headers if k == "Content-Type")
                assert content_type == "text/plain; charset=utf-8"

            with closing(app_iter) as app_iter:
                data = b"".join(app_iter).strip()

            assert data == b"FOUND"

        # package-resource mount
        app_iter, status, headers = run_wsgi_app(
            app, create_environ("/pkg/debugger.js")
        )

        with closing(app_iter) as app_iter:
            contents = b"".join(app_iter)

        assert b"$(function() {" in contents

        # directories and unknown paths fall through to the wrapped app
        for path in ("/missing", "/pkg", "/pkg/", "/pkg/missing.txt"):
            app_iter, status, headers = run_wsgi_app(app, create_environ(path))
            assert status == "404 NOT FOUND"
            assert b"".join(app_iter).strip() == b"NOT FOUND"
def encode_netloc(self):
    """Encodes the netloc part to an ASCII safe URL as bytes."""
    host = self.ascii_host or ''
    if ':' in host:
        # a colon in the host means a bare IPv6 address: bracket it
        host = '[%s]' % host
    port = self.port
    if port is not None:
        host = '%s:%d' % (host, port)
    # user:password prefix, individually percent-encoded
    creds = ':'.join(filter(None, [
        url_quote(self.raw_username or '', 'utf-8', 'strict', '/:%'),
        url_quote(self.raw_password or '', 'utf-8', 'strict', '/:%'),
    ]))
    if creds:
        host = '%s@%s' % (creds, host)
    return to_native(host)
def handler(event, context):
    """Lambda event handler, invokes the WSGI wrapper and handles command invocation"""
    # docstring moved to the top of the function where Python actually
    # treats it as a docstring (it previously sat after the first branch)
    if "Records" in event:
        # SQS trigger: process queued messages, nothing to return
        process_sqs_messages(event)
        return
    if "_serverless-wsgi" in event:
        # command-invocation path: run code/commands and return their output
        import shlex
        import subprocess
        from werkzeug._compat import StringIO, to_native

        # capture stdout/stderr so command output can be returned
        native_stdout = sys.stdout
        native_stderr = sys.stderr
        output_buffer = StringIO()

        try:
            sys.stdout = output_buffer
            sys.stderr = output_buffer

            meta = event["_serverless-wsgi"]
            if meta.get("command") == "exec":
                # Evaluate Python code
                # SECURITY: exec of event-supplied data — only safe because
                # invocation is restricted to the deployer's own tooling
                exec(meta.get("data", ""))
            elif meta.get("command") == "command":
                # BUG FIX: was ``meta.get("command2")``, which made this
                # branch unreachable for "command" invocations.
                # Run shell commands
                # SECURITY: shell=True with event-supplied data — same caveat
                result = subprocess.check_output(
                    meta.get("data", ""), shell=True,
                    stderr=subprocess.STDOUT)
                output_buffer.write(to_native(result))
            elif meta.get("command") == "manage":
                # Run Django management commands
                from django.core import management
                management.call_command(*shlex.split(meta.get("data", "")))
            else:
                raise Exception("Unknown command: {}".format(
                    meta.get("command")))
        except subprocess.CalledProcessError as e:
            # shell command failed: propagate its exit code and output
            return [e.returncode, e.output.decode("utf-8")]
        except:  # noqa
            return [1, traceback.format_exc()]
        finally:
            # always restore the real stdout/stderr
            sys.stdout = native_stdout
            sys.stderr = native_stderr

        return [0, output_buffer.getvalue()]
    else:
        # normal path: proxy the event through the WSGI application
        return serverless_wsgi.handle_request(wsgi_app, event, context)
def __call__(self, *path, **query):
    """Build a URL below the base from positional path segments plus
    query parameters given as keyword arguments or a trailing dict."""
    if path and isinstance(path[-1], dict):
        # a trailing dict is the query; it excludes keyword arguments
        if query:
            raise TypeError("keyword arguments and query-dicts "
                            "can't be combined")
        query, path = path[-1], path[:-1]
    elif query:
        # a trailing underscore lets callers pass reserved words (class_)
        query = {(k[:-1] if k.endswith("_") and k[:-1] else k): v
                 for k, v in query.items()}

    segments = [to_unicode(url_quote(seg, self.charset), "ascii")
                for seg in path if seg is not None]
    joined = "/".join(segments).lstrip("/")

    url = self.base
    if joined:
        if not url.endswith("/"):
            url += "/"
        url = url_join(url, "./" + joined)
    if query:
        qs = url_encode(query, self.charset, sort=self.sort, key=self.key)
        url += "?" + to_unicode(qs, "ascii")
    return to_native(url)
def pbkdf2_hex(data, salt, iterations=DEFAULT_PBKDF2_ITERATIONS, keylen=None,
               hashfunc=None):
    """Like :func:`pbkdf2_bin` but returns a hex encoded string.

    .. versionadded:: 0.9

    :param data: the data to derive.
    :param salt: the salt for the derivation.
    :param iterations: the number of iterations.
    :param keylen: the length of the resulting key.  If not provided the
                   digest size will be used.
    :param hashfunc: the hash function to use.  This can either be the
                     string name of a known hash function or a function
                     from the hashlib module.  Defaults to sha1.
    """
    raw_key = pbkdf2_bin(data, salt, iterations, keylen, hashfunc)
    return to_native(codecs.encode(raw_key, "hex_codec"))
def url_parse(url, scheme=None, allow_fragments=True):
    """Parses a URL from a string into a :class:`URL` tuple.  If the URL
    is lacking a scheme it can be provided as second argument. Otherwise,
    it is ignored.  Optionally fragments can be stripped from the URL
    by setting `allow_fragments` to `False`.
    The inverse of this function is :func:`url_unparse`.

    :param url: the URL to parse.
    :param scheme: the default schema to use if the URL is schemaless.
    :param allow_fragments: if set to `False` a fragment will be removed
                            from the URL.
    """
    # s() wraps literals so the same code works on str and bytes input
    s = make_literal_wrapper(url)
    is_text_based = isinstance(url, text_type)

    if scheme is None:
        scheme = s("")
    netloc = query = fragment = s("")
    i = url.find(s(":"))
    if i > 0 and _scheme_re.match(to_native(url[:i], errors="replace")):
        # make sure "iri" is not actually a port number (in which case
        # "scheme" is really part of the path)
        rest = url[i + 1:]
        if not rest or any(c not in s("0123456789") for c in rest):
            # not a port number
            scheme, url = url[:i].lower(), rest

    if url[:2] == s("//"):
        # netloc ends at the first path/query/fragment delimiter
        delim = len(url)
        for c in s("/?#"):
            wdelim = url.find(c, 2)
            if wdelim >= 0:
                delim = min(delim, wdelim)
        netloc, url = url[2:delim], url[delim:]
        # IPv6 brackets must be balanced
        if (s("[") in netloc and s("]") not in netloc) or (s("]") in netloc and s("[") not in netloc):
            raise ValueError("Invalid IPv6 URL")

    if allow_fragments and s("#") in url:
        url, fragment = url.split(s("#"), 1)
    if s("?") in url:
        url, query = url.split(s("?"), 1)

    # return a text- or bytes-flavoured tuple matching the input type
    result_type = is_text_based and URL or BytesURL
    return result_type(scheme, netloc, url, query, fragment)
def pbkdf2_hex(data, salt, iterations=DEFAULT_PBKDF2_ITERATIONS, keylen=None,
               hashfunc=None):
    """Like :func:`pbkdf2_bin` but returns a hex encoded string.

    .. versionadded:: 0.9

    :param data: the data to derive.
    :param salt: the salt for the derivation.
    :param iterations: the number of iterations.
    :param keylen: the length of the resulting key.  If not provided the
                   digest size will be used.
    :param hashfunc: the hash function to use.  This can either be the
                     string name of a known hash function or a function
                     from the hashlib module.  Defaults to sha1.
    """
    rv = pbkdf2_bin(data, salt, iterations, keylen, hashfunc)
    # hex_codec keeps this working on both Python 2 and 3 byte strings
    return to_native(codecs.encode(rv, 'hex_codec'))
def url_fix(s, charset='utf-8'):
    r"""Sometimes you get an URL by a user that just isn't a real URL because
    it contains unsafe characters like ' ' and so on.  This function can fix
    some of the problems in a similar way browsers handle data entered by the
    user:

    >>> url_fix(u'http://de.wikipedia.org/wiki/Elf (Begriffskl\xe4rung)')
    'http://de.wikipedia.org/wiki/Elf%20(Begriffskl%C3%A4rung)'

    :param s: the string with the URL to fix.
    :param charset: The target charset for the URL if the url was
                    given as unicode string.
    """
    scheme, netloc, raw_path, raw_qs, anchor = url_parse(
        to_unicode(s, charset, 'replace'))
    # percent-encode path and query; '%' stays safe so already-encoded
    # input is not double-quoted
    quoted_path = url_quote(raw_path, charset, safe='/%+$!*\'(),')
    quoted_qs = url_quote_plus(raw_qs, charset, safe=':&%=+$!*\'(),')
    return to_native(url_unparse(
        (scheme, netloc, quoted_path, quoted_qs, anchor)))
def url_parse(url, scheme=None, allow_fragments=True):
    """Parses a URL from a string into a :class:`URL` tuple.  If the URL
    is lacking a scheme it can be provided as second argument. Otherwise,
    it is ignored.  Optionally fragments can be stripped from the URL
    by setting `allow_fragments` to `False`.
    The inverse of this function is :func:`url_unparse`.

    :param url: the URL to parse.
    :param scheme: the default schema to use if the URL is schemaless.
    :param allow_fragments: if set to `False` a fragment will be removed
                            from the URL.
    """
    # s() wraps literals so the same code works on str and bytes input
    s = make_literal_wrapper(url)
    is_text_based = isinstance(url, text_type)

    if scheme is None:
        scheme = s('')
    netloc = query = fragment = s('')
    i = url.find(s(':'))
    if i > 0 and _scheme_re.match(to_native(url[:i], errors='replace')):
        # make sure "iri" is not actually a port number (in which case
        # "scheme" is really part of the path)
        rest = url[i + 1:]
        if not rest or any(c not in s('0123456789') for c in rest):
            # not a port number
            scheme, url = url[:i].lower(), rest

    if url[:2] == s('//'):
        # netloc ends at the first path/query/fragment delimiter
        delim = len(url)
        for c in s('/?#'):
            wdelim = url.find(c, 2)
            if wdelim >= 0:
                delim = min(delim, wdelim)
        netloc, url = url[2:delim], url[delim:]
        # IPv6 brackets must be balanced
        if ((s('[') in netloc and s(']') not in netloc) or
                (s(']') in netloc and s('[') not in netloc)):
            raise ValueError('Invalid IPv6 URL')

    if allow_fragments and s('#') in url:
        url, fragment = url.split(s('#'), 1)
    if s('?') in url:
        url, query = url.split(s('?'), 1)

    # return a text- or bytes-flavoured tuple matching the input type
    result_type = is_text_based and URL or BytesURL
    return result_type(scheme, netloc, url, query, fragment)
def url_quote(string, charset='utf-8', errors='strict', safe='/:'):
    """URL encode a single string with a given encoding.

    :param s: the string to quote.
    :param charset: the charset to be used.
    :param safe: an optional sequence of safe characters.
    """
    if isinstance(string, text_type):
        string = string.encode(charset, errors)
    if isinstance(safe, text_type):
        safe = safe.encode(charset, errors)
    safe = frozenset(bytearray(safe) + _always_safe)
    rv = bytearray()
    for char in bytearray(string):
        if char in safe:
            rv.append(char)
        else:
            # BUG FIX: '%%%X' produced one-digit escapes such as '%A' for
            # bytes < 0x10; RFC 3986 requires exactly two hex digits, so
            # use '%02X' as the sibling implementations do.
            rv.extend(('%%%02X' % char).encode('ascii'))
    return to_native(bytes(rv))
def __call__(self, *path, **query):
    """Build a URL below the base from positional path segments plus
    query parameters given as keyword arguments or a trailing dict."""
    if path and isinstance(path[-1], dict):
        # a trailing dict is the query; it excludes keyword arguments
        if query:
            raise TypeError('keyword arguments and query-dicts '
                            'can\'t be combined')
        query, path = path[-1], path[:-1]
    elif query:
        # a trailing underscore lets callers pass reserved words (class_)
        query = dict([(k.endswith('_') and k[:-1] or k, v)
                      for k, v in query.items()])
    # quote each non-None segment and join into a relative path
    path = '/'.join([to_unicode(url_quote(x, self.charset), 'ascii')
                     for x in path if x is not None]).lstrip('/')
    rv = self.base
    if path:
        if not rv.endswith('/'):
            rv += '/'
        rv = url_join(rv, './' + path)
    if query:
        rv += '?' + to_unicode(url_encode(query, self.charset, sort=self.sort,
                                          key=self.key), 'ascii')
    return to_native(rv)
def encode_netloc(self):
    """Encodes the netloc part to an ASCII safe URL as bytes."""
    rv = self.ascii_host or ""
    if ":" in rv:
        # a colon in the host means a bare IPv6 address: bracket it
        rv = "[%s]" % rv
    port = self.port
    if port is not None:
        rv = "%s:%d" % (rv, port)
    # user:password prefix, individually percent-encoded
    auth = ":".join(
        filter(
            None,
            [
                url_quote(self.raw_username or "", "utf-8", "strict", "/:%"),
                url_quote(self.raw_password or "", "utf-8", "strict", "/:%"),
            ],
        ))
    if auth:
        rv = "%s@%s" % (auth, rv)
    return to_native(rv)
def _url_quote(string, charset='utf-8', errors='strict', safe='/:', unsafe=''):
    """URL encode a single string with a given encoding.

    :param string: the string to quote.
    :param charset: the charset to be used.
    :param safe: an optional sequence of safe characters.
    :param unsafe: an optional sequence of unsafe characters.

    .. versionadded:: 0.9.2
       The `unsafe` parameter was added.
    """
    # coerce arbitrary objects to text first, then down to bytes
    if not isinstance(string, (text_type, bytes, bytearray)):
        string = text_type(string)
    if isinstance(string, text_type):
        string = string.encode(charset, errors)
    allowed = _get_stringy_set_impl(safe, charset, errors)
    blocked = _get_stringy_set_impl(unsafe, charset, errors)
    # unsafe characters win over both the explicit and built-in safe sets
    keep = frozenset(allowed + _always_safe) - frozenset(blocked)
    encoded = _transform_impl(string, keep)
    return to_native(bytes(encoded))
def url_encode(obj, charset="utf-8", encode_keys=False, sort=False, key=None,
               separator=b"&"):
    """URL encode a dict/`MultiDict`.  `None` values do not appear in the
    result string.  Per default only values are encoded into the target
    charset strings.  If `encode_keys` is set to ``True`` unicode keys are
    supported too.

    If `sort` is set to `True` the items are sorted by `key` or the default
    sorting algorithm.

    .. versionadded:: 0.5
        `sort`, `key`, and `separator` were added.

    :param obj: the object to encode into a query string.
    :param charset: the charset of the query string.
    :param encode_keys: set to `True` if you have unicode keys. (Ignored on
                        Python 3.x)
    :param sort: set to `True` if you want parameters to be sorted by `key`.
    :param separator: the separator to be used for the pairs.
    :param key: an optional function to be used for sorting.  For more details
                check out the :func:`sorted` documentation.
    """
    pairs = _url_encode_impl(obj, charset, encode_keys, sort, key)
    return to_native(separator, "ascii").join(pairs)
def __call__(self, *path, **query):
    """Assemble a URL from *path* segments and *query* parameters.

    The query may be given either as keyword arguments or as a single
    trailing dict positional argument — never both.  Keyword names ending
    in ``_`` have the underscore removed so Python keywords can be used.
    """
    if path and isinstance(path[-1], dict):
        if query:
            raise TypeError("keyword arguments and query-dicts "
                            "can't be combined")
        query = path[-1]
        path = path[:-1]
    elif query:
        cleaned = {}
        for name, val in query.items():
            # strip one trailing underscore unless that leaves nothing
            if name.endswith("_") and name[:-1]:
                name = name[:-1]
            cleaned[name] = val
        query = cleaned
    parts = []
    for segment in path:
        if segment is not None:
            parts.append(to_unicode(url_quote(segment, self.charset), "ascii"))
    joined = "/".join(parts).lstrip("/")
    url = self.base
    if joined:
        if not url.endswith("/"):
            url += "/"
        url = url_join(url, "./" + joined)
    if query:
        qs = url_encode(query, self.charset, sort=self.sort, key=self.key)
        url += "?" + to_unicode(qs, "ascii")
    return to_native(url)
def parse_cookie(header, errors='replace', cls=None):
    """Parse a cookie.  Either from a string or WSGI environ.

    If you want a different error behavior you can set `errors` to
    ``'replace'`` or ``'strict'``.  In strict mode a
    :exc:`HTTPUnicodeError` is raised.

    .. versionchanged:: 0.5
       This function now returns a :class:`TypeConversionDict` instead of a
       regular dict.  The `cls` parameter was added.

    :param header: the header to be used to parse the cookie.  Alternatively
                   this can be a WSGI environment.
    :param errors: the error behavior for the charset decoding.
    :param cls: an optional dict class to use.  If this is not specified
                       or `None` the default :class:`TypeConversionDict` is
                       used.
    """
    if isinstance(header, dict):
        header = header.get('HTTP_COOKIE', '')
    header = to_native(header, _cookie_charset)

    cookie = _ExtendedCookie()
    cookie.load(header)

    # The extended cookie/morsel classes convert CookieErrors into morsels
    # whose value is None; skip those broken items while decoding the rest
    # to unicode.
    decoded = {}
    for name, morsel in iteritems(cookie):
        if morsel.value is None:
            continue
        decoded[to_unicode(name, _cookie_charset)] = to_unicode(
            unquote_header_value(morsel.value), _cookie_charset)

    if cls is None:
        cls = TypeConversionDict
    return cls(decoded)
def _normalize_key(self, key):
    """Coerce *key* to a native string and prepend the configured prefix."""
    normalized = to_native(key, 'utf-8')
    if not self.key_prefix:
        return normalized
    return self.key_prefix + normalized
def __init__(self, *args, **kwargs):
    """Initialize the parent cache, then coerce the key prefix to a
    native string.

    All arguments are forwarded unchanged to the parent class.
    """
    super(PatchedMemcachedCache, self).__init__(*args, **kwargs)
    # normalize once up front -- presumably the underlying client
    # requires native-str keys; confirm against key usage elsewhere
    self.key_prefix = to_native(self.key_prefix)
def stream_encode_multipart(values, use_tempfile=True, threshold=1024 * 500,
                            boundary=None, charset='utf-8'):
    """Encode a dict of values (either strings or file descriptors or
    :class:`FileStorage` objects.) into a multipart encoded string stored
    in a file descriptor.
    """
    if boundary is None:
        boundary = '---------------WerkzeugFormPart_%s%s' % (time(), random())
    # Mutable state shared with write_binary():
    #   [0] current output stream, [1] bytes written so far,
    #   [2] whether we have spilled from memory to a temp file
    _closure = [BytesIO(), 0, False]

    if use_tempfile:
        def write_binary(string):
            stream, total_length, on_disk = _closure
            if on_disk:
                stream.write(string)
            else:
                length = len(string)
                # buffer in memory until the threshold would be exceeded
                if length + _closure[1] <= threshold:
                    stream.write(string)
                else:
                    # spill: copy the in-memory buffer into a temp file
                    # and keep writing there from now on
                    new_stream = TemporaryFile('wb+')
                    new_stream.write(stream.getvalue())
                    new_stream.write(string)
                    _closure[0] = new_stream
                    _closure[2] = True
                _closure[1] = total_length + length
    else:
        write_binary = _closure[0].write

    def write(string):
        # text payloads are encoded in the target charset before writing
        write_binary(string.encode(charset))

    if not isinstance(values, MultiDict):
        values = MultiDict(values)

    # NOTE: the loop variable deliberately shadows `values`; iterlists()
    # has already captured the MultiDict before the rebinding happens.
    for key, values in iterlists(values):
        for value in values:
            write('--%s\r\nContent-Disposition: form-data; name="%s"' %
                  (boundary, key))
            reader = getattr(value, 'read', None)
            if reader is not None:
                # file-like value: emit filename/content-type headers and
                # stream the raw bytes in chunks
                filename = getattr(value, 'filename',
                                   getattr(value, 'name', None))
                content_type = getattr(value, 'content_type', None)
                if content_type is None:
                    content_type = filename and \
                        mimetypes.guess_type(filename)[0] or \
                        'application/octet-stream'
                if filename is not None:
                    write('; filename="%s"\r\n' % filename)
                else:
                    write('\r\n')
                write('Content-Type: %s\r\n\r\n' % content_type)
                while 1:
                    chunk = reader(16384)
                    if not chunk:
                        break
                    write_binary(chunk)
            else:
                # plain value: coerce to a native string in the charset
                if isinstance(value, string_types):
                    value = to_native(value, charset)
                else:
                    value = str(value)
                write('\r\n\r\n' + value)
            write('\r\n')

    # closing boundary, then rewind so the caller can read from the start
    write('--%s--\r\n' % boundary)
    length = int(_closure[0].tell())
    _closure[0].seek(0)
    return _closure[0], length, boundary
def iri_to_uri(iri, charset='utf-8', errors='strict', safe_conversion=False):
    r"""Convert any unicode based IRI to an acceptable ASCII URI.

    Werkzeug always uses utf-8 URLs internally because this is what
    browsers and HTTP do as well.  In some places where it accepts an URL
    it also accepts a unicode IRI and converts it into a URI.

    Examples for IRI versus URI:

    >>> iri_to_uri(u'http://\u2603.net/')
    'http://xn--n3h.net/'
    >>> iri_to_uri(u'http://\xfcser:p\xe4ssword@\u2603.net/p\xe5th')
    'http://%C3%BCser:p%C3%[email protected]/p%C3%A5th'

    Some protocols in the wild (``magnet:?xt=...``,
    ``itms-services://?action=...``) violate the URI specification and
    cannot survive a parse/unparse round trip.  When `safe_conversion` is
    enabled, an input that is already plain ASCII with no whitespace is
    returned unchanged, so such URLs are not destroyed.

    .. versionadded:: 0.6

    .. versionchanged:: 0.9.6
       The `safe_conversion` parameter was added.

    :param iri: The IRI to convert.
    :param charset: The charset for the URI.
    :param safe_conversion: indicates if a safe conversion should take place.
                            For more information see the explanation above.
    """
    if isinstance(iri, tuple):
        iri = url_unparse(iri)

    if safe_conversion:
        try:
            native_iri = to_native(iri)
            ascii_iri = native_iri.encode('ascii')
            # no whitespace and pure ASCII: already a usable URI
            if ascii_iri.split() == [ascii_iri]:
                return native_iri
        except UnicodeError:
            pass

    parts = url_parse(to_unicode(iri, charset, errors))
    host = parts.encode_netloc()
    quoted_path = url_quote(parts.path, charset, errors, '/:~+%')
    quoted_query = url_quote(parts.query, charset, errors, '%&[]:;$*()+,!?*/=')
    quoted_fragment = url_quote(parts.fragment, charset, errors,
                                '=%&[]:;$()+,!?*/')
    return to_native(url_unparse((parts.scheme, host, quoted_path,
                                  quoted_query, quoted_fragment)))
def dump_cookie(key, value='', max_age=None, expires=None, path='/',
                domain=None, secure=None, httponly=False, sync_expires=True):
    """Creates a new Set-Cookie header without the ``Set-Cookie`` prefix.
    The parameters are the same as in the cookie Morsel object in the
    Python standard library but it accepts unicode data, too.

    :param max_age: should be a number of seconds, or `None` (default) if
                    the cookie should last only as long as the client's
                    browser session.  Additionally `timedelta` objects
                    are accepted, too.
    :param expires: should be a `datetime` object or unix timestamp.
    :param path: limits the cookie to a given path, per default it will
                 span the whole domain.
    :param domain: Use this if you want to set a cross-domain cookie. For
                   example, ``domain=".example.com"`` will set a cookie
                   that is readable by the domain ``www.example.com``,
                   ``foo.example.com`` etc.  Otherwise, a cookie will only
                   be readable by the domain that set it.
    :param secure: The cookie will only be available via HTTPS
    :param httponly: disallow JavaScript to access the cookie.  This is an
                     extension to the cookie standard and probably not
                     supported by all browsers.
    :param sync_expires: automatically set expires if max_age is defined
                         but expires not.
    """
    if not isinstance(key, (bytes, text_type)):
        raise TypeError('invalid key %r' % key)
    if not isinstance(value, (bytes, text_type)):
        raise TypeError('invalid value %r' % value)
    key, value = to_native(key, _cookie_charset), \
        to_native(value, _cookie_charset)
    value = quote_header_value(value)
    morsel = _ExtendedMorsel(key, value)
    if isinstance(max_age, timedelta):
        max_age = (max_age.days * 60 * 60 * 24) + max_age.seconds
    if expires is not None:
        if not isinstance(expires, string_types):
            expires = cookie_date(expires)
        morsel['expires'] = expires
    elif max_age is not None and sync_expires:
        morsel['expires'] = cookie_date(time() + max_age)
    if domain and ':' in domain:
        # The port part of the domain should NOT be used.  Strip it.
        domain = domain.split(':', 1)[0]
    if domain:
        assert '.' in domain, (
            "Setting \"domain\" for a cookie on a server running locally (ex: "
            "localhost) is not supported by complying browsers. You should "
            "have something like: \"127.0.0.1 localhost dev.localhost\" on "
            "your hosts file and then point your server to run on "
            "\"dev.localhost\" and also set \"domain\" for \"dev.localhost\"")
    for k, v in (('path', path), ('domain', domain), ('secure', secure),
                 ('max-age', max_age), ('httponly', httponly)):
        if v is not None and v is not False:
            morsel[k] = str(v)
    return to_unicode(morsel.output(header='').lstrip(), _cookie_charset)
def log(self, logfile=None):
    """Log the ASCII traceback into a file object."""
    target = sys.stderr if logfile is None else logfile
    # ensure exactly one trailing newline on the output
    text = self.plaintext.rstrip() + u'\n'
    target.write(to_native(text, "utf-8", "replace"))