def test_utf8_logging(self): self.logger.error(u("\u00e9").encode("utf8")) if issubclass(bytes_type, basestring_type): # on python 2, utf8 byte strings (and by extension ascii byte # strings) are passed through as-is. self.assertEqual(self.get_output(), utf8(u("\u00e9"))) else: # on python 3, byte strings always get repr'd even if # they're ascii-only, so this degenerates into another # copy of test_bytes_logging. self.assertEqual(self.get_output(), utf8(repr(utf8(u("\u00e9")))))
def test_all_methods(self): for method in ['GET', 'DELETE', 'OPTIONS']: response = self.fetch('/all_methods', method=method) self.assertEqual(response.body, utf8(method)) for method in ['POST', 'PUT', 'PATCH']: response = self.fetch('/all_methods', method=method, body=b'') self.assertEqual(response.body, utf8(method)) response = self.fetch('/all_methods', method='HEAD') self.assertEqual(response.body, b'') response = self.fetch('/all_methods', method='OTHER', allow_nonstandard_methods=True) self.assertEqual(response.body, b'OTHER')
def test_url_unescape_unicode(self): tests = [ ('%C3%A9', u('\u00e9'), 'utf8'), ('%C3%A9', u('\u00c3\u00a9'), 'latin1'), ('%C3%A9', utf8(u('\u00e9')), None), ] for escaped, unescaped, encoding in tests: # input strings to url_unescape should only contain ascii # characters, but make sure the function accepts both byte # and unicode strings. self.assertEqual(url_unescape(to_unicode(escaped), encoding), unescaped) self.assertEqual(url_unescape(utf8(escaped), encoding), unescaped)
def test_unicode_literal_expression(self): # Unicode literals should be usable in templates. Note that this # test simulates unicode characters appearing directly in the # template file (with utf8 encoding), i.e. \u escapes would not # be used in the template file itself. if str is unicode_type: # python 3 needs a different version of this test since # 2to3 doesn't run on template internals template = Template(utf8(u('{{ "\u00e9" }}'))) else: template = Template(utf8(u('{{ u"\u00e9" }}'))) self.assertEqual(template.generate(), utf8(u("\u00e9")))
def test_url_escape_quote_plus(self): unescaped = '+ #%' plus_escaped = '%2B+%23%25' escaped = '%2B%20%23%25' self.assertEqual(url_escape(unescaped), plus_escaped) self.assertEqual(url_escape(unescaped, plus=False), escaped) self.assertEqual(url_unescape(plus_escaped), unescaped) self.assertEqual(url_unescape(escaped, plus=False), unescaped) self.assertEqual(url_unescape(plus_escaped, encoding=None), utf8(unescaped)) self.assertEqual(url_unescape(escaped, encoding=None, plus=False), utf8(unescaped))
def test_xhtml_escape(self): tests = [ ("<foo>", "<foo>"), (u("<foo>"), u("<foo>")), (b"<foo>", b"<foo>"), ("<>&\"'", "<>&"'"), ("&", "&amp;"), (u("<\u00e9>"), u("<\u00e9>")), (b"<\xc3\xa9>", b"<\xc3\xa9>"), ] for unescaped, escaped in tests: self.assertEqual(utf8(xhtml_escape(unescaped)), utf8(escaped)) self.assertEqual(utf8(unescaped), utf8(xhtml_unescape(escaped)))
def resolve(self, host, port, family=0): # getHostByName doesn't accept IP addresses, so if the input # looks like an IP address just return it immediately. if twisted.internet.abstract.isIPAddress(host): resolved = host resolved_family = socket.AF_INET elif twisted.internet.abstract.isIPv6Address(host): resolved = host resolved_family = socket.AF_INET6 else: deferred = self.resolver.getHostByName(utf8(host)) resolved = yield gen.Task(deferred.addBoth) if isinstance(resolved, failure.Failure): resolved.raiseException() elif twisted.internet.abstract.isIPAddress(resolved): resolved_family = socket.AF_INET elif twisted.internet.abstract.isIPv6Address(resolved): resolved_family = socket.AF_INET6 else: resolved_family = socket.AF_UNSPEC if family != socket.AF_UNSPEC and family != resolved_family: raise Exception('Requested socket family %d but got %d' % (family, resolved_family)) result = [ (resolved_family, (resolved, port)), ] raise gen.Return(result)
def environ(request): """Converts a `webalchemy.tornado.httpserver.HTTPRequest` to a WSGI environment. """ hostport = request.host.split(":") if len(hostport) == 2: host = hostport[0] port = int(hostport[1]) else: host = request.host port = 443 if request.protocol == "https" else 80 environ = { "REQUEST_METHOD": request.method, "SCRIPT_NAME": "", "PATH_INFO": to_wsgi_str(escape.url_unescape( request.path, encoding=None, plus=False)), "QUERY_STRING": request.query, "REMOTE_ADDR": request.remote_ip, "SERVER_NAME": host, "SERVER_PORT": str(port), "SERVER_PROTOCOL": request.version, "wsgi.version": (1, 0), "wsgi.url_scheme": request.protocol, "wsgi.input": BytesIO(escape.utf8(request.body)), "wsgi.errors": sys.stderr, "wsgi.multithread": False, "wsgi.multiprocess": True, "wsgi.run_once": False, } if "Content-Type" in request.headers: environ["CONTENT_TYPE"] = request.headers.pop("Content-Type") if "Content-Length" in request.headers: environ["CONTENT_LENGTH"] = request.headers.pop("Content-Length") for key, value in request.headers.items(): environ["HTTP_" + key.replace("-", "_").upper()] = value return environ
def parse_body_arguments(content_type, body, arguments, files): """Parses a form request body. Supports ``application/x-www-form-urlencoded`` and ``multipart/form-data``. The ``content_type`` parameter should be a string and ``body`` should be a byte string. The ``arguments`` and ``files`` parameters are dictionaries that will be updated with the parsed contents. """ if content_type.startswith("application/x-www-form-urlencoded"): try: uri_arguments = parse_qs_bytes(native_str(body), keep_blank_values=True) except Exception as e: gen_log.warning('Invalid x-www-form-urlencoded body: %s', e) uri_arguments = {} for name, values in uri_arguments.items(): if values: arguments.setdefault(name, []).extend(values) elif content_type.startswith("multipart/form-data"): fields = content_type.split(";") for field in fields: k, sep, v = field.strip().partition("=") if k == "boundary" and v: parse_multipart_form_data(utf8(v), body, arguments, files) break else: gen_log.warning("Invalid multipart/form-data")
def test_json_decode(self): # json_decode accepts both bytes and unicode, but strings it returns # are always unicode. self.assertEqual(json_decode(b'"foo"'), u("foo")) self.assertEqual(json_decode(u('"foo"')), u("foo")) # Non-ascii bytes are interpreted as utf8 self.assertEqual(json_decode(utf8(u('"\u00e9"'))), u("\u00e9"))
def test_break_in_apply(self): # This test verifies current behavior, although of course it would # be nice if apply didn't cause seemingly unrelated breakage try: Template(utf8("{% for i in [] %}{% apply foo %}{% break %}{% end %}{% end %}")) raise Exception("Did not get expected exception") except ParseError: pass
def test_json_encode(self): # json deals with strings, not bytes. On python 2 byte strings will # convert automatically if they are utf8; on python 3 byte strings # are not allowed. self.assertEqual(json_decode(json_encode(u("\u00e9"))), u("\u00e9")) if bytes_type is str: self.assertEqual(json_decode(json_encode(utf8(u("\u00e9")))), u("\u00e9")) self.assertRaises(UnicodeDecodeError, json_encode, b"\xe9")
def compute_accept_value(key): """Computes the value for the Sec-WebSocket-Accept header, given the value for Sec-WebSocket-Key. """ sha1 = hashlib.sha1() sha1.update(utf8(key)) sha1.update(b"258EAFA5-E914-47DA-95CA-C5AB0DC85B11") # Magic value return native_str(base64.b64encode(sha1.digest()))
def test_try(self): template = Template(utf8("""{% try %} try{% set y = 1/x %} {% except %}-except {% else %}-else {% finally %}-finally {% end %}""")) self.assertEqual(template.generate(x=1), b"\ntry\n-else\n-finally\n") self.assertEqual(template.generate(x=0), b"\ntry-except\n-finally\n")
def handle_read(self, data): logging.info("handle_read") data = to_unicode(data) if data == data.upper(): self.stream.write(b"error\talready capitalized\n") else: # data already has \n self.stream.write(utf8("ok\t%s" % data.upper())) self.stream.close()
def capitalize(self, request_data, callback): logging.info('capitalize') stream = IOStream(socket.socket(), io_loop=self.io_loop) logging.info('connecting') yield gen.Task(stream.connect, ('127.0.0.1', self.port)) stream.write(utf8(request_data + '\n')) logging.info('reading') data = yield gen.Task(stream.read_until, b'\n') logging.info('returning') stream.close() callback(self.process_response(data))
def generate(self, writer): value = self.value # Compress lots of white space to a single character. If the whitespace # breaks a line, have it continue to break a line, but just with a # single \n character if writer.compress_whitespace and "<pre>" not in value: value = re.sub(r"([\t ]+)", " ", value) value = re.sub(r"(\s*\n\s*)", "\n", value) if value: writer.write_line('_tt_append(%r)' % escape.utf8(value), self.line)
def __call__(self, request): data = {} response = [] def start_response(status, response_headers, exc_info=None): data["status"] = status data["headers"] = response_headers return response.append app_response = self.wsgi_application( WSGIContainer.environ(request), start_response) try: response.extend(app_response) body = b"".join(response) finally: if hasattr(app_response, "close"): app_response.close() if not data: raise Exception("WSGI app did not call start_response") status_code = int(data["status"].split()[0]) headers = data["headers"] header_set = set(k.lower() for (k, v) in headers) body = escape.utf8(body) if status_code != 304: if "content-length" not in header_set: headers.append(("Content-Length", str(len(body)))) if "content-type" not in header_set: headers.append(("Content-Type", "text/html; charset=UTF-8")) if "server" not in header_set: headers.append(("Server", "TornadoServer/%s" % webalchemy.tornado.version)) parts = [escape.utf8("HTTP/1.1 " + data["status"] + "\r\n")] for key, value in headers: parts.append(escape.utf8(key) + b": " + escape.utf8(value) + b"\r\n") parts.append(b"\r\n") parts.append(body) request.write(b"".join(parts)) request.finish() self._log(status_code, request)
def test_break_continue(self): template = Template(utf8("""\ {% for i in range(10) %} {% if i == 2 %} {% continue %} {% end %} {{ i }} {% if i == 6 %} {% break %} {% end %} {% end %}""")) result = template.generate() # remove extraneous whitespace result = b''.join(result.split()) self.assertEqual(result, b"013456")
def get(self): realm = 'test' opaque = 'asdf' # Real implementations would use a random nonce. nonce = "1234" username = '******' password = '******' auth_header = self.request.headers.get('Authorization', None) if auth_header is not None: auth_mode, params = auth_header.split(' ', 1) assert auth_mode == 'Digest' param_dict = {} for pair in params.split(','): k, v = pair.strip().split('=', 1) if v[0] == '"' and v[-1] == '"': v = v[1:-1] param_dict[k] = v assert param_dict['realm'] == realm assert param_dict['opaque'] == opaque assert param_dict['nonce'] == nonce assert param_dict['username'] == username assert param_dict['uri'] == self.request.path h1 = md5(utf8('%s:%s:%s' % (username, realm, password))).hexdigest() h2 = md5(utf8('%s:%s' % (self.request.method, self.request.path))).hexdigest() digest = md5(utf8('%s:%s:%s' % (h1, nonce, h2))).hexdigest() if digest == param_dict['response']: self.write('ok') else: self.write('fail') else: self.set_status(401) self.set_header('WWW-Authenticate', 'Digest realm="%s", nonce="%s", opaque="%s"' % (realm, nonce, opaque))
def raw_fetch(self, headers, body): with closing(Resolver(io_loop=self.io_loop)) as resolver: with closing(SimpleAsyncHTTPClient(self.io_loop, resolver=resolver)) as client: conn = RawRequestHTTPConnection( self.io_loop, client, httpclient._RequestProxy( httpclient.HTTPRequest(self.get_url("/")), dict(httpclient.HTTPRequest._DEFAULTS)), None, self.stop, 1024 * 1024, resolver) conn.set_request( b"\r\n".join(headers + [utf8("Content-Length: %d\r\n" % len(body))]) + b"\r\n" + body) response = self.wait() response.rethrow() return response
def test_body_encoding(self): unicode_body = u("\xe9") byte_body = binascii.a2b_hex(b"e9") # unicode string in body gets converted to utf8 response = self.fetch("/echopost", method="POST", body=unicode_body, headers={"Content-Type": "application/blah"}) self.assertEqual(response.headers["Content-Length"], "2") self.assertEqual(response.body, utf8(unicode_body)) # byte strings pass through directly response = self.fetch("/echopost", method="POST", body=byte_body, headers={"Content-Type": "application/blah"}) self.assertEqual(response.headers["Content-Length"], "1") self.assertEqual(response.body, byte_body) # Mixing unicode in headers and byte string bodies shouldn't # break anything response = self.fetch("/echopost", method="POST", body=byte_body, headers={"Content-Type": "application/blah"}, user_agent=u("foo")) self.assertEqual(response.headers["Content-Length"], "1") self.assertEqual(response.body, byte_body)
def test_special_filenames(self): filenames = ['a;b.txt', 'a"b.txt', 'a";b.txt', 'a;"b.txt', 'a";";.txt', 'a\\"b.txt', 'a\\b.txt', ] for filename in filenames: logging.debug("trying filename %r", filename) data = """\ --1234 Content-Disposition: form-data; name="files"; filename="%s" Foo --1234--""" % filename.replace('\\', '\\\\').replace('"', '\\"') data = utf8(data.replace("\n", "\r\n")) args = {} files = {} parse_multipart_form_data(b"1234", data, args, files) file = files["files"][0] self.assertEqual(file["filename"], filename) self.assertEqual(file["body"], b"Foo")
def _curl_setup_request(curl, request, buffer, headers): curl.setopt(pycurl.URL, native_str(request.url)) # libcurl's magic "Expect: 100-continue" behavior causes delays # with servers that don't support it (which include, among others, # Google's OpenID endpoint). Additionally, this behavior has # a bug in conjunction with the curl_multi_socket_action API # (https://sourceforge.net/tracker/?func=detail&atid=100976&aid=3039744&group_id=976), # which increases the delays. It's more trouble than it's worth, # so just turn off the feature (yes, setting Expect: to an empty # value is the official way to disable this) if "Expect" not in request.headers: request.headers["Expect"] = "" # libcurl adds Pragma: no-cache by default; disable that too if "Pragma" not in request.headers: request.headers["Pragma"] = "" # Request headers may be either a regular dict or HTTPHeaders object if isinstance(request.headers, httputil.HTTPHeaders): curl.setopt(pycurl.HTTPHEADER, [native_str("%s: %s" % i) for i in request.headers.get_all()]) else: curl.setopt(pycurl.HTTPHEADER, [native_str("%s: %s" % i) for i in request.headers.items()]) if request.header_callback: curl.setopt(pycurl.HEADERFUNCTION, lambda line: request.header_callback(native_str(line))) else: curl.setopt(pycurl.HEADERFUNCTION, lambda line: _curl_header_callback(headers, native_str(line))) if request.streaming_callback: write_function = request.streaming_callback else: write_function = buffer.write if bytes_type is str: # py2 curl.setopt(pycurl.WRITEFUNCTION, write_function) else: # py3 # Upstream pycurl doesn't support py3, but ubuntu 12.10 includes # a fork/port. That version has a bug in which it passes unicode # strings instead of bytes to the WRITEFUNCTION. This means that # if you use a WRITEFUNCTION (which webalchemy.tornado always does), you cannot # download arbitrary binary data. This needs to be fixed in the # ported pycurl package, but in the meantime this lambda will # make it work for downloading (utf8) text. curl.setopt(pycurl.WRITEFUNCTION, lambda s: write_function(utf8(s))) curl.setopt(pycurl.FOLLOWLOCATION, request.follow_redirects) curl.setopt(pycurl.MAXREDIRS, request.max_redirects) curl.setopt(pycurl.CONNECTTIMEOUT_MS, int(1000 * request.connect_timeout)) curl.setopt(pycurl.TIMEOUT_MS, int(1000 * request.request_timeout)) if request.user_agent: curl.setopt(pycurl.USERAGENT, native_str(request.user_agent)) else: curl.setopt(pycurl.USERAGENT, "Mozilla/5.0 (compatible; pycurl)") if request.network_interface: curl.setopt(pycurl.INTERFACE, request.network_interface) if request.use_gzip: curl.setopt(pycurl.ENCODING, "gzip,deflate") else: curl.setopt(pycurl.ENCODING, "none") if request.proxy_host and request.proxy_port: curl.setopt(pycurl.PROXY, request.proxy_host) curl.setopt(pycurl.PROXYPORT, request.proxy_port) if request.proxy_username: credentials = '%s:%s' % (request.proxy_username, request.proxy_password) curl.setopt(pycurl.PROXYUSERPWD, credentials) else: curl.setopt(pycurl.PROXY, '') curl.unsetopt(pycurl.PROXYUSERPWD) if request.validate_cert: curl.setopt(pycurl.SSL_VERIFYPEER, 1) curl.setopt(pycurl.SSL_VERIFYHOST, 2) else: curl.setopt(pycurl.SSL_VERIFYPEER, 0) curl.setopt(pycurl.SSL_VERIFYHOST, 0) if request.ca_certs is not None: curl.setopt(pycurl.CAINFO, request.ca_certs) else: # There is no way to restore pycurl.CAINFO to its default value # (Using unsetopt makes it reject all certificates). # I don't see any way to read the default value from python so it # can be restored later. We'll have to just leave CAINFO untouched # if no ca_certs file was specified, and require that if any # request uses a custom ca_certs file, they all must. pass if request.allow_ipv6 is False: # Curl behaves reasonably when DNS resolution gives an ipv6 address # that we can't reach, so allow ipv6 unless the user asks to disable. # (but see version check in _process_queue above) curl.setopt(pycurl.IPRESOLVE, pycurl.IPRESOLVE_V4) else: curl.setopt(pycurl.IPRESOLVE, pycurl.IPRESOLVE_WHATEVER) # Set the request method through curl's irritating interface which makes # up names for almost every single method curl_options = { "GET": pycurl.HTTPGET, "POST": pycurl.POST, "PUT": pycurl.UPLOAD, "HEAD": pycurl.NOBODY, } custom_methods = set(["DELETE", "OPTIONS", "PATCH"]) for o in curl_options.values(): curl.setopt(o, False) if request.method in curl_options: curl.unsetopt(pycurl.CUSTOMREQUEST) curl.setopt(curl_options[request.method], True) elif request.allow_nonstandard_methods or request.method in custom_methods: curl.setopt(pycurl.CUSTOMREQUEST, request.method) else: raise KeyError('unknown method ' + request.method) # Handle curl's cryptic options for every individual HTTP method if request.method in ("POST", "PUT"): if request.body is None: raise AssertionError( 'Body must not be empty for "%s" request' % request.method) request_buffer = BytesIO(utf8(request.body)) curl.setopt(pycurl.READFUNCTION, request_buffer.read) if request.method == "POST": def ioctl(cmd): if cmd == curl.IOCMD_RESTARTREAD: request_buffer.seek(0) curl.setopt(pycurl.IOCTLFUNCTION, ioctl) curl.setopt(pycurl.POSTFIELDSIZE, len(request.body)) else: curl.setopt(pycurl.INFILESIZE, len(request.body)) elif request.method == "GET": if request.body is not None: raise AssertionError('Body must be empty for GET request') if request.auth_username is not None: userpwd = "%s:%s" % (request.auth_username, request.auth_password or '') if request.auth_mode is None or request.auth_mode == "basic": curl.setopt(pycurl.HTTPAUTH, pycurl.HTTPAUTH_BASIC) elif request.auth_mode == "digest": curl.setopt(pycurl.HTTPAUTH, pycurl.HTTPAUTH_DIGEST) else: raise ValueError("Unsupported auth_mode %s" % request.auth_mode) curl.setopt(pycurl.USERPWD, native_str(userpwd)) gen_log.debug("%s %s (username: %r)", request.method, request.url, request.auth_username) else: curl.unsetopt(pycurl.USERPWD) gen_log.debug("%s %s", request.method, request.url) if request.client_cert is not None: curl.setopt(pycurl.SSLCERT, request.client_cert) if request.client_key is not None: curl.setopt(pycurl.SSLKEY, request.client_key) if threading.activeCount() > 1: # libcurl/pycurl is not thread-safe by default. When multiple threads # are used, signals should be disabled. This has the side effect # of disabling DNS timeouts in some environments (when libcurl is # not linked against ares), so we don't do it when there is only one # thread. Applications that use many short-lived threads may need # to set NOSIGNAL manually in a prepare_curl_callback since # there may not be any other threads running at the time we call # threading.activeCount. curl.setopt(pycurl.NOSIGNAL, 1) if request.prepare_curl_callback is not None: request.prepare_curl_callback(curl)
def test_unicode_apply(self): def upper(s): return to_unicode(s).upper() template = Template(utf8(u("{% apply upper %}foo \u00e9{% end %}"))) self.assertEqual(template.generate(upper=upper), utf8(u("FOO \u00c9")))
def body(self, value): self._body = utf8(value)
def test_non_ascii_name(self): name = webalchemy.tornado.locale.LOCALE_NAMES['es_LA']['name'] self.assertTrue(isinstance(name, unicode_type)) self.assertEqual(name, u('Espa\u00f1ol')) self.assertEqual(utf8(name), b'Espa\xc3\xb1ol')
def test_body_setter(self): request = HTTPRequest('http://example.com') request.body = 'foo' self.assertEqual(request.body, utf8('foo'))
def test_bytes_logging(self): with ignore_bytes_warning(): # This will be "\xe9" on python 2 or "b'\xe9'" on python 3 self.logger.error(b"\xe9") self.assertEqual(self.get_output(), utf8(repr(b"\xe9")))
def test_unicode_logging(self): self.logger.error(u("\u00e9")) self.assertEqual(self.get_output(), utf8(u("\u00e9")))