def add(self, name, value): # type: (str, str) -> None """Adds a new value for the given key.""" norm_name = _normalized_headers[name] self._last_key = norm_name if norm_name in self: self._dict[norm_name] = (native_str(self[norm_name]) + ',' + native_str(value)) self._as_list[norm_name].append(value) else: self[norm_name] = value
def write_headers(self, start_line, headers, chunk=None, callback=None): if self.method == 'HEAD': self._expected_content_remaining = 0 elif 'Content-Length' in headers: self._expected_content_remaining = int(headers['Content-Length']) else: self._expected_content_remaining = None self.start_response('%s %s' % (start_line.code, start_line.reason), [(native_str(k), native_str(v)) for (k, v) in headers.get_all()]) if chunk is not None: self.write(chunk, callback) elif callback is not None: callback() return _dummy_future
def test_invalid_cookies(self): """ Cookie strings that go against RFC6265 but browsers will send if set via document.cookie. """ # Chunks without an equals sign appear as unnamed values per # https://bugzilla.mozilla.org/show_bug.cgi?id=169091 self.assertIn( 'django_language', parse_cookie('abc=def; unnamed; django_language=en').keys()) # Even a double quote may be an unamed value. self.assertEqual(parse_cookie('a=b; "; c=d'), { 'a': 'b', '': '"', 'c': 'd' }) # Spaces in names and values, and an equals sign in values. self.assertEqual(parse_cookie('a b c=d e = f; gh=i'), { 'a b c': 'd e = f', 'gh': 'i' }) # More characters the spec forbids. self.assertEqual(parse_cookie('a b,c<>@:/[]?{}=d " =e,f g'), {'a b,c<>@:/[]?{}': 'd " =e,f g'}) # Unicode characters. The spec only allows ASCII. self.assertEqual(parse_cookie('saint=André Bessette'), {'saint': native_str('André Bessette')}) # Browsers don't send extra whitespace or semicolons in Cookie headers, # but parse_cookie() should parse whitespace the same way # document.cookie parses whitespace. self.assertEqual(parse_cookie(' = b ; ; = ; c = ; '), { '': 'b', 'c': '' })
def parse_config_file(self, path, final=True): """Parses and loads the Python config file at the given path. If ``final`` is ``False``, parse callbacks will not be run. This is useful for applications that wish to combine configurations from multiple sources. .. versionchanged:: 4.1 Config files are now always interpreted as utf-8 instead of the system default encoding. .. versionchanged:: 4.4 The special variable ``__file__`` is available inside config files, specifying the absolute path to the config file itself. """ config = {'__file__': os.path.abspath(path)} with open(path, 'rb') as f: exec_in(native_str(f.read()), config, config) for name in config: normalized = self._normalize_name(name) if normalized in self._options: self._options[normalized].set(config[name]) if final: self.run_parse_callbacks()
def test_100_continue(self): # Run through a 100-continue interaction by hand: # When given Expect: 100-continue, we get a 100 response after the # headers, and then the real response after the body. stream = IOStream(socket.socket(), io_loop=self.io_loop) stream.connect(("127.0.0.1", self.get_http_port()), callback=self.stop) self.wait() stream.write(b"\r\n".join([ b"POST /hello HTTP/1.1", b"Content-Length: 1024", b"Expect: 100-continue", b"Connection: close", b"\r\n" ]), callback=self.stop) self.wait() stream.read_until(b"\r\n\r\n", self.stop) data = self.wait() self.assertTrue(data.startswith(b"HTTP/1.1 100 "), data) stream.write(b"a" * 1024) stream.read_until(b"\r\n", self.stop) first_line = self.wait() self.assertTrue(first_line.startswith(b"HTTP/1.1 200"), first_line) stream.read_until(b"\r\n\r\n", self.stop) header_data = self.wait() headers = HTTPHeaders.parse(native_str(header_data.decode('latin1'))) stream.read_bytes(int(headers["Content-Length"]), self.stop) body = self.wait() self.assertEqual(body, b"Got 1024 bytes in POST") stream.close()
def parse_body_arguments(content_type, body, arguments, files, headers=None): """Parses a form request body. Supports ``application/x-www-form-urlencoded`` and ``multipart/form-data``. The ``content_type`` parameter should be a string and ``body`` should be a byte string. The ``arguments`` and ``files`` parameters are dictionaries that will be updated with the parsed contents. """ if headers and "Content-Encoding" in headers: gen_log.warning("Unsupported Content-Encoding: %s", headers["Content-Encoding"]) return if content_type.startswith("application/x-www-form-urlencoded"): try: uri_arguments = parse_qs_bytes(native_str(body), keep_blank_values=True) except Exception as e: gen_log.warning("Invalid x-www-form-urlencoded body: %s", e) uri_arguments = {} for name, values in uri_arguments.items(): if values: arguments.setdefault(name, []).extend(values) elif content_type.startswith("multipart/form-data"): try: fields = content_type.split(";") for field in fields: k, sep, v = field.strip().partition("=") if k == "boundary" and v: parse_multipart_form_data(utf8(v), body, arguments, files) break else: raise ValueError("multipart boundary not found") except Exception as e: gen_log.warning("Invalid multipart/form-data: %s", e)
def main(): from salt.ext.tornado.options import define, options, parse_command_line define("print_headers", type=bool, default=False) define("print_body", type=bool, default=True) define("follow_redirects", type=bool, default=True) define("validate_cert", type=bool, default=True) args = parse_command_line() client = HTTPClient() for arg in args: try: response = client.fetch( arg, follow_redirects=options.follow_redirects, validate_cert=options.validate_cert, ) except HTTPError as e: if e.response is not None: response = e.response else: raise if options.print_headers: print(response.headers) if options.print_body: print(native_str(response.body)) client.close()
def compute_accept_value(key): """Computes the value for the Sec-WebSocket-Accept header, given the value for Sec-WebSocket-Key. """ sha1 = hashlib.sha1() sha1.update(utf8(key)) sha1.update(b"258EAFA5-E914-47DA-95CA-C5AB0DC85B11") # Magic value return native_str(base64.b64encode(sha1.digest()))
def _curl_debug(self, debug_type, debug_msg): debug_types = ('I', '<', '>', '<', '>') debug_msg = native_str(debug_msg) if debug_type == 0: curl_log.debug('%s', debug_msg.strip()) elif debug_type in (1, 2): for line in debug_msg.splitlines(): curl_log.debug('%s %s', debug_types[debug_type], line) elif debug_type == 4: curl_log.debug('%s %r', debug_types[debug_type], debug_msg)
def __str__(self): if self.address_family in (socket.AF_INET, socket.AF_INET6): return self.remote_ip elif isinstance(self.address, bytes): # Python 3 with the -bb option warns about str(bytes), # so convert it explicitly. # Unix socket addresses are str on mac but bytes on linux. return native_str(self.address) else: return str(self.address)
def _oauth_parse_response(body): # I can't find an officially-defined encoding for oauth responses and # have never seen anyone use non-ascii. Leave the response in a byte # string for python 2, and use utf8 on python 3. body = escape.native_str(body) p = urlparse.parse_qs(body, keep_blank_values=False) token = dict(key=p["oauth_token"][0], secret=p["oauth_token_secret"][0]) # Add the extra parameters the Provider included to the token special = ("oauth_token", "oauth_token_secret") token.update((k, p[k][0]) for k in p if k not in special) return token
def test_unicode_newlines(self): # Ensure that only \r\n is recognized as a header separator, and not # the other newline-like unicode characters. # Characters that are likely to be problematic can be found in # http://unicode.org/standard/reports/tr13/tr13-5.html # and cpython's unicodeobject.c (which defines the implementation # of unicode_type.splitlines(), and uses a different list than TR13). newlines = [ u'\u001b', # VERTICAL TAB u'\u001c', # FILE SEPARATOR u'\u001d', # GROUP SEPARATOR u'\u001e', # RECORD SEPARATOR u'\u0085', # NEXT LINE u'\u2028', # LINE SEPARATOR u'\u2029', # PARAGRAPH SEPARATOR ] for newline in newlines: # Try the utf8 and latin1 representations of each newline for encoding in ['utf8', 'latin1']: try: try: encoded = newline.encode(encoding) except UnicodeEncodeError: # Some chars cannot be represented in latin1 continue data = b'Cookie: foo=' + encoded + b'bar' # parse() wants a native_str, so decode through latin1 # in the same way the real parser does. headers = HTTPHeaders.parse( native_str(data.decode('latin1'))) expected = [ ('Cookie', 'foo=' + native_str(encoded.decode('latin1')) + 'bar') ] self.assertEqual(expected, list(headers.get_all())) except Exception: gen_log.warning("failed while trying %r in %s", newline, encoding) raise
def parse_response_start_line(line): """Returns a (version, code, reason) tuple for an HTTP 1.x response line. The response is a `collections.namedtuple`. >>> parse_response_start_line("HTTP/1.1 200 OK") ResponseStartLine(version='HTTP/1.1', code=200, reason='OK') """ line = native_str(line) match = re.match("(HTTP/1.[0-9]) ([0-9]+) ([^\r]*)", line) if not match: raise HTTPInputError("Error parsing response start line") return ResponseStartLine(match.group(1), int(match.group(2)), match.group(3))
def _parse_headers(self, data): # The lstrip removes newlines that some implementations sometimes # insert between messages of a reused connection. Per RFC 7230, # we SHOULD ignore at least one empty line before the request. # http://tools.ietf.org/html/rfc7230#section-3.5 data = native_str(data.decode('latin1')).lstrip("\r\n") # RFC 7230 section allows for both CRLF and bare LF. eol = data.find("\n") start_line = data[:eol].rstrip("\r") try: headers = httputil.HTTPHeaders.parse(data[eol:]) except ValueError: # probably form split() if there was no ':' in the line raise httputil.HTTPInputError("Malformed HTTP headers: %r" % data[eol:100]) return start_line, headers
def _curl_header_callback(self, headers, header_callback, header_line): header_line = native_str(header_line.decode('latin1')) if header_callback is not None: self.io_loop.add_callback(header_callback, header_line) # header_line as returned by curl includes the end-of-line characters. # whitespace at the start should be preserved to allow multi-line headers header_line = header_line.rstrip() if header_line.startswith("HTTP/"): headers.clear() try: (__, __, reason) = httputil.parse_response_start_line(header_line) header_line = "X-Http-Reason: %s" % reason except httputil.HTTPInputError: return if not header_line: return headers.parse_line(header_line)
def write_headers(self, start_line, headers, chunk=None, callback=None): """Implements `.HTTPConnection.write_headers`.""" lines = [] if self.is_client: self._request_start_line = start_line lines.append( utf8('%s %s HTTP/1.1' % (start_line[0], start_line[1]))) # Client requests with a non-empty body must have either a # Content-Length or a Transfer-Encoding. self._chunking_output = (start_line.method in ('POST', 'PUT', 'PATCH') and 'Content-Length' not in headers and 'Transfer-Encoding' not in headers) else: self._response_start_line = start_line lines.append( utf8('HTTP/1.1 %d %s' % (start_line[1], start_line[2]))) self._chunking_output = ( # TODO: should this use # self._request_start_line.version or # start_line.version? self._request_start_line.version == 'HTTP/1.1' and # 1xx, 204 and 304 responses have no body (not even a zero-length # body), and so should not have either Content-Length or # Transfer-Encoding headers. start_line.code not in (204, 304) and (start_line.code < 100 or start_line.code >= 200) and # No need to chunk the output if a Content-Length is specified. 'Content-Length' not in headers and # Applications are discouraged from touching Transfer-Encoding, # but if they do, leave it alone. 'Transfer-Encoding' not in headers) # If a 1.0 client asked for keep-alive, add the header. if (self._request_start_line.version == 'HTTP/1.0' and (self._request_headers.get('Connection', '').lower() == 'keep-alive')): headers['Connection'] = 'Keep-Alive' if self._chunking_output: headers['Transfer-Encoding'] = 'chunked' if (not self.is_client and (self._request_start_line.method == 'HEAD' or start_line.code == 304)): self._expected_content_remaining = 0 elif 'Content-Length' in headers: self._expected_content_remaining = int(headers['Content-Length']) else: self._expected_content_remaining = None # TODO: headers are supposed to be of type str, but we still have some # cases that let bytes slip through. Remove these native_str calls when those # are fixed. header_lines = (native_str(n) + ": " + native_str(v) for n, v in headers.get_all()) if PY3: lines.extend(l.encode('latin1') for l in header_lines) else: lines.extend(header_lines) for line in lines: if b'\n' in line: raise ValueError('Newline in header: ' + repr(line)) future = None if self.stream.closed(): future = self._write_future = Future() future.set_exception(iostream.StreamClosedError()) future.exception() else: if callback is not None: self._write_callback = stack_context.wrap(callback) else: future = self._write_future = Future() data = b"\r\n".join(lines) + b"\r\n\r\n" if chunk: data += self._format_chunk(chunk) self._pending_write = self.stream.write(data) self._pending_write.add_done_callback(self._on_write_complete) return future
def __init__(self, template_string, name="<string>", loader=None, compress_whitespace=_UNSET, autoescape=_UNSET, whitespace=None): """Construct a Template. :arg str template_string: the contents of the template file. :arg str name: the filename from which the template was loaded (used for error message). :arg tornado.template.BaseLoader loader: the `~tornado.template.BaseLoader` responsible for this template, used to resolve ``{% include %}`` and ``{% extend %}`` directives. :arg bool compress_whitespace: Deprecated since Tornado 4.3. Equivalent to ``whitespace="single"`` if true and ``whitespace="all"`` if false. :arg str autoescape: The name of a function in the template namespace, or ``None`` to disable escaping by default. :arg str whitespace: A string specifying treatment of whitespace; see `filter_whitespace` for options. .. versionchanged:: 4.3 Added ``whitespace`` parameter; deprecated ``compress_whitespace``. """ self.name = escape.native_str(name) if compress_whitespace is not _UNSET: # Convert deprecated compress_whitespace (bool) to whitespace (str). if whitespace is not None: raise Exception("cannot set both whitespace and compress_whitespace") whitespace = "single" if compress_whitespace else "all" if whitespace is None: if loader and loader.whitespace: whitespace = loader.whitespace else: # Whitespace defaults by filename. if name.endswith(".html") or name.endswith(".js"): whitespace = "single" else: whitespace = "all" # Validate the whitespace setting. filter_whitespace(whitespace, '') if autoescape is not _UNSET: self.autoescape = autoescape elif loader: self.autoescape = loader.autoescape else: self.autoescape = _DEFAULT_AUTOESCAPE self.namespace = loader.namespace if loader else {} reader = _TemplateReader(name, escape.native_str(template_string), whitespace) self.file = _File(self, _parse(reader, self)) self.code = self._generate_python(loader) self.loader = loader try: # Under python2.5, the fake filename used here must match # the module name used in __name__ below. # The dont_inherit flag prevents template.py's future imports # from being applied to the generated code. self.compiled = compile( escape.to_unicode(self.code), "%s.generated.py" % self.name.replace('.', '_'), "exec", dont_inherit=True) except Exception: formatted_code = _format_code(self.code).rstrip() app_log.error("%s code:\n%s", self.name, formatted_code) raise
def py_escape(s): self.assertEqual(type(s), bytes) return repr(native_str(s))
def test_non_ascii_header(self): # Non-ascii headers are sent as latin1. response = self.fetch("/set_header?k=foo&v=%E9") response.rethrow() self.assertEqual(response.headers["Foo"], native_str(u"\u00e9"))
def _curl_setup_request(self, curl, request, buffer, headers): curl.setopt(pycurl.URL, native_str(request.url)) # libcurl's magic "Expect: 100-continue" behavior causes delays # with servers that don't support it (which include, among others, # Google's OpenID endpoint). Additionally, this behavior has # a bug in conjunction with the curl_multi_socket_action API # (https://sourceforge.net/tracker/?func=detail&atid=100976&aid=3039744&group_id=976), # which increases the delays. It's more trouble than it's worth, # so just turn off the feature (yes, setting Expect: to an empty # value is the official way to disable this) if "Expect" not in request.headers: request.headers["Expect"] = "" # libcurl adds Pragma: no-cache by default; disable that too if "Pragma" not in request.headers: request.headers["Pragma"] = "" curl.setopt(pycurl.HTTPHEADER, ["%s: %s" % (native_str(k), native_str(v)) for k, v in request.headers.get_all()]) curl.setopt(pycurl.HEADERFUNCTION, functools.partial(self._curl_header_callback, headers, request.header_callback)) if request.streaming_callback: def write_function(chunk): self.io_loop.add_callback(request.streaming_callback, chunk) else: write_function = buffer.write if bytes is str: # py2 curl.setopt(pycurl.WRITEFUNCTION, write_function) else: # py3 # Upstream pycurl doesn't support py3, but ubuntu 12.10 includes # a fork/port. That version has a bug in which it passes unicode # strings instead of bytes to the WRITEFUNCTION. This means that # if you use a WRITEFUNCTION (which tornado always does), you cannot # download arbitrary binary data. This needs to be fixed in the # ported pycurl package, but in the meantime this lambda will # make it work for downloading (utf8) text. curl.setopt(pycurl.WRITEFUNCTION, lambda s: write_function(utf8(s))) curl.setopt(pycurl.FOLLOWLOCATION, request.follow_redirects) curl.setopt(pycurl.MAXREDIRS, request.max_redirects) curl.setopt(pycurl.CONNECTTIMEOUT_MS, int(1000 * request.connect_timeout)) curl.setopt(pycurl.TIMEOUT_MS, int(1000 * request.request_timeout)) if request.user_agent: curl.setopt(pycurl.USERAGENT, native_str(request.user_agent)) else: curl.setopt(pycurl.USERAGENT, "Mozilla/5.0 (compatible; pycurl)") if request.network_interface: curl.setopt(pycurl.INTERFACE, request.network_interface) if request.decompress_response: curl.setopt(pycurl.ENCODING, "gzip,deflate") else: curl.setopt(pycurl.ENCODING, "none") if request.proxy_host and request.proxy_port: curl.setopt(pycurl.PROXY, request.proxy_host) curl.setopt(pycurl.PROXYPORT, request.proxy_port) if request.proxy_username: credentials = '%s:%s' % (request.proxy_username, request.proxy_password) curl.setopt(pycurl.PROXYUSERPWD, credentials) if (request.proxy_auth_mode is None or request.proxy_auth_mode == "basic"): curl.setopt(pycurl.PROXYAUTH, pycurl.HTTPAUTH_BASIC) elif request.proxy_auth_mode == "digest": curl.setopt(pycurl.PROXYAUTH, pycurl.HTTPAUTH_DIGEST) else: raise ValueError( "Unsupported proxy_auth_mode %s" % request.proxy_auth_mode) else: curl.setopt(pycurl.PROXY, '') curl.unsetopt(pycurl.PROXYUSERPWD) if request.validate_cert: curl.setopt(pycurl.SSL_VERIFYPEER, 1) curl.setopt(pycurl.SSL_VERIFYHOST, 2) else: curl.setopt(pycurl.SSL_VERIFYPEER, 0) curl.setopt(pycurl.SSL_VERIFYHOST, 0) if request.ca_certs is not None: curl.setopt(pycurl.CAINFO, request.ca_certs) else: # There is no way to restore pycurl.CAINFO to its default value # (Using unsetopt makes it reject all certificates). # I don't see any way to read the default value from python so it # can be restored later. We'll have to just leave CAINFO untouched # if no ca_certs file was specified, and require that if any # request uses a custom ca_certs file, they all must. pass if request.allow_ipv6 is False: # Curl behaves reasonably when DNS resolution gives an ipv6 address # that we can't reach, so allow ipv6 unless the user asks to disable. curl.setopt(pycurl.IPRESOLVE, pycurl.IPRESOLVE_V4) else: curl.setopt(pycurl.IPRESOLVE, pycurl.IPRESOLVE_WHATEVER) # Set the request method through curl's irritating interface which makes # up names for almost every single method curl_options = { "GET": pycurl.HTTPGET, "POST": pycurl.POST, "PUT": pycurl.UPLOAD, "HEAD": pycurl.NOBODY, } custom_methods = set(["DELETE", "OPTIONS", "PATCH"]) for o in curl_options.values(): curl.setopt(o, False) if request.method in curl_options: curl.unsetopt(pycurl.CUSTOMREQUEST) curl.setopt(curl_options[request.method], True) elif request.allow_nonstandard_methods or request.method in custom_methods: curl.setopt(pycurl.CUSTOMREQUEST, request.method) else: raise KeyError('unknown method ' + request.method) body_expected = request.method in ("POST", "PATCH", "PUT") body_present = request.body is not None if not request.allow_nonstandard_methods: # Some HTTP methods nearly always have bodies while others # almost never do. Fail in this case unless the user has # opted out of sanity checks with allow_nonstandard_methods. if ((body_expected and not body_present) or (body_present and not body_expected)): raise ValueError( 'Body must %sbe None for method %s (unless ' 'allow_nonstandard_methods is true)' % ('not ' if body_expected else '', request.method)) if body_expected or body_present: if request.method == "GET": # Even with `allow_nonstandard_methods` we disallow # GET with a body (because libcurl doesn't allow it # unless we use CUSTOMREQUEST). While the spec doesn't # forbid clients from sending a body, it arguably # disallows the server from doing anything with them. raise ValueError('Body must be None for GET request') request_buffer = BytesIO(utf8(request.body or '')) def ioctl(cmd): if cmd == curl.IOCMD_RESTARTREAD: request_buffer.seek(0) curl.setopt(pycurl.READFUNCTION, request_buffer.read) curl.setopt(pycurl.IOCTLFUNCTION, ioctl) if request.method == "POST": curl.setopt(pycurl.POSTFIELDSIZE, len(request.body or '')) else: curl.setopt(pycurl.UPLOAD, True) curl.setopt(pycurl.INFILESIZE, len(request.body or '')) if request.auth_username is not None: userpwd = "%s:%s" % (request.auth_username, request.auth_password or '') if request.auth_mode is None or request.auth_mode == "basic": curl.setopt(pycurl.HTTPAUTH, pycurl.HTTPAUTH_BASIC) elif request.auth_mode == "digest": curl.setopt(pycurl.HTTPAUTH, pycurl.HTTPAUTH_DIGEST) else: raise ValueError("Unsupported auth_mode %s" % request.auth_mode) curl.setopt(pycurl.USERPWD, native_str(userpwd)) curl_log.debug("%s %s (username: %r)", request.method, request.url, request.auth_username) else: curl.unsetopt(pycurl.USERPWD) curl_log.debug("%s %s", request.method, request.url) if request.client_cert is not None: curl.setopt(pycurl.SSLCERT, request.client_cert) if request.client_key is not None: curl.setopt(pycurl.SSLKEY, request.client_key) if request.ssl_options is not None: raise ValueError("ssl_options not supported in curl_httpclient") if threading.activeCount() > 1: # libcurl/pycurl is not thread-safe by default. When multiple threads # are used, signals should be disabled. This has the side effect # of disabling DNS timeouts in some environments (when libcurl is # not linked against ares), so we don't do it when there is only one # thread. Applications that use many short-lived threads may need # to set NOSIGNAL manually in a prepare_curl_callback since # there may not be any other threads running at the time we call # threading.activeCount. curl.setopt(pycurl.NOSIGNAL, 1) if request.prepare_curl_callback is not None: request.prepare_curl_callback(curl)