def split_uri(uri): # urlsplit handles byte input by returning bytes on py3, so # scheme, netloc, path, query, and fragment are bytes scheme = netloc = path = query = fragment = b"" # urlsplit below will treat this as a scheme-less netloc, thereby losing # the original intent of the request. Here we shamelessly stole 4 lines of # code from the CPython stdlib to parse out the fragment and query but # leave the path alone. See # https://github.com/python/cpython/blob/8c9e9b0cd5b24dfbf1424d1f253d02de80e8f5ef/Lib/urllib/parse.py#L465-L468 # and https://github.com/Pylons/waitress/issues/260 if uri[:2] == b"//": path = uri if b"#" in path: path, fragment = path.split(b"#", 1) if b"?" in path: path, query = path.split(b"?", 1) else: try: scheme, netloc, path, query, fragment = urlparse.urlsplit(uri) except UnicodeError: raise ParsingError("Bad URI") return ( tostr(scheme), tostr(netloc), unquote_bytes_to_wsgi(path), tostr(query), tostr(fragment), )
def split_uri(uri): # urlsplit handles byte input by returning bytes on py3, so # scheme, netloc, path, query, and fragment are bytes scheme, netloc, path, query, fragment = urlparse.urlsplit(uri) return ( tostr(scheme), tostr(netloc), unquote_bytes_to_wsgi(path), tostr(query), tostr(fragment), )
def split_uri(uri): # urlsplit handles byte input by returning bytes on py3, so # scheme, netloc, path, query, and fragment are bytes try: scheme, netloc, path, query, fragment = urlparse.urlsplit(uri) except UnicodeError: raise ParsingError('Bad URI') return ( tostr(scheme), tostr(netloc), unquote_bytes_to_wsgi(path), tostr(query), tostr(fragment), )
def get_header_lines(header): """ Splits the header into lines, putting multi-line headers together. """ r = [] lines = header.split(b"\r\n") for line in lines: if not line: continue if b"\r" in line or b"\n" in line: raise ParsingError('Bare CR or LF found in header line "%s"' % tostr(line)) if line.startswith((b" ", b"\t")): if not r: # https://corte.si/posts/code/pathod/pythonservers/index.html raise ParsingError('Malformed header line "%s"' % tostr(line)) r[-1] += line else: r.append(line) return r
def get_header_lines(header): """ Splits the header into lines, putting multi-line headers together. """ r = [] lines = header.split(b'\n') for line in lines: if line.startswith((b' ', b'\t')): if not r: # http://corte.si/posts/code/pathod/pythonservers/index.html raise ParsingError('Malformed header line "%s"' % tostr(line)) r[-1] += line else: r.append(line) return r
def get_header_lines(header): """ Splits the header into lines, putting multi-line headers together. """ r = [] lines = header.split(b'\n') for line in lines: if line.startswith((b' ', b'\t')): if not r: # https://corte.si/posts/code/pathod/pythonservers/index.html raise ParsingError('Malformed header line "%s"' % tostr(line)) r[-1] += line else: r.append(line) return r
def crack_first_line(line): m = first_line_re.match(line) if m is not None and m.end() == len(line): if m.group(3): version = m.group(5) else: version = b"" method = m.group(1) # the request methods that are currently defined are all uppercase: # https://www.iana.org/assignments/http-methods/http-methods.xhtml and # the request method is case sensitive according to # https://tools.ietf.org/html/rfc7231#section-4.1 # By disallowing anything but uppercase methods we save poor # unsuspecting souls from sending lowercase HTTP methods to waitress # and having the request complete, while servers like nginx drop the # request onto the floor. if method != method.upper(): raise ParsingError('Malformed HTTP method "%s"' % tostr(method)) uri = m.group(2) return method, uri, version else: return b"", b"", b""
def crack_first_line(line): m = first_line_re.match(line) if m is not None and m.end() == len(line): if m.group(3): version = m.group(5) else: version = None method = m.group(1) # the request methods that are currently defined are all uppercase: # https://www.iana.org/assignments/http-methods/http-methods.xhtml and # the request method is case sensitive according to # https://tools.ietf.org/html/rfc7231#section-4.1 # By disallowing anything but uppercase methods we save poor # unsuspecting souls from sending lowercase HTTP methods to waitress # and having the request complete, while servers like nginx drop the # request onto the floor. if method != method.upper(): raise ParsingError('Malformed HTTP method "%s"' % tostr(method)) uri = m.group(2) return method, uri, version else: return b'', b'', b''
def parse_header(self, header_plus): """ Parses the header_plus block of text (the headers plus the first line of the request). """ index = header_plus.find(b"\r\n") if index >= 0: first_line = header_plus[:index].rstrip() header = header_plus[index + 2:] else: raise ParsingError("HTTP message header invalid") if b"\r" in first_line or b"\n" in first_line: raise ParsingError("Bare CR or LF found in HTTP message") self.first_line = first_line # for testing lines = get_header_lines(header) headers = self.headers for line in lines: header = HEADER_FIELD.match(line) if not header: raise ParsingError("Invalid header") key, value = header.group("name", "value") if b"_" in key: # TODO(xistence): Should we drop this request instead? continue # Only strip off whitespace that is considered valid whitespace by # RFC7230, don't strip the rest value = value.strip(b" \t") key1 = tostr(key.upper().replace(b"-", b"_")) # If a header already exists, we append subsequent values # seperated by a comma. Applications already need to handle # the comma seperated values, as HTTP front ends might do # the concatenation for you (behavior specified in RFC2616). try: headers[key1] += tostr(b", " + value) except KeyError: headers[key1] = tostr(value) # command, uri, version will be bytes command, uri, version = crack_first_line(first_line) version = tostr(version) command = tostr(command) self.command = command self.version = version ( self.proxy_scheme, self.proxy_netloc, self.path, self.query, self.fragment, ) = split_uri(uri) self.url_scheme = self.adj.url_scheme connection = headers.get("CONNECTION", "") if version == "1.0": if connection.lower() != "keep-alive": self.connection_close = True if version == "1.1": # since the server buffers data from chunked transfers and clients # never need to deal with chunked requests, downstream clients # should not see the HTTP_TRANSFER_ENCODING header; we pop it # here te = headers.pop("TRANSFER_ENCODING", "") # NB: We can not just call bare strip() here because it will also # remove other non-printable characters that we explicitly do not # want removed so that if someone attempts to smuggle a request # with these characters we don't fall prey to it. # # For example \x85 is stripped by default, but it is not considered # valid whitespace to be stripped by RFC7230. encodings = [ encoding.strip(" \t").lower() for encoding in te.split(",") if encoding ] for encoding in encodings: # Out of the transfer-codings listed in # https://tools.ietf.org/html/rfc7230#section-4 we only support # chunked at this time. # Note: the identity transfer-coding was removed in RFC7230: # https://tools.ietf.org/html/rfc7230#appendix-A.2 and is thus # not supported if encoding not in {"chunked"}: raise TransferEncodingNotImplemented( "Transfer-Encoding requested is not supported.") if encodings and encodings[-1] == "chunked": self.chunked = True buf = OverflowableBuffer(self.adj.inbuf_overflow) self.body_rcv = ChunkedReceiver(buf) elif encodings: # pragma: nocover raise TransferEncodingNotImplemented( "Transfer-Encoding requested is not supported.") expect = headers.get("EXPECT", "").lower() self.expect_continue = expect == "100-continue" if connection.lower() == "close": self.connection_close = True if not self.chunked: try: cl = int(headers.get("CONTENT_LENGTH", 0)) except ValueError: raise ParsingError("Content-Length is invalid") self.content_length = cl if cl > 0: buf = OverflowableBuffer(self.adj.inbuf_overflow) self.body_rcv = FixedStreamReceiver(cl, buf)
def parse_header(self, header_plus): """ Parses the header_plus block of text (the headers plus the first line of the request). """ index = header_plus.find(b'\n') if index >= 0: first_line = header_plus[:index].rstrip() header = header_plus[index + 1:] else: first_line = header_plus.rstrip() header = b'' self.first_line = first_line # for testing lines = get_header_lines(header) headers = self.headers for line in lines: index = line.find(b':') if index > 0: key = line[:index] if b'_' in key: continue value = line[index + 1:].strip() key1 = tostr(key.upper().replace(b'-', b'_')) # If a header already exists, we append subsequent values # seperated by a comma. Applications already need to handle # the comma seperated values, as HTTP front ends might do # the concatenation for you (behavior specified in RFC2616). try: headers[key1] += tostr(b', ' + value) except KeyError: headers[key1] = tostr(value) # else there's garbage in the headers? # command, uri, version will be bytes command, uri, version = crack_first_line(first_line) version = tostr(version) command = tostr(command) self.command = command self.version = version (self.proxy_scheme, self.proxy_netloc, self.path, self.query, self.fragment) = split_uri(uri) self.url_scheme = self.adj.url_scheme connection = headers.get('CONNECTION', '') if version == '1.0': if connection.lower() != 'keep-alive': self.connection_close = True if version == '1.1': # since the server buffers data from chunked transfers and clients # never need to deal with chunked requests, downstream clients # should not see the HTTP_TRANSFER_ENCODING header; we pop it # here te = headers.pop('TRANSFER_ENCODING', '') if te.lower() == 'chunked': self.chunked = True buf = OverflowableBuffer(self.adj.inbuf_overflow) self.body_rcv = ChunkedReceiver(buf) expect = headers.get('EXPECT', '').lower() self.expect_continue = expect == '100-continue' if connection.lower() == 'close': self.connection_close = True if not self.chunked: try: cl = int(headers.get('CONTENT_LENGTH', 0)) except ValueError: cl = 0 self.content_length = cl if cl > 0: buf = OverflowableBuffer(self.adj.inbuf_overflow) self.body_rcv = FixedStreamReceiver(cl, buf)
def parse_header(self, header_plus): """ Parses the header_plus block of text (the headers plus the first line of the request). """ index = header_plus.find(b'\n') if index >= 0: first_line = header_plus[:index].rstrip() header = header_plus[index + 1:] else: first_line = header_plus.rstrip() header = b'' self.first_line = first_line # for testing lines = get_header_lines(header) headers = self.headers for line in lines: index = line.find(b':') if index > 0: key = line[:index] value = line[index + 1:].strip() key1 = tostr(key.upper().replace(b'-', b'_')) # If a header already exists, we append subsequent values # seperated by a comma. Applications already need to handle # the comma seperated values, as HTTP front ends might do # the concatenation for you (behavior specified in RFC2616). try: headers[key1] += tostr(b', ' + value) except KeyError: headers[key1] = tostr(value) # else there's garbage in the headers? # command, uri, version will be bytes command, uri, version = crack_first_line(first_line) version = tostr(version) command = tostr(command) self.command = command self.version = version (self.proxy_scheme, self.proxy_netloc, self.path, self.query, self.fragment) = split_uri(uri) self.url_scheme = self.adj.url_scheme connection = headers.get('CONNECTION', '') if version == '1.0': if connection.lower() != 'keep-alive': self.connection_close = True if version == '1.1': te = headers.get('TRANSFER_ENCODING', '') if te == 'chunked': self.chunked = True buf = OverflowableBuffer(self.adj.inbuf_overflow) self.body_rcv = ChunkedReceiver(buf) expect = headers.get('EXPECT', '').lower() self.expect_continue = expect == '100-continue' if connection.lower() == 'close': self.connection_close = True if not self.chunked: try: cl = int(headers.get('CONTENT_LENGTH', 0)) except ValueError: cl = 0 self.content_length = cl if cl > 0: buf = OverflowableBuffer(self.adj.inbuf_overflow) self.body_rcv = FixedStreamReceiver(cl, buf)