Example #1
def split_uri(uri):
    # urlsplit handles byte input by returning bytes on py3, so
    # scheme, netloc, path, query, and fragment are bytes

    scheme = netloc = path = query = fragment = b""

    # urlsplit below will treat this as a scheme-less netloc, thereby losing
    # the original intent of the request. Here we shamelessly stole 4 lines of
    # code from the CPython stdlib to parse out the fragment and query but
    # leave the path alone. See
    # https://github.com/python/cpython/blob/8c9e9b0cd5b24dfbf1424d1f253d02de80e8f5ef/Lib/urllib/parse.py#L465-L468
    # and https://github.com/Pylons/waitress/issues/260

    if uri[:2] == b"//":
        path = uri

        if b"#" in path:
            path, fragment = path.split(b"#", 1)

        if b"?" in path:
            path, query = path.split(b"?", 1)
    else:
        try:
            scheme, netloc, path, query, fragment = urlparse.urlsplit(uri)
        except UnicodeError:
            raise ParsingError("Bad URI")

    return (
        tostr(scheme),
        tostr(netloc),
        unquote_bytes_to_wsgi(path),
        tostr(query),
        tostr(fragment),
    )
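
For context on the two branches above, here is a minimal standard-library sketch of the urlsplit behaviour they are written around (the helpers tostr, unquote_bytes_to_wsgi and ParsingError are assumed to come from the surrounding waitress module and are not needed here):

from urllib.parse import urlsplit

# On byte input, urlsplit returns a SplitResult whose fields are also bytes.
parts = urlsplit(b"http://example.com/a%20b?x=1#frag")
assert parts.scheme == b"http"
assert parts.netloc == b"example.com"
assert parts.path == b"/a%20b"
assert parts.query == b"x=1"
assert parts.fragment == b"frag"

# A request target that starts with "//" is parsed as a scheme-less netloc,
# losing part of the path -- the case the first branch above handles by hand
# (see https://github.com/Pylons/waitress/issues/260).
parts = urlsplit(b"//foo/bar")
assert parts.netloc == b"foo"
assert parts.path == b"/bar"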
Example #2
def split_uri(uri):
    # urlsplit handles byte input by returning bytes on py3, so
    # scheme, netloc, path, query, and fragment are bytes
    scheme, netloc, path, query, fragment = urlparse.urlsplit(uri)
    return (
        tostr(scheme),
        tostr(netloc),
        unquote_bytes_to_wsgi(path),
        tostr(query),
        tostr(fragment),
        )
Example #3
def split_uri(uri):
    # urlsplit handles byte input by returning bytes on py3, so
    # scheme, netloc, path, query, and fragment are bytes
    scheme, netloc, path, query, fragment = urlparse.urlsplit(uri)
    return (
        tostr(scheme),
        tostr(netloc),
        unquote_bytes_to_wsgi(path),
        tostr(query),
        tostr(fragment),
    )
Example #4
def split_uri(uri):
    # urlsplit handles byte input by returning bytes on py3, so
    # scheme, netloc, path, query, and fragment are bytes
    try:
        scheme, netloc, path, query, fragment = urlparse.urlsplit(uri)
    except UnicodeError:
        raise ParsingError('Bad URI')
    return (
        tostr(scheme),
        tostr(netloc),
        unquote_bytes_to_wsgi(path),
        tostr(query),
        tostr(fragment),
    )
Example #5
def split_uri(uri):
    # urlsplit handles byte input by returning bytes on py3, so
    # scheme, netloc, path, query, and fragment are bytes
    try:
        scheme, netloc, path, query, fragment = urlparse.urlsplit(uri)
    except UnicodeError:
        raise ParsingError('Bad URI')
    return (
        tostr(scheme),
        tostr(netloc),
        unquote_bytes_to_wsgi(path),
        tostr(query),
        tostr(fragment),
    )
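
The try/except added in Examples #4 and #5 guards the byte path: urlsplit coerces byte input to str internally using an ASCII decode, so a request target containing non-ASCII bytes raises UnicodeDecodeError (a subclass of UnicodeError) instead of returning byte components. A minimal illustration:

from urllib.parse import urlsplit

try:
    urlsplit(b"/caf\xe9")      # non-ASCII byte in the request target
except UnicodeError as exc:
    print("rejected:", exc)    # 'ascii' codec can't decode byte 0xe9 ...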
Example #6
def get_header_lines(header):
    """
    Splits the header into lines, putting multi-line headers together.
    """
    r = []
    lines = header.split(b"\r\n")
    for line in lines:
        if not line:
            continue

        if b"\r" in line or b"\n" in line:
            raise ParsingError('Bare CR or LF found in header line "%s"' % tostr(line))

        if line.startswith((b" ", b"\t")):
            if not r:
                # https://corte.si/posts/code/pathod/pythonservers/index.html
                raise ParsingError('Malformed header line "%s"' % tostr(line))
            r[-1] += line
        else:
            r.append(line)
    return r
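
A quick usage sketch for the Example #6 version above, assuming that definition is in scope (the waitress helpers ParsingError and tostr are only reached on its error paths): a continuation line beginning with a space or tab (obs-fold) is folded back onto the previous header line, and the empty string produced by the trailing CRLF is skipped.

raw = (
    b"Host: example.com\r\n"
    b"X-Custom: part one\r\n"
    b"\tpart two\r\n"
)
get_header_lines(raw)
# -> [b"Host: example.com", b"X-Custom: part one\tpart two"]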
Example #7
def get_header_lines(header):
    """
    Splits the header into lines, putting multi-line headers together.
    """
    r = []
    lines = header.split(b'\n')
    for line in lines:
        if line.startswith((b' ', b'\t')):
            if not r:
                # http://corte.si/posts/code/pathod/pythonservers/index.html
                raise ParsingError('Malformed header line "%s"' % tostr(line))
            r[-1] += line
        else:
            r.append(line)
    return r
Example #8
def get_header_lines(header):
    """
    Splits the header into lines, putting multi-line headers together.
    """
    r = []
    lines = header.split(b'\n')
    for line in lines:
        if line.startswith((b' ', b'\t')):
            if not r:
                # https://corte.si/posts/code/pathod/pythonservers/index.html
                raise ParsingError('Malformed header line "%s"' % tostr(line))
            r[-1] += line
        else:
            r.append(line)
    return r
Example #9
def crack_first_line(line):
    m = first_line_re.match(line)
    if m is not None and m.end() == len(line):
        if m.group(3):
            version = m.group(5)
        else:
            version = b""
        method = m.group(1)

        # the request methods that are currently defined are all uppercase:
        # https://www.iana.org/assignments/http-methods/http-methods.xhtml and
        # the request method is case sensitive according to
        # https://tools.ietf.org/html/rfc7231#section-4.1

        # By disallowing anything but uppercase methods we save poor
        # unsuspecting souls from sending lowercase HTTP methods to waitress
        # and having the request complete, while servers like nginx drop the
        # request onto the floor.
        if method != method.upper():
            raise ParsingError('Malformed HTTP method "%s"' % tostr(method))
        uri = m.group(2)
        return method, uri, version
    else:
        return b"", b"", b""
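
crack_first_line reads a module-level first_line_re that is not shown in these excerpts. A rough sketch of how the Example #9 version might be exercised, using a simplified stand-in pattern that is only assumed to share the group layout the function reads (group 1 = method, group 2 = request target, group 3 = optional " HTTP/x.y" suffix, group 5 = version) and assuming the Example #9 definition sits in the same script:

import re

# Hypothetical stand-in; the real first_line_re lives elsewhere in parser.py.
first_line_re = re.compile(b"([^ ]+) ([^ ]+)(( HTTP/([0-9.]+))$|$)")

crack_first_line(b"GET /index.html HTTP/1.1")  # -> (b"GET", b"/index.html", b"1.1")
crack_first_line(b"OPTIONS *")                 # -> (b"OPTIONS", b"*", b"")
# crack_first_line(b"get / HTTP/1.1") would raise ParsingError (lowercase method)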
Example #10
def crack_first_line(line):
    m = first_line_re.match(line)
    if m is not None and m.end() == len(line):
        if m.group(3):
            version = m.group(5)
        else:
            version = None
        method = m.group(1)

        # the request methods that are currently defined are all uppercase:
        # https://www.iana.org/assignments/http-methods/http-methods.xhtml and
        # the request method is case sensitive according to
        # https://tools.ietf.org/html/rfc7231#section-4.1

        # By disallowing anything but uppercase methods we save poor
        # unsuspecting souls from sending lowercase HTTP methods to waitress
        # and having the request complete, while servers like nginx drop the
        # request onto the floor.
        if method != method.upper():
            raise ParsingError('Malformed HTTP method "%s"' % tostr(method))
        uri = m.group(2)
        return method, uri, version
    else:
        return b'', b'', b''
Example #11
    def parse_header(self, header_plus):
        """
        Parses the header_plus block of text (the headers plus the
        first line of the request).
        """
        index = header_plus.find(b"\r\n")
        if index >= 0:
            first_line = header_plus[:index].rstrip()
            header = header_plus[index + 2:]
        else:
            raise ParsingError("HTTP message header invalid")

        if b"\r" in first_line or b"\n" in first_line:
            raise ParsingError("Bare CR or LF found in HTTP message")

        self.first_line = first_line  # for testing

        lines = get_header_lines(header)

        headers = self.headers
        for line in lines:
            header = HEADER_FIELD.match(line)

            if not header:
                raise ParsingError("Invalid header")

            key, value = header.group("name", "value")

            if b"_" in key:
                # TODO(xistence): Should we drop this request instead?
                continue

            # Only strip off whitespace that is considered valid whitespace by
            # RFC7230, don't strip the rest
            value = value.strip(b" \t")
            key1 = tostr(key.upper().replace(b"-", b"_"))
            # If a header already exists, we append subsequent values
            # separated by a comma. Applications already need to handle
            # the comma-separated values, as HTTP front ends might do
            # the concatenation for you (behavior specified in RFC2616).
            try:
                headers[key1] += tostr(b", " + value)
            except KeyError:
                headers[key1] = tostr(value)

        # command, uri, version will be bytes
        command, uri, version = crack_first_line(first_line)
        version = tostr(version)
        command = tostr(command)
        self.command = command
        self.version = version
        (
            self.proxy_scheme,
            self.proxy_netloc,
            self.path,
            self.query,
            self.fragment,
        ) = split_uri(uri)
        self.url_scheme = self.adj.url_scheme
        connection = headers.get("CONNECTION", "")

        if version == "1.0":
            if connection.lower() != "keep-alive":
                self.connection_close = True

        if version == "1.1":
            # since the server buffers data from chunked transfers and clients
            # never need to deal with chunked requests, downstream clients
            # should not see the HTTP_TRANSFER_ENCODING header; we pop it
            # here
            te = headers.pop("TRANSFER_ENCODING", "")

            # NB: We can not just call bare strip() here because it will also
            # remove other non-printable characters that we explicitly do not
            # want removed so that if someone attempts to smuggle a request
            # with these characters we don't fall prey to it.
            #
            # For example \x85 is stripped by default, but it is not considered
            # valid whitespace to be stripped by RFC7230.
            encodings = [
                encoding.strip(" \t").lower() for encoding in te.split(",")
                if encoding
            ]

            for encoding in encodings:
                # Out of the transfer-codings listed in
                # https://tools.ietf.org/html/rfc7230#section-4 we only support
                # chunked at this time.

                # Note: the identity transfer-coding was removed in RFC7230:
                # https://tools.ietf.org/html/rfc7230#appendix-A.2 and is thus
                # not supported
                if encoding not in {"chunked"}:
                    raise TransferEncodingNotImplemented(
                        "Transfer-Encoding requested is not supported.")

            if encodings and encodings[-1] == "chunked":
                self.chunked = True
                buf = OverflowableBuffer(self.adj.inbuf_overflow)
                self.body_rcv = ChunkedReceiver(buf)
            elif encodings:  # pragma: nocover
                raise TransferEncodingNotImplemented(
                    "Transfer-Encoding requested is not supported.")

            expect = headers.get("EXPECT", "").lower()
            self.expect_continue = expect == "100-continue"
            if connection.lower() == "close":
                self.connection_close = True

        if not self.chunked:
            try:
                cl = int(headers.get("CONTENT_LENGTH", 0))
            except ValueError:
                raise ParsingError("Content-Length is invalid")

            self.content_length = cl
            if cl > 0:
                buf = OverflowableBuffer(self.adj.inbuf_overflow)
                self.body_rcv = FixedStreamReceiver(cl, buf)
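
The Transfer-Encoding normalization above, in isolation: only the whitespace RFC7230 allows (space and horizontal tab) is stripped, which is why the code avoids bare strip().

te = "gzip , Chunked"
encodings = [
    encoding.strip(" \t").lower() for encoding in te.split(",") if encoding
]
assert encodings == ["gzip", "chunked"]

# str.strip() with no arguments also removes characters such as U+0085 (NEL),
# which RFC7230 does not treat as whitespace; stripping only " \t" keeps them,
# so a smuggled value still fails the "chunked" membership check above.
assert "\x85chunked".strip() == "chunked"
assert "\x85chunked".strip(" \t") == "\x85chunked"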
Example #12
    def parse_header(self, header_plus):
        """
        Parses the header_plus block of text (the headers plus the
        first line of the request).
        """
        index = header_plus.find(b'\n')
        if index >= 0:
            first_line = header_plus[:index].rstrip()
            header = header_plus[index + 1:]
        else:
            first_line = header_plus.rstrip()
            header = b''

        self.first_line = first_line  # for testing

        lines = get_header_lines(header)

        headers = self.headers
        for line in lines:
            index = line.find(b':')
            if index > 0:
                key = line[:index]
                if b'_' in key:
                    continue
                value = line[index + 1:].strip()
                key1 = tostr(key.upper().replace(b'-', b'_'))
                # If a header already exists, we append subsequent values
                # separated by a comma. Applications already need to handle
                # the comma-separated values, as HTTP front ends might do
                # the concatenation for you (behavior specified in RFC2616).
                try:
                    headers[key1] += tostr(b', ' + value)
                except KeyError:
                    headers[key1] = tostr(value)
            # else there's garbage in the headers?

        # command, uri, version will be bytes
        command, uri, version = crack_first_line(first_line)
        version = tostr(version)
        command = tostr(command)
        self.command = command
        self.version = version
        (self.proxy_scheme, self.proxy_netloc, self.path, self.query,
         self.fragment) = split_uri(uri)
        self.url_scheme = self.adj.url_scheme
        connection = headers.get('CONNECTION', '')

        if version == '1.0':
            if connection.lower() != 'keep-alive':
                self.connection_close = True

        if version == '1.1':
            # since the server buffers data from chunked transfers and clients
            # never need to deal with chunked requests, downstream clients
            # should not see the HTTP_TRANSFER_ENCODING header; we pop it
            # here
            te = headers.pop('TRANSFER_ENCODING', '')
            if te.lower() == 'chunked':
                self.chunked = True
                buf = OverflowableBuffer(self.adj.inbuf_overflow)
                self.body_rcv = ChunkedReceiver(buf)
            expect = headers.get('EXPECT', '').lower()
            self.expect_continue = expect == '100-continue'
            if connection.lower() == 'close':
                self.connection_close = True

        if not self.chunked:
            try:
                cl = int(headers.get('CONTENT_LENGTH', 0))
            except ValueError:
                cl = 0
            self.content_length = cl
            if cl > 0:
                buf = OverflowableBuffer(self.adj.inbuf_overflow)
                self.body_rcv = FixedStreamReceiver(cl, buf)
Example #13
    def parse_header(self, header_plus):
        """
        Parses the header_plus block of text (the headers plus the
        first line of the request).
        """
        index = header_plus.find(b'\n')
        if index >= 0:
            first_line = header_plus[:index].rstrip()
            header = header_plus[index + 1:]
        else:
            first_line = header_plus.rstrip()
            header = b''

        self.first_line = first_line # for testing

        lines = get_header_lines(header)

        headers = self.headers
        for line in lines:
            index = line.find(b':')
            if index > 0:
                key = line[:index]
                value = line[index + 1:].strip()
                key1 = tostr(key.upper().replace(b'-', b'_'))
                # If a header already exists, we append subsequent values
                # separated by a comma. Applications already need to handle
                # the comma-separated values, as HTTP front ends might do
                # the concatenation for you (behavior specified in RFC2616).
                try:
                    headers[key1] += tostr(b', ' + value)
                except KeyError:
                    headers[key1] = tostr(value)
            # else there's garbage in the headers?

        # command, uri, version will be bytes
        command, uri, version = crack_first_line(first_line)
        version = tostr(version)
        command = tostr(command)
        self.command = command
        self.version = version
        (self.proxy_scheme,
         self.proxy_netloc,
         self.path,
         self.query, self.fragment) = split_uri(uri)
        self.url_scheme = self.adj.url_scheme
        connection = headers.get('CONNECTION', '')

        if version == '1.0':
            if connection.lower() != 'keep-alive':
                self.connection_close = True

        if version == '1.1':
            te = headers.get('TRANSFER_ENCODING', '')
            if te == 'chunked':
                self.chunked = True
                buf = OverflowableBuffer(self.adj.inbuf_overflow)
                self.body_rcv = ChunkedReceiver(buf)
            expect = headers.get('EXPECT', '').lower()
            self.expect_continue = expect == '100-continue'
            if connection.lower() == 'close':
                self.connection_close = True

        if not self.chunked:
            try:
                cl = int(headers.get('CONTENT_LENGTH', 0))
            except ValueError:
                cl = 0
            self.content_length = cl
            if cl > 0:
                buf = OverflowableBuffer(self.adj.inbuf_overflow)
                self.body_rcv = FixedStreamReceiver(cl, buf)
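
Finally, the header-key normalization shared by every parse_header version above, in isolation: keys are upper-cased with "-" replaced by "_", and repeated headers are joined with ", ". The tostr below is a hypothetical stand-in for the waitress helper, assumed to be a latin-1 decode:

def tostr(value):
    # hypothetical stand-in for waitress's tostr helper (assumed latin-1)
    return value.decode("latin-1")

headers = {}
for key, value in [(b"X-Forwarded-For", b"10.0.0.1"),
                   (b"X-Forwarded-For", b"10.0.0.2")]:
    key1 = tostr(key.upper().replace(b"-", b"_"))
    try:
        headers[key1] += tostr(b", " + value)
    except KeyError:
        headers[key1] = tostr(value)

assert headers == {"X_FORWARDED_FOR": "10.0.0.1, 10.0.0.2"}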