示例#1
0
def parse_init(line):
    """
        Split an HTTP request line into (method, url, httpversion).

        The line must consist of exactly three whitespace-separated tokens:
        method, url and protocol.

        Returns:
            A (method, url, httpversion) tuple, where httpversion is the
            value parse_http_protocol returns for the protocol token.
            None if the line does not have exactly three tokens, if
            parse_http_protocol returns a false value, or if the method
            fails utils.isascii.
    """
    try:
        # str.split() is the portable spelling of the deprecated
        # string.split() module function (removed in Python 3); with no
        # arguments both split on runs of whitespace.
        method, url, protocol = line.split()
    except ValueError:
        # Too few or too many tokens to unpack.
        return None
    httpversion = parse_http_protocol(protocol)
    if not httpversion:
        return None
    if not utils.isascii(method):
        return None
    return method, url, httpversion
示例#2
0
 def _parse_init(self, line):
     try:
         method, url, protocol = string.split(line)
     except ValueError:
         return None
     httpversion = self._parse_http_protocol(protocol)
     if not httpversion:
         return None
     if not utils.isascii(method):
         return None
     return method, url, httpversion
示例#3
0
def parse_init_http(line):
    """
        Parse a request line whose url is a path or "*".

        Returns (method, url, httpversion), or None when parse_init rejects
        the line, when the url fails utils.isascii, or when the url neither
        starts with "/" nor equals "*".
    """
    parsed = parse_init(line)
    if not parsed:
        return None
    method, url, httpversion = parsed
    # The url must be ASCII and be either a path ("/...") or the literal "*".
    if utils.isascii(url) and (url.startswith("/") or url == "*"):
        return method, url, httpversion
    return None
示例#4
0
 def _parse_init_http(self, line):
     """
         Parse a request line whose url is a path or "*".

         Returns (method, url, httpversion), or None when self._parse_init
         rejects the line, when the url fails utils.isascii, or when the
         url neither starts with "/" nor equals "*".
     """
     parsed = self._parse_init(line)
     if not parsed:
         return None
     method, url, httpversion = parsed
     # The url must be ASCII and be either a path ("/...") or the literal "*".
     if utils.isascii(url) and (url.startswith("/") or url == "*"):
         return method, url, httpversion
     return None
示例#5
0
def parse_url(url):
    """
        Returns a (scheme, host, port, path) tuple, or None on error.

        Anything before the last "@" in the network location (credentials)
        is discarded. When the network location carries no ":port" suffix,
        the port defaults to 443 for the "https" scheme and to 80 for every
        other scheme. The returned path is the URL's path, params, query
        and fragment reassembled into one string, prefixed with "/" if it
        does not already start with one.

        Checks that (delegated to _is_valid_port, _is_valid_host and
        utils.isascii respectively):
            port is an integer 0-65535
            host is a valid IDNA-encoded hostname with no null-bytes
            path is valid ASCII
    """
    try:
        scheme, netloc, path, params, query, fragment = urlparse.urlparse(url)
    except ValueError:
        return None
    if not scheme:
        return None
    if '@' in netloc:
        # FIXME: Consider what to do with the discarded credentials here Most
        # probably we should extend the signature to return these as a separate
        # value.
        # str.rsplit replaces the deprecated string.rsplit module function
        # (removed in Python 3); maxsplit is positional because Python 2's
        # str.rsplit does not accept keyword arguments.
        _, netloc = netloc.rsplit('@', 1)
    if ':' in netloc:
        # Split on the last colon only, so the port is the final component.
        host, port = netloc.rsplit(':', 1)
        try:
            port = int(port)
        except ValueError:
            return None
    else:
        host = netloc
        port = 443 if scheme == "https" else 80
    # Rebuild everything after the network location as a single string.
    path = urlparse.urlunparse(('', '', path, params, query, fragment))
    if not path.startswith("/"):
        path = "/" + path
    if not _is_valid_host(host):
        return None
    if not utils.isascii(path):
        return None
    if not _is_valid_port(port):
        return None
    return scheme, host, port, path
示例#6
0
def parse_url(url):
    """
        Returns a (scheme, host, port, path) tuple, or None on error.

        Anything before the last "@" in the network location (credentials)
        is discarded. When the network location carries no ":port" suffix,
        the port defaults to 443 for the "https" scheme and to 80 for every
        other scheme. The returned path is the URL's path, params, query
        and fragment reassembled into one string, prefixed with "/" if it
        does not already start with one.

        Checks that (delegated to _is_valid_port, _is_valid_host and
        utils.isascii respectively):
            port is an integer 0-65535
            host is a valid IDNA-encoded hostname with no null-bytes
            path is valid ASCII
    """
    try:
        scheme, netloc, path, params, query, fragment = urlparse.urlparse(url)
    except ValueError:
        return None
    if not scheme:
        return None
    if '@' in netloc:
        # FIXME: Consider what to do with the discarded credentials here Most
        # probably we should extend the signature to return these as a separate
        # value.
        # str.rsplit replaces the deprecated string.rsplit module function
        # (removed in Python 3); maxsplit is positional because Python 2's
        # str.rsplit does not accept keyword arguments.
        _, netloc = netloc.rsplit('@', 1)
    if ':' in netloc:
        # Split on the last colon only, so the port is the final component.
        host, port = netloc.rsplit(':', 1)
        try:
            port = int(port)
        except ValueError:
            return None
    else:
        host = netloc
        port = 443 if scheme == "https" else 80
    # Rebuild everything after the network location as a single string.
    path = urlparse.urlunparse(('', '', path, params, query, fragment))
    if not path.startswith("/"):
        path = "/" + path
    if not _is_valid_host(host):
        return None
    if not utils.isascii(path):
        return None
    if not _is_valid_port(port):
        return None
    return scheme, host, port, path
示例#7
0
    def read_request(
        self,
        include_body=True,
        body_size_limit=None,
        allow_empty=False,
    ):
        """
        Read one HTTP request from the connection's input stream.

        Args:
            include_body (bool): If true, the request body is read as well;
                otherwise the body of the returned request is None.
            body_size_limit: Passed through to read_http_body.
            allow_empty (bool): If true, return http.EmptyRequest() when no
                request line could be read instead of raising.

        Returns:
            Request: The parsed HTTP request, or http.EmptyRequest() when
            no request line was read and allow_empty is true.

        Raises:
            HttpError: With code 400 if the request line or the headers
                are invalid.
            tcp.NetLibDisconnect: If no request line could be read and
                allow_empty is false.
        """
        timestamp_start = time.time()
        if hasattr(self.tcp_handler.rfile, "reset_timestamps"):
            self.tcp_handler.rfile.reset_timestamps()

        # Only some request forms supply these; the rest leave them as None.
        scheme = host = port = body = None

        request_line = self._get_request_line()
        if not request_line:
            if not allow_empty:
                raise tcp.NetLibDisconnect()
            return http.EmptyRequest()

        def bad_request_line():
            # Every request-line failure is reported with the same error.
            return HttpError(
                400,
                "Bad HTTP request line: %s" % repr(request_line)
            )

        parsed_line = self._parse_init(request_line)
        if not parsed_line:
            raise bad_request_line()
        method, path, httpversion = parsed_line

        # Classify the request by the shape of its target. The order of the
        # checks matters: a "/"-prefixed or "*" target is "relative" even
        # when the method is CONNECT.
        if path == '*' or path.startswith("/"):
            form_in = "relative"
            if not utils.isascii(path):
                raise bad_request_line()
        elif method == 'CONNECT':
            form_in = "authority"
            connect_parts = self._parse_init_connect(request_line)
            if not connect_parts:
                raise bad_request_line()
            host, port, httpversion = connect_parts
            path = None
        else:
            form_in = "absolute"
            proxy_parts = self._parse_init_proxy(request_line)
            if not proxy_parts:
                raise bad_request_line()
            _, scheme, host, port, path, _ = proxy_parts

        headers = self.read_headers()
        if headers is None:
            raise HttpError(400, "Invalid headers")

        # For HTTP/1.1, answer "Expect: 100-continue" right away and remove
        # the header from the headers stored on the returned request.
        wants_continue = (
            headers.get_first("expect", "").lower() == "100-continue"
        )
        if wants_continue and httpversion == (1, 1):
            self.tcp_handler.wfile.write(
                'HTTP/1.1 100 Continue\r\n'
                '\r\n'
            )
            self.tcp_handler.wfile.flush()
            del headers['expect']

        if include_body:
            # The trailing None/True positionals are forwarded unchanged;
            # see read_http_body for their meaning.
            body = self.read_http_body(
                headers, body_size_limit, method, None, True
            )

        if hasattr(self.tcp_handler.rfile, "first_byte_timestamp"):
            # more accurate timestamp_start
            timestamp_start = self.tcp_handler.rfile.first_byte_timestamp

        timestamp_end = time.time()

        return http.Request(
            form_in, method, scheme, host, port, path, httpversion,
            headers, body, timestamp_start, timestamp_end,
        )
示例#8
0
    def read_request(
        self,
        include_body=True,
        body_size_limit=None,
        allow_empty=False,
    ):
        """
        Read one HTTP request from the connection's input stream.

        Args:
            include_body (bool): If true, the request body is read as well;
                otherwise the body of the returned request is None.
            body_size_limit: Passed through to read_http_body.
            allow_empty (bool): If true, return http.EmptyRequest() when no
                request line could be read instead of raising.

        Returns:
            Request: The parsed HTTP request, or http.EmptyRequest() when
            no request line was read and allow_empty is true.

        Raises:
            HttpError: With code 400 if the request line or the headers
                are invalid.
            tcp.NetLibDisconnect: If no request line could be read and
                allow_empty is false.
        """
        timestamp_start = time.time()
        if hasattr(self.tcp_handler.rfile, "reset_timestamps"):
            self.tcp_handler.rfile.reset_timestamps()

        # Only some request forms supply these; the rest leave them as None.
        scheme = host = port = body = None

        request_line = self._get_request_line()
        if not request_line:
            if not allow_empty:
                raise tcp.NetLibDisconnect()
            return http.EmptyRequest()

        def bad_request_line():
            # Every request-line failure is reported with the same error.
            return HttpError(400,
                             "Bad HTTP request line: %s" % repr(request_line))

        parsed_line = self._parse_init(request_line)
        if not parsed_line:
            raise bad_request_line()
        method, path, httpversion = parsed_line

        # Classify the request by the shape of its target. The order of the
        # checks matters: a "/"-prefixed or "*" target is "relative" even
        # when the method is CONNECT.
        if path == '*' or path.startswith("/"):
            form_in = "relative"
            if not utils.isascii(path):
                raise bad_request_line()
        elif method == 'CONNECT':
            form_in = "authority"
            connect_parts = self._parse_init_connect(request_line)
            if not connect_parts:
                raise bad_request_line()
            host, port, httpversion = connect_parts
            path = None
        else:
            form_in = "absolute"
            proxy_parts = self._parse_init_proxy(request_line)
            if not proxy_parts:
                raise bad_request_line()
            _, scheme, host, port, path, _ = proxy_parts

        headers = self.read_headers()
        if headers is None:
            raise HttpError(400, "Invalid headers")

        # For HTTP/1.1, answer "Expect: 100-continue" right away and remove
        # the header from the headers stored on the returned request.
        wants_continue = (
            headers.get_first("expect", "").lower() == "100-continue"
        )
        if wants_continue and httpversion == (1, 1):
            self.tcp_handler.wfile.write('HTTP/1.1 100 Continue\r\n' '\r\n')
            self.tcp_handler.wfile.flush()
            del headers['expect']

        if include_body:
            # The trailing None/True positionals are forwarded unchanged;
            # see read_http_body for their meaning.
            body = self.read_http_body(
                headers, body_size_limit, method, None, True)

        if hasattr(self.tcp_handler.rfile, "first_byte_timestamp"):
            # more accurate timestamp_start
            timestamp_start = self.tcp_handler.rfile.first_byte_timestamp

        timestamp_end = time.time()

        return http.Request(
            form_in, method, scheme, host, port, path, httpversion,
            headers, body, timestamp_start, timestamp_end,
        )