Exemplo n.º 1
0
    def __prepare_request(self, request: Request) -> requests.PreparedRequest:
        """Create PreparedRequest from UserAgent's Request.

        Validates the request's method, URL, headers and HTTP authentication credentials, then has the session
        prepare a "requests" request from them.

        :param request: UserAgent's Request to convert.
        :return: Request as prepared by the session's prepare_request().
        :raises McRequestException: if method, URL or headers are None, if only one of the HTTP authentication
            credentials is set / non-empty, or if "requests" is unable to prepare the request.
        """
        method = request.method()
        if method is None:
            raise McRequestException("Request's method is None.")

        url = request.url()
        if url is None:
            raise McRequestException("Request's URL is None.")

        headers = request.headers()
        if headers is None:
            raise McRequestException("Request's headers is None.")

        auth_username = request.auth_username()
        auth_password = request.auth_password()

        # Credentials come in pairs: exactly one of them being None is an error
        if (auth_username is None) != (auth_password is None):
            raise McRequestException(
                "Either both or none of HTTP authentication credentials must be not None."
            )

        auth = None
        if auth_username is not None:
            # Password is known to be not None here because of the pair check above; the same "both or none" rule
            # applies to emptiness
            if (len(auth_username) == 0) != (len(auth_password) == 0):
                raise McRequestException(
                    "Either both or none of HTTP authentication credentials must be not Empty."
                )

            auth = HTTPBasicAuth(auth_username, auth_password)

        data = request.content()

        try:
            requests_request = requests.Request(
                method=method,
                url=url,
                data=data,
                headers=headers,
                auth=auth,
            )
            requests_prepared_request = self.__session.prepare_request(requests_request)

        except Exception as ex:
            # Chain explicitly so that the original failure is reported as the direct cause
            raise McRequestException("Unable to prepare request %s: %s" % (
                str(request),
                str(ex),
            )) from ex

        return requests_prepared_request
Exemplo n.º 2
0
    def __prepare_request(self, request: Request) -> requests.PreparedRequest:
        """Create PreparedRequest from UserAgent's Request.

        The request's method, URL and headers must not be None, and the HTTP authentication username / password must
        either both be provided or both be left out.

        :param request: UserAgent's Request to turn into a "requests" prepared request.
        :return: Prepared request returned by the session's prepare_request().
        :raises McRequestException: if one or more parameters are invalid, or if preparing the request fails.
        """
        method = request.method()
        if method is None:
            raise McRequestException("Request's method is None.")

        url = request.url()
        if url is None:
            raise McRequestException("Request's URL is None.")

        headers = request.headers()
        if headers is None:
            raise McRequestException("Request's headers is None.")

        auth_username = request.auth_username()
        auth_password = request.auth_password()

        username_missing = auth_username is None
        password_missing = auth_password is None
        if username_missing != password_missing:
            # Only one half of the credentials pair was set
            raise McRequestException("Either both or none of HTTP authentication credentials must be not None.")

        auth = None
        if not username_missing:
            # Both credentials are set at this point, so only their emptiness is left to compare
            username_empty = len(auth_username) == 0
            password_empty = len(auth_password) == 0
            if username_empty != password_empty:
                raise McRequestException("Either both or none of HTTP authentication credentials must be not Empty.")

            auth = HTTPBasicAuth(auth_username, auth_password)

        data = request.content()

        try:
            requests_request = requests.Request(
                method=method,
                url=url,
                data=data,
                headers=headers,
                auth=auth,
            )
            requests_prepared_request = self.__session.prepare_request(requests_request)

        except Exception as ex:
            # "from ex" keeps the underlying error attached as the explicit cause of the wrapper exception
            raise McRequestException("Unable to prepare request %s: %s" % (str(request), str(ex),)) from ex

        return requests_prepared_request
Exemplo n.º 3
0
    def request(self, request: Request) -> Response:
        """Execute a request, return a response.

        All other helpers are supposed to use request() internally as it implements max. size, callbacks, blacklisted
        URLs etc.

        Errors that happen while sending the request or reading the response body are not raised; instead, a
        placeholder response is returned with the error message as its data (and, except for exceeded redirects,
        with the "client-side error" flag set).

        :param request: Request to execute.
        :return: Response for the last fetched page, with responses of earlier redirects attached as a chain of
            "previous" responses.
        :raises McRequestException: if the request is None, if its method, URL or headers are None, if only one of
            the HTTP authentication credentials is set / non-empty, if the request can't be prepared, or if no
            response / response data ended up being available.
        """

        if request is None:
            raise McRequestException("Request is None.")

        request = self.__blacklist_request_if_needed(request=request)

        self.__log_request(request=request)

        method = request.method()
        if method is None:
            raise McRequestException("Request's method is None.")

        url = request.url()
        if url is None:
            raise McRequestException("Request's URL is None.")

        headers = request.headers()
        if headers is None:
            raise McRequestException("Request's headers is None.")

        auth_username = request.auth_username()
        auth_password = request.auth_password()
        if ((auth_username is None and auth_password is not None)
                or (auth_username is not None and auth_password is None)):
            raise McRequestException(
                "Either both or none of HTTP authentication credentials must be not None."
            )

        auth = None
        if auth_username is not None and auth_password is not None:
            if ((len(auth_username) == 0 and len(auth_password) > 0)
                    or (len(auth_username) > 0 and len(auth_password) == 0)):
                raise McRequestException(
                    "Either both or none of HTTP authentication credentials must be not Empty."
                )

            auth = HTTPBasicAuth(auth_username, auth_password)

        data = request.content()

        try:
            requests_request = requests.Request(
                method=method,
                url=url,
                data=data,
                headers=headers,
                auth=auth,
            )
            requests_prepared_request = self.__session.prepare_request(
                requests_request)

        except Exception as ex:
            # Chain explicitly so that the original failure is reported as the direct cause
            raise McRequestException("Unable to prepare request %s: %s" % (
                str(request),
                str(ex),
            )) from ex

        error_is_client_side = False

        try:
            requests_response = self.__session.send(
                request=requests_prepared_request,
                timeout=self.timeout(),

                # To be able to enforce max_size
                stream=True,
            )

        except requests.TooManyRedirects as ex:

            # On too many redirects, return the last fetched page (just like LWP::UserAgent does)
            log.warning("Exceeded max. redirects for URL %s" % request.url())
            requests_response = ex.response
            response_data = str(ex)

        except requests.Timeout as ex:

            log.warning("Timeout for URL %s" % request.url())

            # We treat timeouts as client-side errors too because we can retry on them
            error_is_client_side = True

            requests_response = requests.Response()
            requests_response.status_code = HTTPStatus.REQUEST_TIMEOUT.value
            requests_response.reason = HTTPStatus.REQUEST_TIMEOUT.phrase
            requests_response.request = requests_prepared_request

            requests_response.history = []

            response_data = str(ex)

        except Exception as ex:

            # Client-side error
            log.warning("Client-side error while processing request %s: %s" % (
                str(request),
                str(ex),
            ))

            error_is_client_side = True

            requests_response = requests.Response()
            requests_response.status_code = HTTPStatus.BAD_REQUEST.value
            requests_response.reason = "Client-side error"
            requests_response.request = requests_prepared_request

            # Previous request / response chain is not built for client-side errored requests
            requests_response.history = []

            requests_response.headers = {
                # LWP::UserAgent compatibility
                'Client-Warning': 'Client-side error',
            }

            response_data = str(ex)

        else:

            try:

                max_size = self.max_size()

                response_data = ""
                read_response_data = True

                if max_size is not None:
                    content_length = requests_response.headers.get(
                        'Content-Length', None)

                    if content_length is not None:
                        try:
                            content_length = int(content_length)
                        except ValueError:
                            # A malformed header must not abort the whole request; the size is still measured
                            # while fetching the data below
                            log.warning("Invalid Content-Length '%s' for URL %s" % (
                                content_length,
                                url,
                            ))
                            content_length = None

                    if content_length is not None and content_length > max_size:
                        log.warning(
                            "Content-Length exceeds %d for URL %s" % (
                                max_size,
                                url,
                            ))

                        # Release the response to return connection back to the pool
                        # (http://docs.python-requests.org/en/master/user/advanced/#body-content-workflow)
                        requests_response.close()

                        read_response_data = False

                if read_response_data:

                    # NOTE(review): "apparent_encoding" is computed by "requests" from the response content, so
                    # reading it presumably loads the body before the max_size check in the loop below applies --
                    # confirm against the "requests" documentation
                    if requests_response.encoding is None:

                        if requests_response.apparent_encoding is None:
                            # If encoding is not in HTTP headers nor can be determined from content itself, assume that
                            # it's UTF-8
                            requests_response.encoding = 'UTF-8'

                        else:
                            # Test the encoding guesser's opinion, just like browsers do
                            requests_response.encoding = requests_response.apparent_encoding

                    else:

                        # If "Content-Type" HTTP header contains a string "text" and doesn't have "charset" property,
                        # "requests" falls back to setting the encoding to ISO-8859-1, which is probably not right
                        # (encoding might have been defined in the HTML content itself via <meta> tag), so we use the
                        # "apparent encoding" instead
                        if requests_response.encoding.lower() == 'iso-8859-1':
                            if requests_response.apparent_encoding is not None:
                                requests_response.encoding = requests_response.apparent_encoding

                    # Some pages report some funky encoding; in that case, fallback to UTF-8
                    try:
                        codecs.lookup(requests_response.encoding)
                    except LookupError:
                        log.warning("Invalid encoding %s for URL %s" %
                                    (requests_response.encoding,
                                     requests_response.url))
                        requests_response.encoding = 'UTF-8'

                    # Collect the chunks and join them once at the end instead of repeatedly extending a string
                    response_chunks = []
                    response_data_size = 0
                    for chunk in requests_response.iter_content(
                            chunk_size=None, decode_unicode=True):
                        response_chunks.append(chunk)
                        response_data_size += len(chunk)

                        # Content-Length might be missing / lying, so we measure size while fetching the data too
                        if max_size is not None and response_data_size > max_size:
                            log.warning("Data size exceeds %d for URL %s" %
                                        (
                                            max_size,
                                            url,
                                        ))

                            # Release the response to return connection back to the pool
                            # (http://docs.python-requests.org/en/master/user/advanced/#body-content-workflow)
                            requests_response.close()

                            break

                    response_data = "".join(response_chunks)

            except requests.RequestException as ex:

                log.warning("Error reading data for URL %s" % request.url())

                # Errors while reading the data are reported the same way as timeouts, i.e. as client-side errors
                error_is_client_side = True

                # Replace the partially read response with a placeholder "request timeout" response
                requests_response = requests.Response()
                requests_response.status_code = HTTPStatus.REQUEST_TIMEOUT.value
                requests_response.reason = HTTPStatus.REQUEST_TIMEOUT.phrase
                requests_response.request = requests_prepared_request

                requests_response.history = []

                response_data = str(ex)

        if requests_response is None:
            raise McRequestException("Response from 'requests' is None.")

        if response_data is None:
            # Probably a programming error
            raise McRequestException("Response data is None.")

        response = Response.from_requests_response(
            requests_response=requests_response,
            data=response_data,
        )

        if error_is_client_side:
            response.set_error_is_client_side(
                error_is_client_side=error_is_client_side)

        # Build the previous request / response chain from the redirects
        current_response = response
        for previous_rq_response in reversed(requests_response.history):
            previous_rq_request = previous_rq_response.request
            previous_response_request = Request.from_requests_prepared_request(
                requests_prepared_request=previous_rq_request)

            previous_response = Response.from_requests_response(
                requests_response=previous_rq_response)
            previous_response.set_request(request=previous_response_request)

            current_response.set_previous(previous=previous_response)
            current_response = previous_response

        # Redirects might have happened, so we have to recreate the request object from the latest page that was
        # redirected to
        response_request = Request.from_requests_prepared_request(
            requests_prepared_request=requests_response.request)
        response.set_request(response_request)

        return response