Пример #1
0
def get_seeded_content(db: DatabaseHandler,
                       topic_fetch_url: dict) -> typing.Optional[Response]:
    """Return a dummy response holding the seeded content for this url and topic, if any.

    Queries topic_seed_urls for rows that match the topic and url of topic_fetch_url and have non-null content.

    Arguments:
    db - db handle
    topic_fetch_url - topic_fetch_url dict from db; its 'topics_id' and 'url' keys are read

    Returns:
    None if no seeded content was found; otherwise a 200 / 'OK' response object whose data is the first matching
    content value and whose request is a GET for the url

    """
    seeded_content = db.query(
        "select content from topic_seed_urls where topics_id = %(a)s and url = %(b)s and content is not null",
        {
            'a': topic_fetch_url['topics_id'],
            'b': topic_fetch_url['url']
        }).flat()

    # Nothing was seeded for this topic / url pair
    if not seeded_content:
        return None

    # Only the first matching row is used, even if the query returned several
    response = Response(code=200, message='OK', headers={}, data=seeded_content[0])
    response.set_request(Request('GET', topic_fetch_url['url']))

    return response
Пример #2
0
def _make_dummy_bypassed_response(url: str) -> Response:
    """Build a placeholder 200 / 'OK' response with empty data whose request is a GET for the given url."""
    dummy = Response(data='', headers={}, message='OK', code=200)
    dummy.set_request(Request('GET', url))
    return dummy
Пример #3
0
    def request(self, request: Request) -> Response:
        """Run the given request and hand back the resulting response.

        Other helpers are expected to go through this method, as this is where the request gets blacklisted if
        needed and where the max. size is passed on to the response.

        Raises McRequestException if the request is None, if it can't be prepared or executed, or if no underlying
        'requests' response came back.

        The returned response has its request set to the one of the final (post-redirect) page, and its chain of
        previous responses is built from the redirect history.
        """

        if request is None:
            raise McRequestException("Request is None.")

        request = self.__blacklist_request_if_needed(request=request)
        self.__log_request(request=request)

        try:
            prepared = self.__prepare_request(request)
        except Exception as err:
            raise McRequestException("Unable to prepare request %s: %s" % (str(request), str(err),))

        try:
            ua_response = self.__execute_request(prepared)
        except Exception as err:
            raise McRequestException("Unable to execute request %s: %s" % (str(prepared), str(err),))

        final_rq_response = ua_response.requests_response
        if final_rq_response is None:
            raise McRequestException("Response from 'requests' is None.")

        response = Response(
            requests_response=final_rq_response,
            max_size=self.max_size(),
            error_is_client_side=ua_response.error_is_client_side,
        )

        # Walk the redirect history from the most recent hop backwards, hanging each older response off the
        # newer one as its "previous"
        newer = response
        for redirect_rq_response in reversed(final_rq_response.history):
            redirect_request = Request.from_requests_prepared_request(
                requests_prepared_request=redirect_rq_response.request,
            )

            # Touch the body up front: reading the (chunked?) data of an earlier response occasionally dies with
            #
            #      AttributeError: 'NoneType' object has no attribute 'readline'
            #
            # The data of earlier responses matters little, so just warn and carry on.
            try:
                _ = redirect_rq_response.text
            except Exception as err:
                log.warning("Reading previous response's data failed: %s" % str(err))
                redirect_rq_response.raw_data = io.StringIO('')

            older = Response(requests_response=redirect_rq_response, max_size=self.max_size())
            older.set_request(request=redirect_request)

            newer.set_previous(previous=older)
            newer = older

        # After redirects the request that produced the final page may differ from the one passed in, so rebuild
        # it from what 'requests' actually sent last
        response.set_request(
            Request.from_requests_prepared_request(requests_prepared_request=final_rq_response.request)
        )

        return response
Пример #4
0
    def request(self, request: Request) -> Response:
        """Execute a request, return a response.

        All other helpers are supposed to use request() internally as it implements max. size, callbacks, blacklisted
        URLs etc.

        Raises McRequestException if the request is None, if preparing or executing it fails, or if the underlying
        'requests' response is None. For preparation / execution failures the original exception is kept as the
        cause.

        The returned response has its request set to the one of the final (post-redirect) page and a chain of
        previous responses built from the redirect history."""

        if request is None:
            raise McRequestException("Request is None.")

        request = self.__blacklist_request_if_needed(request=request)

        self.__log_request(request=request)

        try:
            requests_prepared_request = self.__prepare_request(request)
        except Exception as ex:
            raise McRequestException("Unable to prepare request %s: %s" % (
                str(request),
                str(ex),
            )) from ex

        try:
            user_agent_response = self.__execute_request(
                requests_prepared_request)
        except Exception as ex:
            # Report execution failures as McRequestException like every other failure in this method; "from ex"
            # keeps the original exception (and its traceback) reachable as the cause
            raise McRequestException("Unable to execute request %s: %s" % (
                str(requests_prepared_request),
                str(ex),
            )) from ex

        if user_agent_response.requests_response is None:
            raise McRequestException("Response from 'requests' is None.")

        response = Response(
            requests_response=user_agent_response.requests_response,
            max_size=self.max_size(),
            error_is_client_side=user_agent_response.error_is_client_side,
        )

        # Build the previous request / response chain from the redirects, newest hop first
        current_response = response
        for previous_rq_response in reversed(
                user_agent_response.requests_response.history):
            previous_rq_request = previous_rq_response.request
            previous_response_request = Request.from_requests_prepared_request(
                requests_prepared_request=previous_rq_request)

            # Sometimes reading the (chunked?) previous response's data fails with:
            #
            #      AttributeError: 'NoneType' object has no attribute 'readline'
            #
            # Previous response's data is not that important, so fail rather silently.
            try:
                previous_rq_response.text
            except Exception as ex:
                log.warning("Reading previous response's data failed: %s" %
                            str(ex))
                previous_rq_response.raw_data = io.StringIO('')

            previous_response = Response(
                requests_response=previous_rq_response,
                max_size=self.max_size())
            previous_response.set_request(request=previous_response_request)

            current_response.set_previous(previous=previous_response)
            current_response = previous_response

        # Redirects might have happened, so we have to recreate the request object from the latest page that was
        # redirected to
        response_request = Request.from_requests_prepared_request(
            requests_prepared_request=user_agent_response.requests_response.
            request)
        response.set_request(response_request)

        return response