Пример #1
0
def ungzipped_response_content(response: Response) -> str:
    """Return the response body as text, gunzipping the raw payload when it is gzipped.

    If the response is not detected as gzipped data, its regular decoded
    content is returned. If it is detected as gzipped but gunzipping fails
    with McGunzipException, the failure is logged and the regular decoded
    content is returned instead.

    :param response: HTTP response to read the content from.
    :return: Response content as a string.
    """

    if not __response_is_gzipped_data(response):
        content = response.decoded_content()

    else:
        compressed_payload = response.raw_data()
        try:
            decompressed_payload = gunzip(compressed_payload)
            # Invalid UTF-8 sequences are replaced rather than raising.
            content = decompressed_payload.decode('utf-8', errors='replace')
        except McGunzipException as ex:
            # Best effort: fall back to whatever the response decodes to on its own.
            log.error("Unable to gunzip response {}: {}".format(response, ex))
            content = response.decoded_content()

    assert isinstance(content, str)

    return content
Пример #2
0
    def store_response(self, db: DatabaseHandler, download: dict,
                       response: Response) -> None:
        """Store the content of a fetched download and queue the resulting stories for extraction.

        On an unsuccessful response, the error is recorded through
        _store_failed_download_error_message() and nothing else is done.
        Otherwise the download's URL is updated in the "downloads" table if it
        differs from the stored one, the content is handed to store_download(),
        and every story ID it returns is added to the
        "MediaWords::Job::ExtractAndVector" queue.

        :param db: Database handler used for the URL update and passed on to the storing helpers.
        :param download: Download row; its "downloads_id" and "url" keys are read.
        :param response: HTTP response that was fetched for the download.
        """

        download = decode_object_from_bytes_if_needed(download)

        downloads_id = download['downloads_id']
        download_url = download['url']

        log.info(f"Handling download {downloads_id}...")
        log.debug(f"(URL of download {downloads_id} which is about to be handled: {download_url})")

        # Failed fetches only get their error message recorded.
        if not response.is_success():
            log.info(f"Download {downloads_id} errored: {response.decoded_content()}")
            self._store_failed_download_error_message(db=db, download=download, response=response)
            return

        # Only textual content types are stored verbatim; anything else gets a placeholder.
        textual_content_type = re.compile(r'text|html|xml|rss|atom|application/json', flags=re.IGNORECASE)
        content = (
            response.decoded_content()
            if textual_content_type.search(response.content_type() or '')
            else '(unsupported content type)'
        )

        url_update_params = {
            'downloads_id': downloads_id,
            'download_url': download_url,
        }
        db.query(
            """
            UPDATE downloads
            SET url = %(download_url)s
            WHERE downloads_id = %(downloads_id)s
              AND url != %(download_url)s
        """, url_update_params)

        for stories_id in self.store_download(db=db, download=download, content=content):
            log.debug(f"Adding story {stories_id} for download {downloads_id} to extraction queue...")
            extraction_queue = JobBroker(queue_name='MediaWords::Job::ExtractAndVector')
            extraction_queue.add_to_queue(stories_id=stories_id)

        log.info(f"Handled download {downloads_id}...")
        log.debug(f"(URL of download {downloads_id} that was just handled: {download_url})")
Пример #3
0
def __solr_error_message_from_response(response: Response) -> str:
    """Build a human-readable error message from a failed Solr response.

    The message is prefixed according to where the failure happened:

    * "UserAgent error" when the response reports a client-side error;
    * "Client error" for 4xx status codes, replaced by a "Solr error" message
      when the response body is JSON carrying both an error message
      (error.msg) and the request parameters (responseHeader.params);
    * "Server error" for 5xx status codes;
    * "Other error error" for anything else.

    :param response: Failed response to describe.
    :return: Error message; always a string.
    """

    if response.error_is_client_side():
        # UserAgent error (UserAgent wasn't able to connect to the server or something like that)
        return f'UserAgent error: {response.decoded_content()}'

    status_code_str = str(response.code())

    if status_code_str.startswith('5'):
        # Server error or some other error
        return f'Server error: {response.status_line()} {response.decoded_content()}'

    if not status_code_str.startswith('4'):
        # Some weird stuff
        return f'Other error error: {response.status_line()} {response.decoded_content()}'

    # Client error - set default message; it is kept unless a complete Solr error can be parsed out
    solr_response_maybe_json = response.decoded_content()
    error_message = f'Client error: {response.status_line()} {solr_response_maybe_json}'

    if not solr_response_maybe_json:
        return error_message

    try:
        solr_response_json = response.decoded_json()
    except Exception as ex:
        # Best effort: a non-JSON body is not an error here, the default message already includes it
        log.debug(f"Unable to parse Solr error response: {ex}; raw response: {solr_response_maybe_json}")
        return error_message

    # The body might be valid JSON without being the expected object-of-objects
    if not isinstance(solr_response_json, dict):
        return error_message

    error_section = solr_response_json.get('error')
    header_section = solr_response_json.get('responseHeader')

    solr_error_message = error_section.get('msg') if isinstance(error_section, dict) else None
    request_params = header_section.get('params') if isinstance(header_section, dict) else None

    if solr_error_message and request_params:
        request_params_json = encode_json(request_params)

        # If we were able to decode Solr error message, overwrite the default error message with it
        error_message = f'Solr error: "{solr_error_message}", params: {request_params_json}'

    return error_message