Пример #1
0
 def test_raise_ioerror_on_503(self, mock_get):
     """
     :func:`.metadata.retrieve` raises IOError when unavailable.

     ``mock_get`` is set up to return a response whose status code is 503
     and whose ``json()`` call returns ``None``.
     """
     unavailable = mock.MagicMock(status_code=503)
     # ``json`` is attached to the mock's type rather than to the instance.
     type(unavailable).json = mock.MagicMock(return_value=None)
     mock_get.return_value = unavailable

     self.assertRaises(IOError, metadata.retrieve, '1234.5678v3')
Пример #2
0
 def test_raise_ioerror_on_sslerror(self, mock_get):
     """
     :func:`.metadata.retrieve` raises IOError when SSL fails.

     ``mock_get`` raises ``SSLError`` when called. The test fails if that
     very exception type is what comes out of ``retrieve``.
     """
     from requests.exceptions import SSLError

     mock_get.side_effect = SSLError
     with self.assertRaises(IOError):
         try:
             metadata.retrieve('1234.5678v3')
         except Exception as caught:
             # Exact type comparison on purpose: only the dependency's own
             # exception class is rejected here; anything else is passed on
             # to ``assertRaises`` for the IOError check.
             # NOTE(review): presumably needed because SSLError would itself
             # satisfy the IOError check -- confirm against the requests
             # exception hierarchy.
             if type(caught) is not SSLError:
                 raise
             self.fail('Should not return dependency exception')
Пример #3
0
    def _get_metadata(self, arxiv_id: str) -> DocMeta:
        """
        Retrieve metadata from the :mod:`.metadata` service.

        A connection failure on the first attempt is retried exactly once.
        Failures of the retry are translated by the same handlers as failures
        of the first attempt, so a failed request or bad response on the
        second try is reported as :class:`DocumentFailed` rather than leaking
        the raw :mod:`.metadata` exception.

        Parameters
        ----------
        arxiv_id : str
            An arXiv identifier, with or without a version affix.

        Returns
        -------
        :class:`.DocMeta`
            Metadata for the arXiv paper.

        Raises
        ------
        DocumentFailed
            Indexing of the document failed. This may have no bearing on the
            success of subsequent papers. Raised when the metadata service
            reports a failed request or a bad response.
        IndexingFailed
            Indexing of the document failed in a way that indicates recovery
            is unlikely for subsequent papers. Raised when the connection
            fails twice in a row, or when an unexpected exception occurs.

        """
        logger.debug(f'{arxiv_id}: get metadata')

        try:
            try:
                logger.debug(f'{arxiv_id}: requesting metadata')
                docmeta: DocMeta = metadata.retrieve(arxiv_id)
            except metadata.ConnectionFailed:
                # The metadata service will retry bad responses, but not
                # connection errors. Sometimes it just takes another try, so
                # why not. The retry deliberately lives inside the outer
                # ``try``: an exception raised in an ``except`` clause is not
                # seen by that clause's siblings, so retrying at the outer
                # level would let retry errors escape untranslated.
                logger.warning(f'{arxiv_id}: first attempt failed, retrying')
                docmeta = metadata.retrieve(arxiv_id)
        except metadata.ConnectionFailed as e:
            # Only the retry can get here; the first connection failure was
            # consumed above. Things really are looking bad. There is no need
            # to keep trying with subsequent records, so abort entirely.
            logger.error(f'{arxiv_id}: second attempt failed, giving up')
            raise IndexingFailed(
                'Indexing failed; metadata endpoint could not be reached.'
            ) from e
        except metadata.RequestFailed as e:
            logger.error(f'{arxiv_id}: request failed')
            raise DocumentFailed('Request to metadata service failed') from e
        except metadata.BadResponse as e:
            logger.error(f'{arxiv_id}: bad response from metadata service')
            raise DocumentFailed('Bad response from metadata service') from e
        except Exception as e:
            logger.error(f'{arxiv_id}: unhandled error, metadata service: {e}')
            raise IndexingFailed('Unhandled exception') from e
        return docmeta
Пример #4
0
    def test_response_is_not_json(self, mock_get):
        """
        :func:`.metadata.retrieve` raises IOError when not valid JSON.

        ``mock_get`` is set up to return a status-200 response whose
        ``json()`` call raises ``JSONDecodeError``.
        """
        from json.decoder import JSONDecodeError

        # The exception class cannot be built without arguments, so a
        # ready-made instance (not the bare class) serves as the side effect.
        decode_failure = JSONDecodeError('Nope', 'Nope', 0)

        not_json = mock.MagicMock(status_code=200)
        type(not_json).json = mock.MagicMock(side_effect=decode_failure)
        mock_get.return_value = not_json

        with self.assertRaises(IOError):
            metadata.retrieve('1234.5678v3')