Пример #1
0
def get_tb_page(arxiv_id: str) -> Response:
    """Build the response for the trackback page of an arXiv article.

    Parameters
    ----------
    arxiv_id : str
        The arXiv identifier as provided in the request.

    Returns
    -------
    dict
        Response data.
    int
        HTTP status code.
    dict
        Headers to add to the response.

    Raises
    ------
    InternalServerError
        Raised when there was an unexpected problem executing the query.
    TrackbackNotFound
        Raised when trackbacks for an article cannot be found, either because
        the identifier is invalid or the article metadata is not available.

    """
    # Reject the request outright when no identifier was supplied.
    if not arxiv_id:
        raise TrackbackNotFound(data={'missing_id': True})

    data: Dict[str, Any] = {}
    headers: Dict[str, Any] = {}
    try:
        identifier = Identifier(arxiv_id=arxiv_id)
        maybe_redirect = check_supplied_identifier(identifier, 'browse.tb')
        if maybe_redirect:
            return maybe_redirect

        data['arxiv_identifier'] = identifier
        abs_meta = metadata.get_abs(identifier.id)
        if abs_meta:
            data['abs_meta'] = abs_meta

        pings = get_paper_trackback_pings(identifier.id)
        data['trackback_pings'] = pings
        if len(pings) > 0:
            # NOTE(review): assumes abs_meta is truthy whenever pings exist;
            # a falsy abs_meta here would surface as InternalServerError via
            # the catch-all handler below -- confirm get_abs never returns
            # None for an article that has trackbacks.
            data['author_links'] = split_long_author_list(
                queries_for_authors(abs_meta.authors.raw),
                truncate_author_list_size)
        http_status = status.HTTP_200_OK

    except AbsNotFoundException:
        raise TrackbackNotFound(data={'arxiv_id': arxiv_id, 'not_found': True})
    except (AbsException, IdentifierException):
        raise TrackbackNotFound(data={'arxiv_id': arxiv_id})
    except Exception as ex:
        logger.warning(f'Error getting trackbacks: {ex}')
        raise InternalServerError from ex

    return data, http_status, headers
Пример #2
0
 def test_split_long_author_list(self):
     """Author list for 1411.4413 splits into (front, back, back size)."""
     f1 = path_of_for_test(
         'data/abs_files/ftp/arxiv/papers/1411/1411.4413.abs')
     # NOTE: the previous `meta: metadata` annotation used the `metadata`
     # module as a type, which is not a valid annotation; dropped.
     meta = AbsMetaSession.parse_abs_file(filename=f1)
     alst = split_long_author_list(queries_for_authors(str(meta.authors)),
                                   20)
     self.assertIs(type(alst), tuple)
     # assertEqual, not assertIs: identity of small ints is a CPython
     # caching artifact, not a correctness guarantee.
     self.assertEqual(len(alst), 3)
     self.assertIs(type(alst[0]), list)
     self.assertIs(type(alst[1]), list)
     self.assertGreater(len(alst[1]), 0)
     self.assertIs(type(alst[2]), int)
Пример #3
0
    def test_split_strange_author_list(self):
        """Test odd author list that shows '0 additional authors' ARXIVNG-2083"""
        f1 = path_of_for_test(
            'data/abs_files/ftp/arxiv/papers/1902/1902.05884.abs')
        # NOTE: the previous `meta: metadata` annotation used the `metadata`
        # module as a type, which is not a valid annotation; dropped.
        meta = AbsMetaSession.parse_abs_file(filename=f1)
        alst = split_long_author_list(queries_for_authors(str(meta.authors)),
                                      100)

        self.assertIs(type(alst), tuple)
        # assertEqual, not assertIs: identity of small ints is a CPython
        # caching artifact, not a correctness guarantee.
        self.assertEqual(len(alst), 3)

        self.assertIs(type(alst[0]), list)
        self.assertIs(type(alst[1]), list)
        self.assertIs(type(alst[2]), int)

        # presumably 100 authors plus one extra entry from
        # queries_for_authors -- TODO confirm the exact layout
        self.assertEqual(
            len(list(filter(lambda x: isinstance(x, tuple), alst[0]))), 101)

        self.assertEqual(len(alst[1]), 0,
                         "Back list on 1902.05884 should be empty")
        self.assertEqual(alst[2], 0,
                         "Back list size on 1902.05884 should be 0")
Пример #4
0
def get_abs_page(arxiv_id: str) -> Response:
    """Get abs page data from the document metadata service.

    The download format preference is not a parameter: it is read from the
    'xxx-ps-defaults' cookie on the current request.

    Parameters
    ----------
    arxiv_id : str
        The arXiv identifier as provided in the request.

    Returns
    -------
    dict
        Search result response data.
    int
        HTTP status code.
    dict
        Headers to add to the response.

    Raises
    ------
    :class:`.AbsNotFound`
        Raised when the identifier is invalid or the article (or requested
        version) cannot be found, including deleted articles.
    :class:`.InternalServerError`
        Raised when there was an unexpected problem executing the query.
    """
    response_data: Dict[str, Any] = {}
    response_headers: Dict[str, Any] = {}
    try:
        # Normalize legacy-style request parameters before parsing the id.
        arxiv_id = _check_legacy_id_params(arxiv_id)
        arxiv_identifier = Identifier(arxiv_id=arxiv_id)

        # Non-canonical identifier forms get a redirect to the canonical URL.
        redirect = check_supplied_identifier(arxiv_identifier,
                                             'browse.abstract')
        if redirect:
            return redirect

        abs_meta = metadata.get_abs(arxiv_id)
        # Echo back a versioned id only when a version was explicitly asked for.
        response_data['requested_id'] = arxiv_identifier.idv \
            if arxiv_identifier.has_version else arxiv_identifier.id
        response_data['abs_meta'] = abs_meta
        response_data['meta_tags'] = meta_tag_metadata(abs_meta)
        response_data['author_links'] = \
            split_long_author_list(queries_for_authors(
                abs_meta.authors.raw), truncate_author_list_size)
        # Deferred URL construction: the template calls this per author query.
        response_data['url_for_author_search'] = \
            lambda author_query: url_for('search_archive',
                                         searchtype='author',
                                         archive=abs_meta.primary_archive.id,
                                         query=author_query)

        # Dissemination formats for download links
        download_format_pref = request.cookies.get('xxx-ps-defaults')
        add_sciencewise_ping = _check_sciencewise_ping(abs_meta.arxiv_id_v)
        response_data['formats'] = metadata.get_dissemination_formats(
            abs_meta, download_format_pref, add_sciencewise_ping)

        # Following are less critical and template must display without them
        # try:
        _non_critical_abs_data(abs_meta, arxiv_identifier, response_data)
        # except Exception:
        #    logger.warning("Error getting non-critical abs page data",
        #                   exc_info=app.debug)

    # NOTE(review): handlers below read `arxiv_identifier`, which is only
    # bound after Identifier() succeeds; these exception types appear to be
    # raised only after that point -- confirm against metadata.get_abs.
    except AbsNotFoundException:
        if arxiv_identifier.is_old_id and arxiv_identifier.archive \
           in taxonomy.definitions.ARCHIVES:
            # Old-scheme id in a known archive: give a more specific reason.
            archive_name = taxonomy.definitions.ARCHIVES[
                arxiv_identifier.archive]['name']
            raise AbsNotFound(
                data={
                    'reason': 'old_id_not_found',
                    'arxiv_id': arxiv_id,
                    'archive_id': arxiv_identifier.archive,
                    'archive_name': archive_name
                })
        raise AbsNotFound(data={'reason': 'not_found', 'arxiv_id': arxiv_id})
    except AbsVersionNotFoundException:
        raise AbsNotFound(
            data={
                'reason': 'version_not_found',
                'arxiv_id': arxiv_identifier.idv,
                'arxiv_id_latest': arxiv_identifier.id
            })
    except AbsDeletedException as e:
        raise AbsNotFound(
            data={
                'reason': 'deleted',
                'arxiv_id_latest': arxiv_identifier.id,
                'message': e
            })
    except IdentifierIsArchiveException as e:
        raise AbsNotFound(data={
            'reason': 'is_archive',
            'arxiv_id': arxiv_id,
            'archive_name': e
        })
    except IdentifierException:
        raise AbsNotFound(data={'arxiv_id': arxiv_id})
    except AbsException as e:
        raise InternalServerError(
            'There was a problem. If this problem persists, please contact '
            '[email protected].') from e

    response_status = status.HTTP_200_OK

    # Conditional-request support: short-circuit with 304 when the client's
    # cached copy is still current.
    not_modified = _check_request_headers(abs_meta, response_data,
                                          response_headers)
    if not_modified:
        return {}, status.HTTP_304_NOT_MODIFIED, response_headers

    return response_data, response_status, response_headers
Пример #5
0
def author_links(abs_meta: DocMetadata) -> Tuple[AuthorList, AuthorList, int]:
    """Build the (front, back, back-size) author link lists, as on the abs page."""
    author_queries = queries_for_authors(abs_meta.authors.raw)
    return split_long_author_list(author_queries, truncate_author_list_size)