Пример #1
0
    def __init__(self,
                 query: str,
                 api: str,
                 count: int = 200,
                 cursor: bool = False,
                 download: bool = True,
                 verbose: bool = False,
                 **kwds: str) -> None:
        """Class intended as superclass to perform a search query.

        Note: `self._view` is read here, so it has to be set on the
        instance before this initializer is called.

        :param query: The query.  Either a string, which is sent as the
                      `query` parameter, or a dict whose items are merged
                      into the request parameters (values need not be
                      strings).
        :param api: The name of the Scopus API to be accessed.  Allowed values:
                    AffiliationSearch, AuthorSearch, ScopusSearch,
                    SerialSearch, SubjectClass.
        :param count: The number of entries to be displayed at once.  A smaller
                      number means more queries with each query having
                      fewer results.
        :param cursor: Whether to use the cursor in order to iterate over all
                      search results without limit on the number of the results.
                      In contrast to `start` parameter, the `cursor` parameter
                      does not allow users to obtain partial results.
                      If False and no `start` value was supplied via `query`
                      or `kwds`, `start` is set to 0.
        :param download: Whether to download results (if they have not been
                         cached) or not.  Forwarded to Base.
        :param verbose: Whether to print a download progress bar.
                        Forwarded to Base.
        :param kwds: Additional key-value pairs that are merged into the
                     request parameters.  Must contain fields and values
                     specified in the respective API specification.

        Raises
        ------
        KeyError
            If `api` is not a key of URLS (assuming URLS is a plain dict).
            NOTE(review): get_folder() is called first and may fail earlier
            with its own exception for an unknown api -- confirm.
        """
        # Construct query parameters
        params = {'count': count, 'view': self._view, **kwds}
        if isinstance(query, dict):
            params.update(query)
            # str.format() rather than "=".join() so that non-string values
            # (e.g. integers) do not raise a TypeError; for string values
            # the resulting name, and hence the cache file, is unchanged
            name = "&".join("{}={}".format(k, v) for k, v in query.items())
        else:
            params['query'] = query
            name = query
        if cursor:
            params['cursor'] = '*'
        elif "start" not in params:
            params['start'] = 0

        # Construct cache file path; the file name is the MD5 hash of the query
        stem = md5(name.encode('utf8')).hexdigest()
        self._cache_file_path = get_folder(api, self._view) / stem

        # Init
        Base.__init__(self,
                      params=params,
                      url=URLS[api],
                      download=download,
                      api=api,
                      verbose=verbose)
Пример #2
0
    def __init__(self, identifier, api, refresh, view, id_type=None,
                 date=None):
        """Class intended as superclass to perform retrievals.

        Parameters
        ----------
        identifier : str or int
            The identifier of the item to retrieve.  It is converted to a
            string, appended to the request URL and used (with "/" replaced
            by "_") as name of the cache file.

        api : str
            The name of the Scopus API to be accessed.  Must be a key of
            RETRIEVAL_URL, e.g. AbstractRetrieval, AuthorRetrieval,
            CitationOverview, ContentAffiliationRetrieval.

        refresh : bool or int
            Whether to refresh the cached file if it exists or not.  If int
            is passed, cached file will be refreshed if the number of days
            since last modification exceeds that value.  Forwarded to Base.

        view : str
            The view of the file that should be downloaded.

        id_type : str (optional, default=None)
            The type of used ID.  It is inserted into the URL before the
            identifier.
            Note: Will only take effect for the AbstractRetrieval and
            PlumXMetrics APIs, where it must not be None.

        date : str (optional, default=None)
            A string combining two years with a hyphen for which citations
            should be looked up for.
            Note: Will only take effect for the CitationOverview API.

        Raises
        ------
        ValueError
            If the api parameter is not a key of RETRIEVAL_URL.

        TypeError
            If id_type is None (or not a string) while api is
            AbstractRetrieval or PlumXMetrics.
        """
        # Checks
        if api not in RETRIEVAL_URL:
            raise ValueError('api parameter must be one of ' +
                             ', '.join(RETRIEVAL_URL.keys()))

        # The identifier may be given as int, but all operations below
        # (URL concatenation, split, replace) require a string
        identifier = str(identifier)

        # Construct parameters
        url = RETRIEVAL_URL[api]
        if api in ("AbstractRetrieval", "PlumXMetrics"):
            url += id_type + "/"
        params = {'view': view}
        if api == 'CitationOverview':
            # Only the part after the last "0-" is sent as scopus_id
            scopus_id = identifier.split('0-')[-1]
            params.update({'date': date, 'scopus_id': scopus_id})
        url += identifier

        # Parse file contents
        qfile = join(get_folder(api, view), identifier.replace('/', '_'))
        Base.__init__(self, qfile, refresh, params=params, url=url)
        self._view = view
Пример #3
0
    def __init__(self, query, api, refresh, view='STANDARD', count=200,
                 max_entries=5000, cursor=False, download_results=True, **kwds):
        """Class intended as superclass to perform a search query.

        Results are read from the cache file if it exists and `refresh` is
        falsy; otherwise the query is downloaded anew and, if
        `download_results` is True, written to the cache file as one JSON
        document per line.

        Parameters
        ----------
        query : str
            A string of the query.  Its MD5 hash is the name of the cache
            file.

        api : str
            The name of the Scopus API to be accessed.  Allowed values:
            AffiliationSearch, AuthorSearch, ScopusSearch.

        refresh : bool
            Whether to refresh the cached file if it exists or not.

        view : str
            The view of the file that should be downloaded.

        count : int (optional, default=200)
            The number of entries to be displayed at once.  A smaller number
            means more queries with each query having less results.

        max_entries : int (optional, default=5000)
            Raise error when the number of results is beyond this number.
            To skip this check, set `max_entries` to `None`.  Has no effect
            if cursor is used.

        cursor : bool (optional, default=False)
            Whether to use the cursor in order to iterate over all search
            results without limit on the number of the results.  In contrast
            to `start` parameter, the `cursor` parameter does not allow users
            to obtain partial results.

        download_results : bool (optional, default=True)
            Whether to download results (if they have not been cached) or not.
            If False, the number of results is still looked up, but no
            results are stored.

        kwds : key-value parings, optional
            Keywords passed on to the download() and _parse() helpers.
            Must contain fields and values specified in the respective
            API specification.

        Raises
        ------
        ScopusQueryError
            If the number of search results exceeds max_entries.

        KeyError
            If a download is necessary and the api parameter is not a key
            of SEARCH_URL (assuming SEARCH_URL is a plain dict).
            NOTE(review): get_folder() is called first and may fail earlier
            with its own exception for an unknown api -- confirm.
        """
        # Read the file contents if file exists and we are not refreshing,
        # otherwise download query anew and cache file
        fname = md5(query.encode('utf8')).hexdigest()
        qfile = join(get_folder(api, view), fname)
        if not refresh and exists(qfile):
            with open(qfile, "rb") as f:
                self._json = [loads(line) for line in f]
            self._n = len(self._json)
        else:
            # Set query parameters
            params = {'query': query, 'count': count, 'view': view}
            if cursor:
                params['cursor'] = '*'
            else:
                params['start'] = 0
            # Download results
            res = download(url=SEARCH_URL[api], params=params, **kwds).json()
            n = int(res['search-results'].get('opensearch:totalResults', 0))
            self._n = n
            # max_entries=None disables the check; comparing an int with
            # None would raise a TypeError in Python 3
            too_many = max_entries is not None and n > max_entries
            if not cursor and too_many:  # Stop if there are too many results
                text = ('Found {} matches. Set max_entries to a higher '
                        'number, change your query ({}) or set '
                        'subscription=True'.format(n, query))
                raise ScopusQueryError(text)
            if download_results:
                self._json = _parse(res, params, n, api, **kwds)
                # Finally write out the file
                with open(qfile, 'wb') as f:
                    for item in self._json:
                        f.write('{}\n'.format(dumps(item)).encode('utf-8'))
            else:
                # Assures that properties will not result in an error
                self._json = []
        self._view = view
Пример #4
0
    def __init__(self,
                 query,
                 api,
                 refresh,
                 view='STANDARD',
                 count=200,
                 max_entries=5000,
                 cursor=False,
                 download=True,
                 verbose=False,
                 **kwds):
        """Class intended as superclass to perform a search query.

        Parameters
        ----------
        query : str or dict
            The query.  Either a string, which is sent as the `query`
            parameter, or a dict whose items are merged into the request
            parameters (values need not be strings).

        api : str
            The name of the Scopus API to be accessed.  Allowed values:
            AffiliationSearch, AuthorSearch, ScopusSearch.

        refresh : bool or int
            Whether to refresh the cached file if it exists or not.  If int
            is passed, cached file will be refreshed if the number of days
            since last modification exceeds that value.  Forwarded to Base.

        view : str
            The view of the file that should be downloaded.

        count : int (optional, default=200)
            The number of entries to be displayed at once.  A smaller number
            means more queries with each query having less results.

        max_entries : int (optional, default=5000)
            Raise error when the number of results is beyond this number.
            To skip this check, set `max_entries` to `None`. Has no
            effect if cursor=True.  Forwarded to Base.

        cursor : bool (optional, default=False)
            Whether to use the cursor in order to iterate over all search
            results without limit on the number of the results.  In contrast
            to `start` parameter, the `cursor` parameter does not allow users
            to obtain partial results.  If False, `start` is set to 0.

        download : bool (optional, default=True)
            Whether to download results (if they have not been cached) or not.
            Forwarded to Base.

        verbose : bool (optional, default=False)
            Forwarded to Base.

        kwds : key-value parings, optional
            NOTE(review): accepted but not used in this initializer --
            confirm whether they should be forwarded.

        Raises
        ------
        ScopusQueryError
            If the number of search results exceeds max_entries
            (presumably raised by Base.__init__ -- not checked here).

        KeyError
            If the api parameter is not a key of SEARCH_URL (assuming
            SEARCH_URL is a plain dict).
            NOTE(review): get_folder() is called first and may fail earlier
            with its own exception for an unknown api -- confirm.
        """
        # Set query parameters
        params = {'count': count, 'view': view}
        if isinstance(query, dict):
            params.update(query)
            # str.format() rather than "=".join() so that non-string values
            # (e.g. integers) do not raise a TypeError; for string values
            # the resulting name, and hence the cache file, is unchanged
            name = "&".join("{}={}".format(k, v) for k, v in query.items())
        else:
            params['query'] = query
            name = query
        if cursor:
            params['cursor'] = '*'
        else:
            params['start'] = 0

        # The cache file is named after the MD5 hash of the query
        fname = md5(name.encode('utf8')).hexdigest()
        qfile = join(get_folder(api, view), fname)

        Base.__init__(self,
                      qfile,
                      refresh,
                      params=params,
                      url=SEARCH_URL[api],
                      download=download,
                      max_entries=max_entries,
                      verbose=verbose)
        self._view = view
Пример #5
0
    def __init__(self, identifier, api, refresh, view, id_type=None,
                 date=None, citation=None, **kwds):
        """Class intended as superclass to perform retrievals.

        Parameters
        ----------
        identifier : str or int
            The identifier of the item to retrieve.  It is converted to a
            string, appended to the request URL and used (with "/" replaced
            by "_") as name of the cache file.

        api : str
            The name of the Scopus API to be accessed.  Must be a key of
            RETRIEVAL_URL, e.g. AbstractRetrieval, AuthorRetrieval,
            CitationOverview, AffiliationRetrieval.

        refresh : bool or int
            Whether to refresh the cached file if it exists or not.  If int
            is passed, cached file will be refreshed if the number of days
            since last modification exceeds that value.  Forwarded to Base.

        view : str
            The view of the file that should be downloaded.

        id_type : str (optional, default=None)
            The type of used ID.  It is inserted into the URL before the
            identifier.
            Note: Will only take effect for the AbstractRetrieval and
            PlumXMetrics APIs, where it must not be None.

        date : str (optional, default=None)
            A string specifying a year or range of years (combining two
            years with a hyphen) for which either citations or yearly
            metric data (SJR, SNIP, yearly-data) should be looked up for.
            Note: Will only take effect for the CitationOverview and
            SerialTitle APIs.

        citation : str (optional, default=None)
            Allows for the exclusion of self-citations or those by books.
            If None, will count all citations.
            Note: Will only take effect for the CitationOverview API.

        kwds : key-value parings, optional
            Additional key-value pairs that are merged into the request
            parameters.  Must contain fields and values specified in the
            respective API specification.

        Raises
        ------
        KeyError
            If the api parameter is an invalid entry, i.e. not a key of
            RETRIEVAL_URL (assuming RETRIEVAL_URL is a plain dict).

        TypeError
            If id_type is None (or not a string) while api is
            AbstractRetrieval or PlumXMetrics.
        """
        # The identifier may be given as int, but all operations below
        # (URL concatenation, split, replace) require a string
        identifier = str(identifier)

        # Construct parameters
        url = RETRIEVAL_URL[api]
        if api in ("AbstractRetrieval", "PlumXMetrics"):
            url += id_type + "/"
        params = {'view': view, **kwds}
        if api == 'CitationOverview':
            # Only the part after the last "0-" is sent as scopus_id
            params.update({'date': date,
                           'scopus_id': identifier.split('0-')[-1],
                           'citation': citation})
        if api == 'SerialTitle':
            params.update({'date': date})
        url += identifier

        # Parse file contents
        qfile = join(get_folder(api, view), identifier.replace('/', '_'))
        Base.__init__(self, qfile, refresh, params=params, url=url)
        self._view = view