def __init__(self,
             query: str,
             api: str,
             count: int = 200,
             cursor: bool = False,
             download: bool = True,
             verbose: bool = False,
             **kwds: str
             ) -> None:
    """Class intended as superclass to perform a search query.

    Builds the request parameters and the cache file path for the query,
    then hands over to `Base.__init__`.  Reads `self._view`, which must
    therefore be set on the instance before this initializer runs.

    :param query: A string of the query.  A dict is accepted as well: its
                  items are merged into the request parameters (instead
                  of being sent as `query`), and the cache name is derived
                  from its `key=value` pairs joined by `&`.
    :param api: The name of the Scopus API to be accessed.  Allowed values:
                AffiliationSearch, AuthorSearch, ScopusSearch,
                SerialSearch, SubjectClass.
    :param count: The number of entries to be displayed at once.  A smaller
                  number means more queries with each query having
                  fewer results.
    :param cursor: Whether to use the cursor in order to iterate over all
                   search results without limit on the number of the
                   results.  In contrast to `start` parameter, the `cursor`
                   parameter does not allow users to obtain partial
                   results.
    :param download: Whether to download results (if they have not been
                     cached) or not.
    :param verbose: Whether to print a download progress bar.
    :param kwds: Additional key-value pairs merged into the request
                 parameters.  Must contain fields and values specified
                 in the respective API specification.  If `start` is
                 given (and `cursor` is False) it is kept as is.

    Raises
    ------
    KeyError
        If `api` has no entry in `URLS`.
        NOTE(review): earlier documentation listed ValueError for an
        invalid `api`; presumably raised inside `get_folder` or `Base` --
        confirm.
    """
    # Construct query parameters
    params = {'count': count, 'view': self._view, **kwds}
    if isinstance(query, dict):
        params.update(query)
        # Format each pair explicitly: "=".join() raises TypeError as soon
        # as a value (or key) is not a string, e.g. an int.
        name = "&".join("{}={}".format(key, value)
                        for key, value in query.items())
    else:
        params['query'] = query
        name = query
    if cursor:
        params['cursor'] = '*'
    else:
        # Only default the offset; a caller-supplied `start` wins.
        params.setdefault('start', 0)

    # Construct cache file path; the file name is the MD5 hex digest of
    # the query name, so it does not depend on count, view or kwds.
    stem = md5(name.encode('utf8')).hexdigest()
    self._cache_file_path = get_folder(api, self._view) / stem

    # Init
    Base.__init__(self, params=params, url=URLS[api],
                  download=download, api=api, verbose=verbose)
def __init__(self, identifier, api, refresh, view, id_type=None, date=None):
    """Class intended as superclass to perform retrievals.

    Builds the request URL, the request parameters and the cache file
    path for the given identifier, then hands over to `Base.__init__`.

    Parameters
    ----------
    identifier : str or int
        The ID of the item to retrieve.  It is converted to `str` and
        appended to the request URL.

    api : str
        The name of the Scopus API to be accessed.  Allowed values:
        AbstractRetrieval, AuthorRetrieval, CitationOverview,
        ContentAffiliationRetrieval.

    refresh : bool or int
        Whether to refresh the cached file if it exists or not.  If int
        is passed, cached file will be refreshed if the number of days
        since last modification exceeds that value.

    view : str
        The view of the file that should be downloaded.

    id_type : str (optional, default=None)
        The type of used ID.  It becomes part of the URL and must be
        given for the AbstractRetrieval and PlumXMetrics APIs; it is
        ignored for all other APIs.

    date : str (optional, default=None)
        A string combining two years with a hyphen for which citations
        should be looked up for.
        Note: Will only take effect for the CitationOverview API.

    Raises
    ------
    ValueError
        If the api parameter is not a key of `RETRIEVAL_URL`.
        NOTE(review): earlier documentation also listed an invalid
        `view`; that check is not performed here -- presumably in
        `get_folder` or `Base`, confirm.
    """
    # Checks
    if api not in RETRIEVAL_URL:
        raise ValueError('api parameter must be one of ' +
                         ', '.join(RETRIEVAL_URL.keys()))

    # Integer IDs are documented as valid input, but every use below
    # (split, replace, URL concatenation) requires a string.
    identifier = str(identifier)

    # Construct parameters
    url = RETRIEVAL_URL[api]
    if api in ("AbstractRetrieval", "PlumXMetrics"):
        url += id_type + "/"
    params = {'view': view}
    if api == 'CitationOverview':
        # Only the part after the last "0-" is sent as scopus_id
        params.update({'date': date,
                       'scopus_id': identifier.split('0-')[-1]})
    url += identifier

    # Parse file contents; slashes would create sub-paths in the cache
    # file name, so they are replaced.
    qfile = join(get_folder(api, view), identifier.replace('/', '_'))
    Base.__init__(self, qfile, refresh, params=params, url=url)
    self._view = view
def __init__(self, query, api, refresh, view='STANDARD', count=200,
             max_entries=5000, cursor=False, download=True,
             verbose=False, **kwds):
    """Class intended as superclass to perform a search query.

    Builds the request parameters and the cache file path for the query,
    then hands over to `Base.__init__`.

    Parameters
    ----------
    query : str or dict
        A string of the query.  If a dict is passed, its items are merged
        into the request parameters (instead of being sent as `query`)
        and the cache name is derived from its `key=value` pairs joined
        by `&`.

    api : str
        The name of the Scopus API to be accessed.  Allowed values:
        AffiliationSearch, AuthorSearch, ScopusSearch.

    refresh : bool or int
        Whether to refresh the cached file if it exists or not.  If int
        is passed, cached file will be refreshed if the number of days
        since last modification exceeds that value.

    view : str
        The view of the file that should be downloaded.

    count : int (optional, default=200)
        The number of entries to be displayed at once.  A smaller number
        means more queries with each query having fewer results.

    max_entries : int (optional, default=5000)
        Raise error when the number of results is beyond this number.
        To skip this check, set `max_entries` to `None`. Has no
        effect if cursor=True.

    cursor : bool (optional, default=False)
        Whether to use the cursor in order to iterate over all search
        results without limit on the number of the results.  In contrast
        to `start` parameter, the `cursor` parameter does not allow users
        to obtain partial results.

    download : bool (optional, default=True)
        Whether to download results (if they have not been cached) or not.

    verbose : bool (optional, default=False)
        Passed on to `Base.__init__`.

    kwds : key-value pairings, optional
        Additional fields merged into the request parameters.  Must
        contain fields and values specified in the respective API
        specification.  If `start` is given (and `cursor` is False) it
        is kept as is.

    Raises
    ------
    KeyError
        If `api` has no entry in `SEARCH_URL`.
        NOTE(review): earlier documentation listed ValueError for an
        invalid `api` and ScopusQueryError when the number of results
        exceeds `max_entries`; neither is raised in this method --
        presumably in `get_folder` / `Base`, confirm.
    """
    # Construct query parameters.  kwds used to be accepted but silently
    # dropped; they are now forwarded as documented.
    params = {'count': count, 'view': view}
    params.update(kwds)
    if isinstance(query, dict):
        params.update(query)
        # Format each pair explicitly: "=".join() raises TypeError as soon
        # as a value (or key) is not a string, e.g. an int.
        name = "&".join("{}={}".format(key, value)
                        for key, value in query.items())
    else:
        params['query'] = query
        name = query

    # Cache file name is the MD5 hex digest of the query name
    fname = md5(name.encode('utf8')).hexdigest()
    qfile = join(get_folder(api, view), fname)

    if cursor:
        params['cursor'] = '*'
    else:
        # Only default the offset; a caller-supplied `start` wins.
        params.setdefault('start', 0)

    Base.__init__(self, qfile, refresh, params=params,
                  url=SEARCH_URL[api], download=download,
                  max_entries=max_entries, verbose=verbose)
    # Set query parameters
    self._view = view
def __init__(self, identifier, api, refresh, view, id_type=None, date=None,
             citation=None, **kwds):
    """Class intended as superclass to perform retrievals.

    Builds the request URL, the request parameters and the cache file
    path for the given identifier, then hands over to `Base.__init__`.

    Parameters
    ----------
    identifier : str or int
        The ID of the item to retrieve.  It is converted to `str` and
        appended to the request URL.

    api : str
        The name of the Scopus API to be accessed.  Allowed values:
        AbstractRetrieval, AuthorRetrieval, CitationOverview,
        AffiliationRetrieval.

    refresh : bool or int
        Whether to refresh the cached file if it exists or not.  If int
        is passed, cached file will be refreshed if the number of days
        since last modification exceeds that value.

    view : str
        The view of the file that should be downloaded.

    id_type : str (optional, default=None)
        The type of used ID.  It becomes part of the URL and must be
        given for the AbstractRetrieval and PlumXMetrics APIs; it is
        ignored for all other APIs.

    date : str (optional, default=None)
        A string specifying a year or range of years (combining two
        years with a hyphen) for which either citations or yearly
        metric data (SJR, SNIP, yearly-data) should be looked up for.
        Note: Will only take effect for the CitationOverview and
        SerialTitle APIs.

    citation : str (optional, default=None)
        Allows for the exclusion of self-citations or those by books.
        If None, will count all citations.
        Note: Will only take effect for the CitationOverview API.

    kwds : key-value pairings, optional
        Additional fields merged into the request parameters.  Must
        contain fields and values specified in the respective API
        specification.

    Raises
    ------
    KeyError
        If the api parameter is not a key of `RETRIEVAL_URL`.
    """
    # Integer IDs are documented as valid input, but every use below
    # (split, replace, URL concatenation) requires a string.
    identifier = str(identifier)

    # Construct parameters
    url = RETRIEVAL_URL[api]
    if api in ("AbstractRetrieval", "PlumXMetrics"):
        url += id_type + "/"
    params = {'view': view, **kwds}
    if api == 'CitationOverview':
        # Only the part after the last "0-" is sent as scopus_id
        params.update({'date': date,
                       'scopus_id': identifier.split('0-')[-1],
                       'citation': citation})
    if api == 'SerialTitle':
        params.update({'date': date})
    url += identifier

    # Parse file contents; slashes would create sub-paths in the cache
    # file name, so they are replaced.
    qfile = join(get_folder(api, view), identifier.replace('/', '_'))
    Base.__init__(self, qfile, refresh, params=params, url=url)
    self._view = view