def __init__(self, identifier, id_type, refresh=False):
    """Interaction with the PlumX Metrics API.

    Parameters
    ----------
    identifier : str
        The identifier of a document.

    id_type: str
        The type of used ID. Allowed values are:
        - 'airitiDocId'
        - 'arxivId'
        - 'cabiAbstractId'
        - 'citeulikeId'
        - 'digitalMeasuresArtifactId'
        - 'doi'
        - 'elsevierId'
        - 'elsevierPii'
        - 'facebookCountUrlId'
        - 'figshareArticleId'
        - 'githubRepoId'
        - 'isbn'
        - 'lccn'
        - 'medwaveId'
        - 'nctId'
        - 'oclc'
        - 'pittEprintDscholarId'
        - 'pmcid'
        - 'pmid'
        - 'redditId'
        - 'repecHandle'
        - 'repoUrl'
        - 'scieloId'
        - 'sdEid'
        - 'slideshareUrlId'
        - 'smithsonianPddrId'
        - 'soundcloudTrackId'
        - 'ssrnId'
        - 'urlId'
        - 'usPatentApplicationId'
        - 'usPatentPublicationId'
        - 'vimeoVideoId'
        - 'youtubeVideoId'

    refresh : bool or int (optional, default=False)
        Whether to refresh the cached file if it exists or not.  If int
        is passed, cached file will be refreshed if the number of days
        since last modification exceeds that value.

    Raises
    ------
    ValueError
        If `id_type` is not one of the allowed values.

    Examples
    --------
    See https://pybliometrics.readthedocs.io/en/stable/examples/PlumXMetrics.html.

    Notes
    -----
    The directory for cached results is `{path}/ENHANCED/{identifier}`,
    where `path` is specified in `~/.scopus/config.ini`.
    """
    # Reject unknown ID types before any request is attempted
    known_id_types = (
        'airitiDocId', 'arxivId', 'cabiAbstractId', 'citeulikeId',
        'digitalMeasuresArtifactId', 'doi', 'elsevierId', 'elsevierPii',
        'facebookCountUrlId', 'figshareArticleId', 'githubRepoId', 'isbn',
        'lccn', 'medwaveId', 'nctId', 'oclc', 'pittEprintDscholarId',
        'pmcid', 'pmid', 'redditId', 'repecHandle', 'repoUrl', 'scieloId',
        'sdEid', 'slideshareUrlId', 'smithsonianPddrId',
        'soundcloudTrackId', 'ssrnId', 'urlId', 'usPatentApplicationId',
        'usPatentPublicationId', 'vimeoVideoId', 'youtubeVideoId',
    )
    if id_type not in known_id_types:
        listing = ', '.join(known_id_types)
        raise ValueError('Id type must be one of: ' + listing)

    self.id_type = id_type
    self.identifier = identifier

    # PlumX results are always retrieved with the ENHANCED view
    Retrieval.__init__(
        self,
        identifier=identifier,
        id_type=id_type,
        api='PlumXMetrics',
        refresh=refresh,
        view='ENHANCED',
    )
def __init__(self, identifier=None, refresh=False, view='META_ABS',
             id_type=None):
    """Interaction with the Abstract Retrieval API.

    Parameters
    ----------
    identifier : str or int
        The identifier of a document.  Can be the Scopus EID, the Scopus
        ID, the PII, the Pubmed-ID or the DOI.

    refresh : bool or int (optional, default=False)
        Whether to refresh the cached file if it exists or not.  If int
        is passed, cached file will be refreshed if the number of days
        since last modification exceeds that value.

    id_type: str (optional, default=None)
        The type of used ID. Allowed values: None, 'eid', 'pii',
        'scopus_id', 'pubmed_id', 'doi'.  If the value is None, the
        function tries to infer the ID type itself.

    view : str (optional, default=META_ABS)
        The view of the file that should be downloaded.  Allowed values:
        META, META_ABS, REF, FULL, where FULL includes all information
        of META_ABS view and META_ABS includes all information of the
        META view.  For details see
        https://dev.elsevier.com/guides/AbstractRetrievalViews.htm.

    Raises
    ------
    ValueError
        If the id_type parameter or the view parameter contains
        invalid entries.

    Examples
    --------
    See https://pybliometrics.readthedocs.io/en/stable/examples/AbstractRetrieval.html.

    Notes
    -----
    The directory for cached results is `{path}/{view}/{identifier}`,
    where `path` is specified in `~/.scopus/config.ini`.  In case
    `identifier` is a DOI, an underscore replaces the forward slash.
    """
    # Checks
    identifier = str(identifier)
    valid_views = ('META', 'META_ABS', 'REF', 'FULL')
    if view not in valid_views:
        raise ValueError('view parameter must be one of ' +
                         ', '.join(valid_views))
    valid_id_types = ('eid', 'pii', 'scopus_id', 'pubmed_id', 'doi')
    if id_type is None:
        # No explicit type given: infer it from the identifier itself
        id_type = detect_id_type(identifier)
    elif id_type not in valid_id_types:
        raise ValueError('id_type parameter must be one of ' +
                         ', '.join(valid_id_types))

    # Load json
    Retrieval.__init__(
        self,
        identifier=identifier,
        id_type=id_type,
        api='AbstractRetrieval',
        refresh=refresh,
        view=view,
    )
    self._json = self._json['abstracts-retrieval-response']

    # Pre-extract the parts of the response that are accessed repeatedly
    self._head = chained_get(self._json, ["item", "bibrecord", "head"], {})
    self._confevent = chained_get(
        self._head,
        ['source', 'additional-srcinfo', 'conferenceinfo', 'confevent'],
        {},
    )
    # The REF view stores references at a different location
    ref_path = (["references"] if self._view == "REF"
                else ['item', 'bibrecord', 'tail', 'bibliography'])
    self._ref = chained_get(self._json, ref_path, {})
def __init__(self,
             identifier: str,
             id_type: str,
             refresh: Union[bool, int] = False,
             **kwds: str
             ) -> None:
    """Interaction with the PlumX Metrics API.

    :param identifier: The identifier of a document.
    :param id_type: The type of used ID. Allowed values are:
                    - 'airitiDocId'
                    - 'arxivId'
                    - 'cabiAbstractId'
                    - 'citeulikeId'
                    - 'digitalMeasuresArtifactId'
                    - 'doi'
                    - 'elsevierId'
                    - 'elsevierPii'
                    - 'facebookCountUrlId'
                    - 'figshareArticleId'
                    - 'githubRepoId'
                    - 'isbn'
                    - 'lccn'
                    - 'medwaveId'
                    - 'nctId'
                    - 'oclc'
                    - 'pittEprintDscholarId'
                    - 'pmcid'
                    - 'pmid'
                    - 'redditId'
                    - 'repecHandle'
                    - 'repoUrl'
                    - 'scieloId'
                    - 'sdEid'
                    - 'slideshareUrlId'
                    - 'smithsonianPddrId'
                    - 'soundcloudTrackId'
                    - 'ssrnId'
                    - 'urlId'
                    - 'usPatentApplicationId'
                    - 'usPatentPublicationId'
                    - 'vimeoVideoId'
                    - 'youtubeVideoId'
    :param refresh: Whether to refresh the cached file if it exists or not.
                    If int is passed, cached file will be refreshed if the
                    number of days since last modification exceeds that
                    value.
    :param kwds: Keywords passed on as query parameters.  Must contain
                 fields and values mentioned in the API specification at
                 https://dev.elsevier.com/documentation/PlumXMetricsAPI.wadl.

    Raises
    ------
    ValueError
        If any of the parameters `id_type` or `refresh` is not one of the
        allowed values.

    Notes
    -----
    The directory for cached results is `{path}/ENHANCED/{identifier}`,
    where `path` is specified in your configuration file.
    """
    # Checks
    valid_id_types = (
        'airitiDocId', 'arxivId', 'cabiAbstractId', 'citeulikeId',
        'digitalMeasuresArtifactId', 'doi', 'elsevierId', 'elsevierPii',
        'facebookCountUrlId', 'figshareArticleId', 'githubRepoId', 'isbn',
        'lccn', 'medwaveId', 'nctId', 'oclc', 'pittEprintDscholarId',
        'pmcid', 'pmid', 'redditId', 'repecHandle', 'repoUrl', 'scieloId',
        'sdEid', 'slideshareUrlId', 'smithsonianPddrId',
        'soundcloudTrackId', 'ssrnId', 'urlId', 'usPatentApplicationId',
        'usPatentPublicationId', 'vimeoVideoId', 'youtubeVideoId',
    )
    check_parameter_value(id_type, valid_id_types, "id_type")
    self._id_type = id_type
    self._identifier = identifier

    # Load json; refresh and view are set on the instance before the
    # parent initializer runs, as they are not passed as arguments
    self._refresh = refresh
    self._view = 'ENHANCED'
    Retrieval.__init__(
        self,
        identifier=identifier,
        id_type=id_type,
        api='PlumXMetrics',
        **kwds
    )

    # Map each category name to its list of count types
    self._count_categories = {
        category["name"]: category['count_types']
        for category in self._json.get('count_categories', [])
    }
def __init__(self,
             identifier: List[Union[int, str]],
             start: Union[int, str],
             end: Union[int, str] = datetime.now().year,
             id_type: str = "scopus_id",
             eid: str = None,
             refresh: Union[bool, int] = False,
             citation: Optional[str] = None,
             **kwds: str
             ) -> None:
    """Interaction with the Citation Overview API.

    :param identifier: Up to 25 identifiers for which to look up
                       citations.  Must be Scopus IDs, DOIs, PIIs or
                       Pubmed IDs.  A single str or int is accepted for
                       backward compatibility and treated as a list with
                       one element (a FutureWarning is issued).
    :param start: The first year for which the citation count should
                  be loaded.
    :param end: The last year for which the citation count should be
                loaded.  Defaults to the current year (evaluated once,
                when the function is defined).
    :param id_type: The type of the IDs provided in `identifier`.  Must be
                    one of "scopus_id", "doi", "pii", "pubmed_id".
    :param eid: (deprecated) The Scopus ID of the abstract - will be
                removed in a future release: Instead use param `scopus_id`
                after stripping the part until the second hyphen.
                Within this method the parameter only triggers a
                FutureWarning; its value is not used otherwise.
    :param refresh: Whether to refresh the cached file if it exists or not.
                    If int is passed, cached file will be refreshed if the
                    number of days since last modification exceeds that
                    value.
    :param citation: Allows for the exclusion of self-citations or those
                     by books.  If `None`, will count all citations.
                     Allowed values: None, exclude-self, exclude-books
    :param kwds: Keywords passed on as query parameters.  Must contain
                 fields and values mentioned in the API specification at
                 https://dev.elsevier.com/documentation/AbstractCitationAPI.wadl.

    Raises
    ------
    ValueError
        If parameter `identifier` contains fewer than 1 or more than
        25 elements.

    ValueError
        If any of the parameters `citation`, `id_type` or `refresh` is not
        one of the allowed values.

    Notes
    -----
    The directory for cached results is `{path}/STANDARD/{id}-{citation}`,
    where `path` is specified in your configuration file, and `id` the
    md5-hashed version of a string joining `identifier` on underscore.

    Your API Key needs to be augmented by Elsevier's Scopus Integration
    Team to access this API.
    """
    # Checks
    allowed = ('scopus_id', 'doi', 'pii', 'pubmed_id')
    check_parameter_value(id_type, allowed, "id_type")
    if citation:
        allowed = ('exclude-self', 'exclude-books')
        check_parameter_value(citation, allowed, "citation")
    if eid or not isinstance(identifier, list):
        msg = "Parameter `eid` is deprecated and will be removed in a "\
              "future release. Instead, provide the corresponding "\
              "Scopus ID via parameter `identifier` as a list, and set "\
              "`id_type='scopus_id'`."
        warn(msg, FutureWarning)
    # A bare str/int (deprecated call style) must be wrapped; otherwise
    # a string would be counted and split character by character below
    if isinstance(identifier, (str, int)):
        identifier = [identifier]
    # Lower bound is 1: an empty list must be rejected as well
    if not 1 <= len(identifier) <= 25:
        msg = "Provide at least 1 and at most than 25 identifiers"
        raise ValueError(msg)

    # Variables
    identifier = [str(i) for i in identifier]
    self._start = int(start)
    self._end = int(end)
    self._citation = citation
    self._refresh = refresh
    self._view = "STANDARD"

    # Get file content
    date = f'{start}-{end}'
    # The identifiers travel as a query parameter named after their type
    kwds.update({id_type: identifier})
    # The md5 hash of the joined identifiers serves as the file stem
    stem = md5("_".join(identifier).encode('utf8')).hexdigest()
    Retrieval.__init__(self, stem, api='CitationOverview', date=date,
                       citation=citation, **kwds)
    self._data = self._json['abstract-citations-response']

    # citeInfoMatrix
    matrix = self._data['citeInfoMatrix']['citeInfoMatrixXML'][
        'citationMatrix']['citeInfo']
    self._citeInfoMatrix = [_parse_dict(e) for e in matrix]
    # identifier-legend
    legend = self._data['identifier-legend']['identifier']
    self._identifierlegend = [_parse_dict(e) for e in legend]
    # citeCountHeader
    self._citeCountHeader = self._data['citeColumnTotalXML'][
        "citeCountHeader"]
def __init__(self, eid, start, end=datetime.now().year, citation=None,
             refresh=False):
    """Interaction with the Citation Overview API.

    Parameters
    ----------
    eid : str
        The EID of the abstract.

    start : str or int
        The first year for which the citation count should be loaded

    end : str or int (optional, default=datetime.now().year)
        The last year for which the citation count should be loaded.
        Default is the current year.

    citation : str (optional, default=None)
        Allows for the exclusion of self-citations or those by books.
        If None, will count all citations.
        Allowed values: None, exclude-self, exclude-books

    refresh : bool or int (optional, default=False)
        Whether to refresh the cached file if it exists or not.  If int
        is passed, cached file will be refreshed if the number of days
        since last modification exceeds that value.

    Examples
    --------
    See https://pybliometrics.readthedocs.io/en/stable/examples/CitationOverview.html.

    Notes
    -----
    The directory for cached results is `{path}/STANDARD/{eid}`,
    where `path` is specified in `~/.scopus/config.ini`.

    Your API Key needs to be approved by Elsevier to access this API.
    """
    # Checks
    if citation:
        check_parameter_value(citation, ('exclude-self', 'exclude-books'),
                              "citation")

    # Variables
    self._start = int(start)
    self._end = int(end)
    self._citation = citation
    view = "STANDARD"  # In case Scopus adds different views in future

    # Get file content; the year range uses the values as passed in
    Retrieval.__init__(self, eid, 'CitationOverview', refresh, view=view,
                       date=f'{start}-{end}', citation=citation)
    self._data = self._json['abstract-citations-response']

    # citeInfoMatrix: only the first entry is parsed
    matrix_xml = self._data['citeInfoMatrix']['citeInfoMatrixXML']
    cite_info = matrix_xml['citationMatrix']['citeInfo']
    self._citeInfoMatrix = _parse_dict(cite_info[0])
    # identifier-legend: only the first entry is parsed
    legend = self._data['identifier-legend']['identifier']
    self._identifierlegend = _parse_dict(legend[0])
    # citeColumnTotalXML
    self._citeColumnTotalXML = self._data['citeColumnTotalXML']  # not used
def __init__(self,
             author_id: Union[int, str],
             refresh: Union[bool, int] = False,
             view: str = "ENHANCED",
             **kwds: str
             ) -> None:
    """Interaction with the Author Retrieval API.

    :param author_id: The ID or the EID of the author.
    :param refresh: Whether to refresh the cached file if it exists or not.
                    If int is passed, cached file will be refreshed if the
                    number of days since last modification exceeds that
                    value.
    :param view: The view of the file that should be downloaded.  Allowed
                 values: METRICS, LIGHT, STANDARD, ENHANCED, where STANDARD
                 includes all information of LIGHT view and ENHANCED
                 includes all information of any view.  For details see
                 https://dev.elsevier.com/sc_author_retrieval_views.html.
                 Note: Neither the BASIC nor the DOCUMENTS view are active,
                 although documented.
    :param kwds: Keywords passed on as query parameters.  Must contain
                 fields and values mentioned in the API specification at
                 https://dev.elsevier.com/documentation/AuthorRetrievalAPI.wadl.

    Raises
    ------
    ValueError
        If any of the parameters `refresh` or `view` is not
        one of the allowed values.

    Notes
    -----
    The directory for cached results is `{path}/ENHANCED/{author_id}`,
    where `path` is specified in your configuration file, and `author_id`
    is stripped of an eventually leading `'9-s2.0-'`.
    """
    # Checks
    check_parameter_value(
        view, ('METRICS', 'LIGHT', 'STANDARD', 'ENHANCED'), "view")

    # Load json; only the part after the last hyphen is kept as the ID
    self._id = str(author_id).split('-')[-1]
    self._view = view
    self._refresh = refresh
    Retrieval.__init__(self, identifier=self._id, api='AuthorRetrieval',
                       **kwds)

    # Parse json
    self._json = self._json['author-retrieval-response']
    try:
        self._json = self._json[0]
    except KeyError:  # Incomplete forward
        # Indexing with 0 failed, so the response is a mapping; collect
        # the profile IDs listed under its alias entry
        alias_urls = listify(self._json['alias']['prism:url'])
        self._alias = [entry['$'].split(':')[-1] for entry in alias_urls]
        alias_str = ', '.join(self._alias)
        text = (
            f'Author profile with ID {author_id} has been merged and '
            f'the main profile is now one of {alias_str}. Please update '
            'your records manually. Functionality of this object is '
            'reduced.'
        )
        warn(text, UserWarning)
    else:
        self._alias = None
    self._profile = self._json.get("author-profile", {})
def __init__(self,
             identifier: Union[int, str] = None,
             refresh: Union[bool, int] = False,
             view: str = 'META_ABS',
             id_type: str = None,
             **kwds: str
             ) -> None:
    """Interaction with the Abstract Retrieval API.

    :param identifier: The identifier of a document.  Can be the Scopus EID
                       , the Scopus ID, the PII, the Pubmed-ID or the DOI.
    :param refresh: Whether to refresh the cached file if it exists or not.
                    If int is passed, cached file will be refreshed if the
                    number of days since last modification exceeds that
                    value.
    :param id_type: The type of used ID. Allowed values: None, 'eid',
                    'pii', 'scopus_id', 'pubmed_id', 'doi'.  If the value
                    is None, the function tries to infer the ID type
                    itself.
    :param view: The view of the file that should be downloaded.  Allowed
                 values: META, META_ABS, REF, FULL, where FULL includes all
                 information of META_ABS view and META_ABS includes all
                 information of the META view.  For details see
                 https://dev.elsevier.com/sc_abstract_retrieval_views.html.
    :param kwds: Keywords passed on as query parameters.  Must contain
                 fields and values listed in the API specification at
                 https://dev.elsevier.com/documentation/AbstractRetrievalAPI.wadl.

    Raises
    ------
    ValueError
        If any of the parameters `id_type`, `refresh` or `view` is not
        one of the allowed values.

    Notes
    -----
    The directory for cached results is `{path}/{view}/{identifier}`,
    where `path` is specified in your configuration file.  In case
    `identifier` is a DOI, an underscore replaces the forward slash.
    """
    # Checks
    identifier = str(identifier)
    valid_views = ('META', 'META_ABS', 'REF', 'FULL')
    check_parameter_value(view, valid_views, "view")
    if id_type is not None:
        valid_id_types = ('eid', 'pii', 'scopus_id', 'pubmed_id', 'doi')
        check_parameter_value(id_type, valid_id_types, "id_type")
    else:
        # No explicit type given: infer it from the identifier itself
        id_type = detect_id_type(identifier)

    # Load json; view and refresh are set on the instance before the
    # parent initializer runs, as they are not passed as arguments
    self._view = view
    self._refresh = refresh
    Retrieval.__init__(
        self,
        identifier=identifier,
        id_type=id_type,
        api='AbstractRetrieval',
        **kwds
    )
    self._json = self._json['abstracts-retrieval-response']

    # Pre-extract the parts of the response that are accessed repeatedly
    self._head = chained_get(self._json, ["item", "bibrecord", "head"], {})
    self._confevent = chained_get(
        self._head,
        ['source', 'additional-srcinfo', 'conferenceinfo', 'confevent'],
        {},
    )
    # The REF view stores references at a different location
    ref_path = (["references"] if self._view == "REF"
                else ['item', 'bibrecord', 'tail', 'bibliography'])
    self._ref = chained_get(self._json, ref_path, {})