def confsponsor(self):
    """Sponsor(s) of the conference the document belongs to.

    Returns None when no sponsor is recorded, a list holding the '$'
    value of each entry when the sponsors come as a list, and the raw
    value otherwise.
    """
    found = chained_get(self._confevent, ['confsponsors', 'confsponsor'], [])
    if not len(found):
        return None
    if not isinstance(found, list):
        return found
    return [entry['$'] for entry in found]
def classificationgroup(self):
    """List with (subject group ID, number of documents)-tuples.

    Returns None if no classification entry is found.
    """
    path = ['author-profile', 'classificationgroup', 'classifications',
            'classification']
    groups = []
    for entry in listify(chained_get(self._json, path, [])):
        groups.append((entry['$'], entry['@frequency']))
    if not groups:
        return None
    return groups
def refcount(self):
    """Number of references of an article.

    Note: Requires the FULL view of the article, or the REF view, for
    which the count is read from a different location of the response.
    """
    in_ref_view = self._view == "REF"
    path = (["references", '@total-references'] if in_ref_view
            else ['item', 'bibrecord', 'tail', 'bibliography', '@refcount'])
    return chained_get(self._json, path)
def subject_areas(self):
    """List of named tuples of subject areas in the form
    (area, abbreviation, code) of author's publication.

    Returns None if no subject area is found.
    """
    path = ['subject-areas', 'subject-area']
    area = namedtuple('Subjectarea', 'area abbreviation code')
    # Pass the entries through listify, as the other parsers in this file
    # do, so that a lone non-list entry is not iterated key by key.
    # NOTE(review): assumes listify wraps a non-list value in a list - confirm.
    entries = listify(chained_get(self._json, path, []))
    areas = [area(area=item['$'], code=item['@code'],
                  abbreviation=item['@abbrev'])
             for item in entries]
    return areas or None
def name_variants(self) -> List[NamedTuple]:
    """A list of namedtuples representing variants of the affiliation name
    with number of documents referring to this variant, in the form
    (name, doc_count).

    The list is empty when no variant is found.
    """
    variant = namedtuple('Variant', 'name doc_count')
    entries = chained_get(self._json, ['name-variants', 'name-variant'], [])
    out = []
    for entry in entries:
        out.append(variant(name=entry['$'],
                           doc_count=int(entry['@doc-count'])))
    return out
def date_created(self) -> Optional[Tuple[int, int, int]]:
    """Creation date of the record, as parsed by `parse_date_created`
    from the record's history entry.

    Returns None when parsing raises a KeyError, i.e. when the history
    entry lacks the information the parser needs.

    NOTE(review): the tuple is presumably (year, month, day) - confirm
    against `parse_date_created`.
    """
    path = ["item", "bibrecord", "item-info", "history"]
    history = chained_get(self._json, path, {})
    try:
        return parse_date_created(history)
    except KeyError:
        return None
def subject_areas(self):
    """List of namedtuples containing subject areas of the article
    in the form (area abbreviation code).

    Returns None if no subject area is found.

    Note: Requires the FULL view of the article.
    """
    area = namedtuple('Area', 'area abbreviation code')
    entries = listify(
        chained_get(self._json, ['subject-areas', 'subject-area'], []))
    out = []
    for entry in entries:
        out.append(area(area=entry['$'], abbreviation=entry['@abbrev'],
                        code=entry['@code']))
    if not out:
        return None
    return out
def affiliation_history(self):
    """A list of namedtuples representing the author's historical
    affiliation(s), in the form (id parent type relationship afdispname
    preferred_name parent_preferred_name country_code country address_part
    city state postal_code org_domain org_URL).

    Note: Affiliation information might be missing or mal-assigned even
    when it looks correct in the web view.  In this case please request
    a correction.
    """
    raw = chained_get(
        self._json, ["author-profile", "affiliation-history", "affiliation"])
    return parse_affiliation(raw)
def name_variants(self):
    """List of named tuples containing variants of the author name with
    number of documents published with that variant, in the form
    (indexed_name, initials, surname, given_name, doc_count).

    Returns None if no variant is found.
    """
    variant = namedtuple(
        'Variant', 'indexed_name initials surname given_name doc_count')
    path = ['author-profile', 'name-variant']
    out = []
    for entry in listify(chained_get(self._json, path, [])):
        out.append(variant(indexed_name=entry['indexed-name'],
                           surname=entry['surname'],
                           doc_count=entry.get('@doc-count'),
                           initials=entry['initials'],
                           given_name=entry.get('given-name')))
    if out:
        return out
    return None
def journal_history(self):
    """List of named tuples of authored publications in the form
    (sourcetitle, abbreviation, type, issn).  issn is only given
    for journals.  abbreviation and issn may be None.

    Returns None if no journal entry is found.
    """
    jour = namedtuple('Journal', 'sourcetitle abbreviation type issn')
    entries = listify(chained_get(
        self._json, ['author-profile', 'journal-history', 'journal'], []))
    hist = []
    for entry in entries:
        hist.append(jour(sourcetitle=entry.get('sourcetitle'),
                         issn=entry.get('issn'),
                         abbreviation=entry.get('sourcetitle-abbrev'),
                         type=entry.get('@type')))
    if not hist:
        return None
    return hist
def affiliation_current(self) -> Optional[List[NamedTuple]]:
    """A list of namedtuples representing the author's current
    affiliation(s), in the form (id parent type relationship afdispname
    preferred_name parent_preferred_name country_code country address_part
    city state postal_code org_domain org_URL).

    Note: Affiliation information might be missing or mal-assigned even
    when it looks correct in the web view.  In this case please request
    a correction.
    """
    path = ["affiliation-current", "affiliation"]
    return parse_affiliation(chained_get(self._profile, path))
def isbn(self):
    """ISBNs belonging to publicationName as tuple of varying length
    (e.g. ISBN-10 or ISBN-13).

    Returns None if no ISBN is found.  If the entries cannot be indexed
    by '$' (a TypeError is raised), the value obtained from listify is
    returned wrapped in a one-element tuple.
    """
    found = listify(chained_get(self._head, ['source', 'isbn'], []))
    try:
        if len(found) == 0:
            return None
        if isinstance(found, str):
            return (found,)
        return tuple(entry['$'] for entry in found)
    except TypeError:
        return (found,)
def authors(self):
    """A list of namedtuples representing the article's authors, in the
    form (auid, indexed_name, surname, given_name, affiliation).

    `affiliation` is a list with the '@id' value of each non-empty
    affiliation entry of the author, or None if there is none.
    Returns None if no author is found.

    Note: The affiliation referred to here is what Scopus' algorithm
    determined as the main affiliation.  Property `authorgroup` provides
    all affiliations.
    """
    out = []
    fields = 'auid indexed_name surname given_name affiliation'
    auth = namedtuple('Author', fields)
    given_path = ['preferred-name', 'ce:given-name']
    for item in chained_get(self._json, ['authors', 'author'], []):
        # Drop empty entries (e.g. None) before reading the IDs
        affs = [a for a in listify(item.get('affiliation')) if a]
        aff_ids = [a.get('@id') for a in affs] or None
        new = auth(auid=item['@auid'], surname=item.get('ce:surname'),
                   indexed_name=item.get('ce:indexed-name'),
                   affiliation=aff_ids,
                   given_name=chained_get(item, given_path))
        out.append(new)
    return out or None
def affiliation_history(self):
    """A list of namedtuples representing the author's historical
    affiliation(s), in the form (id parent type relationship afdispname
    preferred_name parent_preferred_name country_code country address_part
    city state postal_code org_domain org_URL).

    Note: Affiliation information might be missing or mal-assigned even
    when it looks correct in the web view.  In this case please request
    a correction.

    Note: Unlike on their website, Scopus doesn't provide the periods
    of affiliation.
    """
    path = ["affiliation-history", "affiliation"]
    return parse_affiliation(chained_get(self._profile, path))
def sequencebank(self):
    """List of namedtuples representing biological entities defined or
    mentioned in the text, in the form (name, sequence_number, type).

    One namedtuple is created per sequence number of each bank.
    Returns None if nothing is found.
    """
    path = ['enhancement', 'sequencebanks', 'sequencebank']
    banks = listify(chained_get(self._head, path, []))
    bank = namedtuple('Sequencebank', 'name sequence_number type')
    out = [bank(name=entry['@name'], sequence_number=num['$'],
                type=num['@type'])
           for entry in banks
           for num in listify(entry['sequence-number'])]
    if not out:
        return None
    return out
def authorgroup(self):
    """A list of namedtuples representing the article's authors organized
    by affiliation, in the form (affiliation_id, dptid, organization,
    city, postalcode, addresspart, country, auid, indexed_name,
    surname, given_name).  If "given_name" is not present, fall back
    to initials; entries with neither (collaborations) use 'ce:text'.

    Several affiliation IDs of one group are joined on ", ".
    Returns None if no entry is found.

    Note: Affiliation information might be missing or mal-assigned even
    when it looks correct in the web view.  In this case please request
    a correction.
    """
    out = []
    fields = 'affiliation_id dptid organization city postalcode '\
             'addresspart country auid indexed_name surname given_name'
    auth = namedtuple('Author', fields)
    items = listify(self._head.get('author-group', []))
    index_path = ['preferred-name', 'ce:indexed-name']
    for item in items:
        if not item:
            continue
        # Affiliation information
        aff = item.get('affiliation', {})
        try:
            aff_ids = listify(aff['affiliation-id'])
            aff_id = ", ".join([a["@afid"] for a in aff_ids])
        except KeyError:
            aff_id = aff.get("@afid")
        org = _get_org(aff)
        # Author information (might relate to collaborations)
        authors = listify(item.get('author', item.get('collaboration', [])))
        for au in authors:
            # Check the keys one after the other: looking up the initials
            # eagerly as the default would discard an existing given name
            # whenever the initials are missing.
            if 'ce:given-name' in au:
                given = au['ce:given-name']
            elif 'ce:initials' in au:
                given = au['ce:initials']
            else:  # Collaboration
                given = au.get('ce:text')
            new = auth(affiliation_id=aff_id, organization=org,
                       city=aff.get('city'), dptid=aff.get("@dptid"),
                       postalcode=aff.get('postal-code'),
                       addresspart=aff.get('address-part'),
                       country=aff.get('country'), auid=au.get('@auid'),
                       surname=au.get('ce:surname'), given_name=given,
                       indexed_name=chained_get(au, index_path))
            out.append(new)
    return out or None
def funding(self):
    """List of namedtuples parsed funding information in the form
    (agency string id acronym country).

    Fields missing from an entry are None.  Returns None if no funding
    entry is found.
    """
    path = ['item', 'xocs:meta', 'xocs:funding-list', 'xocs:funding']
    fund = namedtuple('Funding', 'agency string id acronym country')
    # Field of the namedtuple -> key of the funding entry
    keys = {
        'agency': 'xocs:funding-agency',
        'string': 'xocs:funding-agency-matched-string',
        'id': 'xocs:funding-agency-id',
        'acronym': 'xocs:funding-agency-acronym',
        'country': 'xocs:funding-agency-country',
    }
    out = [fund(**{field: entry.get(key) for field, key in keys.items()})
           for entry in listify(chained_get(self._json, path, []))]
    if not out:
        return None
    return out
def contributor_group(self):
    """List of namedtuples representing contributors compiled by Scopus,
    in the form (given_name, initials, surname, indexed_name, role).

    Fields missing from an entry are None.  Returns None if no
    contributor group is found.
    """
    groups = listify(
        chained_get(self._head, ['source', 'contributor-group'], []))
    pers = namedtuple('Contributor',
                      'given_name initials surname indexed_name role')
    # A group without a 'contributor' key yields a namedtuple of Nones
    contributors = (group.get('contributor', {}) for group in groups)
    out = [pers(indexed_name=person.get('ce:indexed-name'),
                role=person.get('@role'),
                surname=person.get('ce:surname'),
                given_name=person.get('ce:given-name'),
                initials=person.get('ce:initials'))
           for person in contributors]
    if not out:
        return None
    return out
def results(self) -> Optional[List[NamedTuple]]:
    """A list of namedtuples representing results of subject
    classifications search, with the fields listed in `self.fields`
    (e.g. code, description, detail, abbrev).

    Fields missing from a result are None.  Returns None if there are
    no results.
    """
    path = ['subject-classifications', 'subject-classification']
    search_results = chained_get(self._json, path, [])
    subj = namedtuple('Subject', self.fields)
    # A lone result comes as a dict rather than as a list of dicts
    if isinstance(search_results, dict):
        search_results = [search_results]
    # Fill missing fields on a merged copy, so that the parsed response
    # stored on the instance is not modified by reading this property.
    defaults = dict.fromkeys(self.fields)
    out = [subj(**{**defaults, **result}) for result in search_results]
    return out or None
def chemicals(self):
    """List of namedtuples representing chemical entities in the form
    (source, chemical_name, cas_registry_number).  In case multiple
    numbers given, they are joined on ";".

    Returns None if no chemical is found.
    """
    path = ['enhancement', 'chemicalgroup', 'chemicals']
    groups = listify(chained_get(self._head, path, []))
    chemical = namedtuple('Chemical',
                          'source chemical_name cas_registry_number')
    out = []
    for group in groups:
        for chem in listify(group['chemical']):
            raw = chem.get('cas-registry-number')
            try:
                # Multiple numbers given
                cas = ";".join(part['$'] for part in raw)
            except TypeError:
                # Anything that cannot be joined is passed on unchanged
                cas = raw
            out.append(chemical(source=group['@source'],
                                cas_registry_number=cas,
                                chemical_name=chem['chemical-name']))
    if not out:
        return None
    return out
def __init__(self, identifier=None, refresh=False, view='META_ABS',
             id_type=None, **kwds):
    """Interaction with the Abstract Retrieval API.

    Parameters
    ----------
    identifier : str or int
        The identifier of a document.  Can be the Scopus EID, the Scopus
        ID, the PII, the Pubmed-ID or the DOI.

    refresh : bool or int (optional, default=False)
        Whether to refresh the cached file if it exists or not.  If int
        is passed, cached file will be refreshed if the number of days
        since last modification exceeds that value.

    id_type: str (optional, default=None)
        The type of used ID.  Allowed values: None, 'eid', 'pii',
        'scopus_id', 'pubmed_id', 'doi'.  If the value is None, the
        function tries to infer the ID type itself.

    view : str (optional, default=META_ABS)
        The view of the file that should be downloaded.  Allowed values:
        META, META_ABS, REF, FULL, where FULL includes all information
        of META_ABS view and META_ABS includes all information of the
        META view.  For details see
        https://dev.elsevier.com/guides/AbstractRetrievalViews.htm.

    kwds : key-value pairings, optional
        Keywords passed on as query parameters.  Must contain fields
        and values mentioned in the API specification
        (https://dev.elsevier.com/documentation/AbstractRetrievalAPI.wadl),
        such as "startref" or "refcount".

    Raises
    ------
    ValueError
        If the id_type parameter or the view parameter contains
        invalid entries.

    Examples
    --------
    See https://pybliometrics.readthedocs.io/en/stable/examples/AbstractRetrieval.html.

    Notes
    -----
    The directory for cached results is `{path}/{view}/{identifier}`,
    where `path` is specified in `~/.scopus/config.ini`.  In case
    `identifier` is a DOI, an underscore replaces the forward slash.
    """
    # Checks (the view is validated before the ID type)
    identifier = str(identifier)
    views = ('META', 'META_ABS', 'REF', 'FULL')
    if view not in views:
        msg = 'view parameter must be one of ' + ', '.join(views)
        raise ValueError(msg)
    id_types = ('eid', 'pii', 'scopus_id', 'pubmed_id', 'doi')
    if id_type is None:
        id_type = detect_id_type(identifier)
    elif id_type not in id_types:
        msg = 'id_type parameter must be one of ' + ', '.join(id_types)
        raise ValueError(msg)

    # Load json
    Retrieval.__init__(self, identifier=identifier, id_type=id_type,
                       api='AbstractRetrieval', refresh=refresh,
                       view=view, **kwds)
    self._json = self._json['abstracts-retrieval-response']

    # Shortcuts into frequently used parts of the response
    self._head = chained_get(self._json, ["item", "bibrecord", "head"], {})
    conf_path = ['source', 'additional-srcinfo', 'conferenceinfo',
                 'confevent']
    self._confevent = chained_get(self._head, conf_path, {})
    # The REF view keeps the references at a different location
    ref_path = (["references"] if self._view == "REF"
                else ['item', 'bibrecord', 'tail', 'bibliography'])
    self._ref = chained_get(self._json, ref_path, {})
def website(self):
    """Website of publisher, read from the head of the record."""
    return chained_get(self._head,
                       ['source', 'website', 'ce:e-address', '$'])
def volume(self):
    """Volume for the document, read from the core data."""
    path = ['coredata', 'prism:volume']
    return chained_get(self._json, path)
def url(self):
    """URL to the API view of the document, read from the core data."""
    path = ['coredata', 'prism:url']
    return chained_get(self._json, path)
def title(self):
    """Title of the document, read from the core data."""
    path = ['coredata', 'dc:title']
    return chained_get(self._json, path)
def subtypedescription(self):
    """Type of the document.  Refer to the Scopus Content Coverage Guide
    for a list of possible values.  Long version of subtype.

    An empty value is reported as None.
    """
    description = chained_get(self._json, ['coredata', 'subtypeDescription'])
    if description:
        return description
    return None
def srctype(self):
    """Aggregation type of source the document is published in (short
    version of aggregationType), read from the core data.
    """
    path = ['coredata', 'srctype']
    return chained_get(self._json, path)
def source_id(self):
    """Scopus source ID of the document, read from the core data."""
    path = ['coredata', 'source-id']
    return chained_get(self._json, path)
def pubmed_id(self):
    """The PubMed ID of the document, read from the core data."""
    path = ['coredata', 'pubmed-id']
    return chained_get(self._json, path)
def publisheraddress(self):
    """Address of the publisher of the document, read from the
    publisher entry of the source in the head of the record.
    """
    path = ['source', 'publisher', 'publisheraddress']
    return chained_get(self._head, path)