def __init__(self, author):
    """author should be an xml element.

    The following attributes are supported:

    author
    indexed_name
    given_name
    surname
    initials
    author_url - the scopus api url to get more information
    auid - the scopus id for the author
    scopusid - the scopus id for the author
    seq - the index of the author in the author list.
    affiliations - a list of ScopusAuthorAffiliation objects

    This class is not the same as the one in scopus.scopus_author, which
    uses the scopus author api.
    """
    self.author = author
    self.indexed_name = get_encoded_text(author, 'ce:indexed-name')
    self.given_name = get_encoded_text(author, 'ce:given-name')
    self.surname = get_encoded_text(author, 'ce:surname')
    self.initials = get_encoded_text(author, 'ce:initials')
    self.author_url = get_encoded_text(author, 'dtd:author-url')
    self.auid = author.attrib.get('auid', None)
    self.scopusid = self.auid
    self.seq = author.attrib.get('seq', None)
    self.affiliations = [_ScopusAuthorAffiliation(aff)
                         for aff in author.findall('dtd:affiliation', ns)]
def __init__(self, author):
    """author should be an xml element.

    The following attributes are supported:

    author
    indexed_name
    given_name
    surname
    initials
    author_url - the scopus api url to get more information
    auid - the scopus id for the author
    scopusid - the scopus id for the author
    seq - the index of the author in the author list.
    affiliations - a list of ScopusAuthorAffiliation objects

    This class is not the same as the one in scopus.scopus_author, which
    uses the scopus author api.
    """
    self.author = author
    self.indexed_name = get_encoded_text(author, 'ce:indexed-name')
    self.given_name = get_encoded_text(author, 'ce:given-name')
    self.surname = get_encoded_text(author, 'ce:surname')
    self.initials = get_encoded_text(author, 'ce:initials')
    self.author_url = get_encoded_text(author, 'author-url')
    self.auid = author.attrib.get('auid')
    self.scopusid = self.auid
    self.seq = author.attrib.get('seq')
    self.affiliations = [_ScopusAuthorAffiliation(aff)
                         for aff in author.findall('affiliation', ns)]
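# Illustrative sketch (not part of the library): how attribute and child-text
# lookups on a single author element behave. It uses a namespace-free toy
# element and a hypothetical stand-in for get_encoded_text; the real Scopus
# payload is namespaced via the `ns` mapping used above.
import xml.etree.ElementTree as ET

def _toy_get_text(element, path):
    """Return the text of a child element, or None if it is missing."""
    found = element.find(path)
    return found.text if found is not None else None

toy_author = ET.fromstring(
    '<author auid="12345" seq="1">'
    '<indexed-name>Doe J.</indexed-name>'
    '<given-name>Jane</given-name>'
    '<surname>Doe</surname>'
    '</author>')

print(_toy_get_text(toy_author, 'indexed-name'))  # Doe J.
print(toy_author.attrib.get('auid'))              # 12345
print(toy_author.attrib.get('missing'))           # None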
def name(self):
    """Author name."""
    return ((get_encoded_text(self.xml,
                              'author-profile/preferred-name/given-name') or '') +
            ' ' +
            (get_encoded_text(self.xml,
                              'author-profile/preferred-name/surname') or ''))
def __init__(self, affiliation):
    """affiliation should be an xml element from the main abstract."""
    self.affiliation = affiliation
    self.affilname = get_encoded_text(affiliation, 'affilname')
    self.city = get_encoded_text(affiliation, 'affiliation-city')
    self.country = get_encoded_text(affiliation, 'affiliation-country')
    self.href = affiliation.attrib.get('href', None)
    self.id = affiliation.attrib.get('id', None)
def __init__(self, aff_id, refresh=False):
    """Class to represent an Affiliation in Scopus.

    Parameters
    ----------
    aff_id : str or int
        The Scopus Affiliation ID.

    refresh : bool (optional, default=False)
        Whether to refresh the cached file if it exists or not.

    Notes
    -----
    The files are cached in ~/.scopus/affiliation/{aff_id}.
    """
    self._affiliation_id = aff_id
    qfile = os.path.join(SCOPUS_AFFILIATION_DIR, str(aff_id))
    url = ('http://api.elsevier.com/content/affiliation/'
           'affiliation_id/{}'.format(aff_id))
    xml = ET.fromstring(get_content(qfile, url=url, refresh=refresh))

    # public url
    self._url = xml.find('coredata/link[@rel="scopus-affiliation"]')
    if self._url is not None:
        self._url = self._url.get('href')
    self.api_url = get_encoded_text(xml, 'coredata/prism:url')
    self._nauthors = get_encoded_text(xml, 'coredata/author-count')
    self._ndocuments = get_encoded_text(xml, 'coredata/document-count')
    self._name = get_encoded_text(xml, 'affiliation-name')
    self._address = get_encoded_text(xml, 'address')
    self._city = get_encoded_text(xml, 'city')
    self._country = get_encoded_text(xml, 'country')
def get_coauthors(self):
    """Return list of coauthors, their scopus-id and research areas."""
    url = self.xml.find('coredata/link[@rel="coauthor-search"]').get('href')
    xml = download(url=url).text.encode('utf-8')
    xml = ET.fromstring(xml)
    coauthors = []
    N = int(get_encoded_text(xml, 'opensearch:totalResults') or 0)

    AUTHOR = namedtuple('Author',
                        ['name', 'scopus_id', 'affiliation', 'categories'])

    count = 0
    while count < N:
        params = {'start': count, 'count': 25}
        xml = download(url=url, params=params).text.encode('utf-8')
        xml = ET.fromstring(xml)

        for entry in xml.findall('atom:entry', ns):
            given_name = get_encoded_text(
                entry, 'atom:preferred-name/atom:given-name')
            surname = get_encoded_text(
                entry, 'atom:preferred-name/atom:surname')
            coauthor_name = '{0} {1}'.format(given_name, surname)

            scopus_id = get_encoded_text(
                entry, 'dc:identifier').replace('AUTHOR_ID:', '')

            affiliation = get_encoded_text(
                entry, 'atom:affiliation-current/atom:affiliation-name')

            # get categories for this author
            s = ', '.join(['{0} ({1})'.format(subject.text,
                                              subject.attrib['frequency'])
                           for subject in entry.findall('atom:subject-area', ns)])

            coauthors += [AUTHOR(coauthor_name, scopus_id, affiliation, s)]

        count += 25

    return coauthors
def get_coauthors(self):
    """Return list of coauthors, their scopus-id and research areas."""
    url = self.xml.find('coredata/link[@rel="coauthor-search"]').get('href')
    xml = download(url=url).text.encode('utf-8')
    xml = ET.fromstring(xml)
    coauthors = []
    N = int(get_encoded_text(xml, 'opensearch:totalResults') or 0)

    AUTHOR = namedtuple('Author',
                        ['name', 'scopus_id', 'affiliation', 'categories'])

    count = 0
    while count < N:
        params = {'start': count, 'count': 25}
        xml = download(url=url, params=params).text.encode('utf-8')
        xml = ET.fromstring(xml)

        for entry in xml.findall('atom:entry', ns):
            given_name = get_encoded_text(
                entry, 'atom:preferred-name/atom:given-name')
            surname = get_encoded_text(
                entry, 'atom:preferred-name/atom:surname')
            coauthor_name = u'{0} {1}'.format(given_name, surname)

            scopus_id = get_encoded_text(
                entry, 'dc:identifier').replace('AUTHOR_ID:', '')

            affiliation = get_encoded_text(
                entry, 'atom:affiliation-current/atom:affiliation-name')

            # get categories for this author
            s = u', '.join(['{0} ({1})'.format(subject.text,
                                               subject.attrib['frequency'])
                            for subject in entry.findall('atom:subject-area', ns)])

            coauthors += [AUTHOR(coauthor_name, scopus_id, affiliation, s)]

        count += 25

    return coauthors
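# Sketch of the paging pattern used by get_coauthors above, separated from the
# Scopus specifics: request pages of 25 entries until the reported total is
# exhausted. `fetch_page` is a hypothetical callable standing in for
# download(...) plus XML parsing; it is assumed to return (total, entries).
def iter_paged_entries(fetch_page, page_size=25):
    """Yield entries from a paged feed, page_size at a time."""
    total, _ = fetch_page(start=0, count=1)  # peek at the total result count
    start = 0
    while start < total:
        _, entries = fetch_page(start=start, count=page_size)
        for entry in entries:
            yield entry
        start += page_size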
def orcid(self):
    """The author's ORCID."""
    return get_encoded_text(self.xml, 'coredata/orcid')
def org_url(self):
    """Website of the affiliation."""
    return get_encoded_text(self.xml, 'institution-profile/org-URL')
def org_type(self):
    """Type of the affiliation (only present if profile is org profile)."""
    return get_encoded_text(self.xml, 'institution-profile/org-type')
def ndocuments(self):
    """Number of documents for the affiliation."""
    return get_encoded_text(self.xml, 'coredata/document-count')
def coverDate(self):
    """The date of the cover the abstract is in."""
    return get_encoded_text(self.coredata, 'prism:coverDate')
def city(self):
    """The city of the affiliation."""
    return get_encoded_text(self.xml, 'city')
def org_domain(self):
    """Internet domain of the affiliation."""
    return get_encoded_text(self.xml, 'institution-profile/org-domain')
def firstname(self):
    """Author first name."""
    return (get_encoded_text(self.xml,
                             'author-profile/preferred-name/given-name') or '')
def current_affiliation(self):
    """Current affiliation according to scopus."""
    return get_encoded_text(self.xml, 'author-profile/affiliation-current/'
                                      'affiliation/ip-doc/afdispname')
def ncoauthors(self):
    """Total number of coauthors."""
    ncoauthors = get_encoded_text(self.xml, 'coauthor-count')
    return int(ncoauthors) if ncoauthors is not None else 0
def citation_count(self):
    """Total number of citing items."""
    citation_count = get_encoded_text(self.xml, 'coredata/citation-count')
    return int(citation_count) if citation_count is not None else 0
def ncited_by(self):
    """Total number of citing authors."""
    ncited_by = get_encoded_text(self.xml, 'coredata/cited-by-count')
    return int(ncited_by) if ncited_by is not None else 0
def ndocuments(self):
    """Number of documents authored (excludes book chapters and notes)."""
    ndocuments = get_encoded_text(self.xml, 'coredata/document-count')
    return int(ndocuments) if ndocuments is not None else 0
def hindex(self):
    """The author's h-index."""
    hindex = get_encoded_text(self.xml, 'h-index')
    return int(hindex) if hindex is not None else 0
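# The count properties above repeat the same None guard. A small helper like
# this (hypothetical, not part of the library) captures the pattern in one
# place.
def _to_int(value, default=0):
    """Convert a Scopus text count to int, falling back when the field is absent."""
    return int(value) if value is not None else default

# e.g. self._hindex = _to_int(get_encoded_text(self.xml, 'h-index'))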
def citedby_count(self):
    """Number of articles citing the abstract."""
    return int(get_encoded_text(self.coredata, 'citedby-count'))
def api_url(self):
    """URL to the affiliation's API page."""
    return get_encoded_text(self.xml, 'coredata/prism:url')
def lastname(self):
    """Author last name."""
    return (get_encoded_text(self.xml,
                             'author-profile/preferred-name/surname') or '')
def name(self):
    """The name of the affiliation."""
    return get_encoded_text(self.xml, 'affiliation-name')
def description(self):
    """Return the description of a record.

    Note: If this is empty, try the abstract instead.
    """
    return get_encoded_text(self.coredata, 'dc:description')
def country(self):
    """The country of the affiliation."""
    return get_encoded_text(self.xml, 'country')
def address(self):
    """The address of the affiliation."""
    return get_encoded_text(self.xml, 'address')
def nauthors(self):
    """Number of authors in the affiliation."""
    return get_encoded_text(self.xml, 'coredata/author-count')
def state(self):
    """The state (country's administrative subunit) of the affiliation."""
    return get_encoded_text(self.xml, 'state')
def __init__(self, author_id, refresh=False, refresh_aff=False, level=1):
    """Class to represent a Scopus Author query by the scopus-id.

    Parameters
    ----------
    author_id : str or int
        The ID of the author to search for.

    refresh : bool (optional, default=False)
        Whether to refresh the cached file (if it exists) or not.

    refresh_aff : bool (optional, default=False)
        Whether to refresh the cached corresponding affiliation views
        (if they exist) or not.

    level : int (optional, default=1)
        Number of * to print in property __str__.

    Notes
    -----
    The files are cached in ~/.scopus/author/{author_id}.
    """
    author_id = str(int(author_id))
    self.level = level

    qfile = os.path.join(SCOPUS_AUTHOR_DIR, author_id)
    url = ('http://api.elsevier.com/content/author/'
           'author_id/{}').format(author_id)
    params = {'author_id': author_id, 'view': 'ENHANCED'}
    xml = ET.fromstring(get_content(qfile, url=url, refresh=refresh,
                                    params=params))
    self.xml = xml
    self._orcid = get_encoded_text(xml, 'coredata/orcid')

    hindex = get_encoded_text(xml, 'h-index')
    self._hindex = int(hindex) if hindex is not None else 0

    ndocuments = get_encoded_text(xml, 'coredata/document-count')
    self._ndocuments = int(ndocuments) if ndocuments is not None else 0

    _author_id = get_encoded_text(xml, 'coredata/dc:identifier')
    self._author_id = _author_id.split(":")[-1]

    citation_count = get_encoded_text(xml, 'coredata/citation-count')
    self._citation_count = int(citation_count) if citation_count is not None else 0

    ncited_by = get_encoded_text(xml, 'coredata/cited-by-count')
    self._ncited_by = int(ncited_by) if ncited_by is not None else 0

    ncoauthors = get_encoded_text(xml, 'coauthor-count')
    self._ncoauthors = int(ncoauthors) if ncoauthors is not None else 0

    self._current_affiliation = get_encoded_text(
        xml,
        'author-profile/affiliation-current/affiliation/ip-doc/afdispname')

    # affiliation history (sort out faulty historic affiliations)
    aff_ids = [el.attrib.get('affiliation-id') for el in
               xml.findall('author-profile/affiliation-history/affiliation')
               if el is not None and len(list(el.find("ip-doc").iter())) > 1]
    affs = [ScopusAffiliation(aff_id, refresh=refresh_aff)
            for aff_id in aff_ids]
    self._affiliation_history = affs

    date_created = xml.find('author-profile/date-created', ns)
    if date_created is not None:
        self._date_created = (int(date_created.attrib['year']),
                              int(date_created.attrib['month']),
                              int(date_created.attrib['day']))
    else:
        self._date_created = (None, None, None)

    # Research areas
    self._area_elements = xml.findall('subject-areas/subject-area')
    # {code: name}
    d = {int(ae.attrib['code']): ae.text for ae in self._area_elements}

    freqs = xml.findall('author-profile/classificationgroup/'
                        'classifications[@type="ASJC"]/classification')
    # {code: frequency}
    c = {int(cls.text): int(cls.attrib['frequency']) for cls in freqs}
    self._subject_freq = c

    categories = [(d[code], c[code]) for code in d]
    categories.sort(reverse=True, key=itemgetter(1))
    self.categories = categories

    self._firstname = (get_encoded_text(
        xml, 'author-profile/preferred-name/given-name') or '')
    self._lastname = (get_encoded_text(
        xml, 'author-profile/preferred-name/surname') or '')
    self._name = ((get_encoded_text(
        xml, 'author-profile/preferred-name/given-name') or '') + ' ' +
        (get_encoded_text(
            xml, 'author-profile/preferred-name/surname') or ''))

    # Real website for the author
    self._scopus_url = xml.find('coredata/link[@rel="scopus-author"]')
    if self._scopus_url is not None:
        self._scopus_url = self._scopus_url.get('href')

    # API URL for coauthors
    self._coauthor_url = xml.find('coredata/link[@rel="coauthor-search"]')
    if self._coauthor_url is not None:
        self._coauthor_url = self._coauthor_url.get('href')

    # Publication history
    pub_hist_elements = self.xml.findall('author-profile/journal-history/')
    self._pub_hist = pub_hist_elements
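# Hedged usage sketch for the author class above. The import path and the
# author ID are assumptions for illustration (the class is assumed to be
# exposed as scopus.ScopusAuthor and to require a configured API key);
# name, hindex and categories correspond to attributes set or wrapped above.
from scopus import ScopusAuthor  # assumed import path

AUTHOR_ID = 1234567  # placeholder Scopus author ID
au = ScopusAuthor(AUTHOR_ID, refresh=False)
print(au.name, au.hindex)
for subject, frequency in au.categories[:5]:
    print(subject, frequency)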
def __init__(self, EID, view='META_ABS', refresh=False):
    """Class to represent the results from a Scopus abstract.

    Parameters
    ----------
    EID : str
        The Scopus ID of an abstract.

    view : str (optional, default=META_ABS)
        The view of the file that should be downloaded. Currently
        supported values: META, META_ABS, FULL.

    refresh : bool (optional, default=False)
        Whether to refresh the cached file if it exists or not.

    Notes
    -----
    The files are cached in ~/.scopus/xml/{eid}.
    """
    allowed_views = ('META', 'META_ABS', 'FULL')
    if view not in allowed_views:
        raise ValueError('view parameter must be one of ' +
                         ', '.join(allowed_views))

    # Get file content
    qfile = os.path.join(SCOPUS_XML_DIR, EID)
    url = "http://api.elsevier.com/content/abstract/eid/{}".format(EID)
    params = {'view': view}
    xml = ET.fromstring(get_content(qfile, url=url, refresh=refresh,
                                    params=params))
    self.xml = xml
    if xml.tag == 'service-error':
        raise Exception('\n{0}\n{1}'.format(EID, self.xml))

    # Parse coredata
    coredata = xml.find('dtd:coredata', ns)
    self._url = get_encoded_text(coredata, 'prism:url')
    self.identifier = get_encoded_text(coredata, 'dc:identifier')
    self.eid = get_encoded_text(coredata, 'dtd:eid')
    self._doi = get_encoded_text(coredata, 'prism:doi')
    self._title = get_encoded_text(coredata, 'dc:title')
    self._aggregationType = get_encoded_text(coredata,
                                             'prism:aggregationType')
    self._publicationName = get_encoded_text(coredata,
                                             'prism:publicationName')
    self._srctype = get_encoded_text(coredata, 'dtd:srctype')
    self._citedby_count = get_encoded_text(coredata, 'dtd:citedby-count')
    self._publisher = get_encoded_text(coredata, 'dc:publisher')
    self._source_id = get_encoded_text(coredata, 'dtd:source-id')
    self._issn = get_encoded_text(coredata, 'prism:issn')
    self._volume = get_encoded_text(coredata, 'prism:volume')
    self._issueIdentifier = get_encoded_text(coredata,
                                             'prism:issueIdentifier')
    self._article_number = get_encoded_text(coredata, 'dtd:article-number')
    self._startingPage = get_encoded_text(coredata, 'prism:startingPage')
    self._endingPage = get_encoded_text(coredata, 'prism:endingPage')
    self._pageRange = get_encoded_text(coredata, 'prism:pageRange')
    self._coverDate = get_encoded_text(coredata, 'prism:coverDate')
    self.creator = get_encoded_text(coredata, 'dc:creator')
    self.description = get_encoded_text(coredata, 'dc:description')

    sl = coredata.find('dtd:link[@rel="scopus"]', ns).get('href')
    self_link = coredata.find('dtd:link[@rel="self"]', ns).get('href')
    cite_link = coredata.find('dtd:link[@rel="cited-by"]', ns)
    if cite_link is not None:
        cite_link = cite_link.get('href')
    self.scopus_link = sl
    self.self_link = self_link
    self.cite_link = cite_link

    # Parse subject-areas
    subjectAreas = xml.find('dtd:subject-areas', ns)
    try:
        self._subjectAreas = [a.text for a in subjectAreas]
    except:
        self._subjectAreas = None

    # Parse authors
    authors = xml.find('dtd:authors', ns)
    self._authors = [_ScopusAuthor(author) for author in authors]
    self._affiliations = [_ScopusAffiliation(aff) for aff in
                          xml.findall('dtd:affiliation', ns)]

    # Parse items
    items = xml.find('item', ns)
    self._website = get_encoded_text(
        items, 'bibrecord/head/source/website/ce:e-address')

    try:
        self._citationType = items.find(
            'bibrecord/head/citation-info/citation-type').get("code")
    except:
        self._citationType = None

    try:
        self._citationLang = items.find(
            'bibrecord/head/citation-info/citation-language').get("language")
    except:
        self._citationLang = None

    try:
        self._references = items.find('bibrecord/tail/bibliography', ns)
    except:
        self._references = None
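# Hedged usage sketch for the abstract class above; the import path and EID are
# placeholders, and title/doi/authors are assumed to be read-only properties
# wrapping the underscore attributes set in __init__.
from scopus import ScopusAbstract  # assumed import path

EID = '2-s2.0-00000000000'  # placeholder EID
ab = ScopusAbstract(EID, view='META_ABS', refresh=False)
print(ab.title, ab.doi)
for author in ab.authors:
    print(author.indexed_name)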
def author_id(self):
    """The scopus id for the author."""
    author_id = get_encoded_text(self.xml, 'coredata/dc:identifier')
    return author_id.split(":")[-1]
def __init__(self, ISSN, refresh=False):
    ISSN = str(ISSN)
    self.issn = ISSN

    qfile = os.path.join(SCOPUS_ISSN_DIR, ISSN)
    url = ("http://api.elsevier.com/content/serial/title/issn:" + ISSN)
    self.xml = ET.fromstring(get_content(qfile, refresh, url))

    self.publisher = get_encoded_text(self.xml, 'entry/dc:publisher')
    self.title = get_encoded_text(self.xml, 'entry/dc:title')
    self.aggregationType = get_encoded_text(self.xml,
                                            'entry/prism:aggregationType')
    self.prism_url = get_encoded_text(self.xml, 'entry/prism:url')

    # Impact factors
    SNIP = get_encoded_text(self.xml, 'entry/SNIPList/SNIP')
    SNIP_year = self.xml.find('entry/SNIPList/SNIP', ns)
    if SNIP_year is not None:
        SNIP_year = SNIP_year.get('year')
    else:
        SNIP_year = -1

    IPP = get_encoded_text(self.xml, 'entry/IPPList/IPP')
    IPP_year = self.xml.find('entry/IPPList/IPP', ns)
    if IPP_year is not None:
        IPP_year = IPP_year.get('year')
    else:
        IPP_year = -1

    SJR = get_encoded_text(self.xml, 'entry/SJRList/SJR')
    SJR_year = self.xml.find('entry/SJRList/SJR', ns)
    if SJR_year is not None:
        SJR_year = SJR_year.get('year')
    else:
        SJR_year = -1

    if SNIP:
        self.SNIP = float(SNIP)
        self.SNIP_year = int(SNIP_year)
    else:
        self.SNIP = None
        self.SNIP_year = None

    if IPP:
        self.IPP = float(IPP)
        self.IPP_year = int(IPP_year)
    else:
        self.IPP = None
        self.IPP_year = None

    if SJR:
        self.SJR = float(SJR)
        self.SJR_year = int(SJR_year)
    else:
        self.SJR = None
        self.SJR_year = None

    scopus_url = self.xml.find('entry/link[@ref="scopus-source"]')
    if scopus_url is not None:
        self.scopus_url = scopus_url.attrib['href']
    else:
        self.scopus_url = None

    homepage = self.xml.find('entry/link[@ref="homepage"]')
    if homepage is not None:
        self.homepage = homepage.attrib['href']
    else:
        self.homepage = None
def __str__(self):
    """Return a summary string."""
    s = ['{} {} (updated on {})'.format('*' * self.level, self._name,
                                        time.asctime())]

    url = self.xml.find('coredata/link[@rel="scopus-author"]')
    if url is not None:
        url = url.get('href', 'None')
    else:
        url = ''
    s += [url]

    orcid = get_encoded_text(self.xml, 'coredata/orcid')
    if orcid is not None:
        s += ['http://orcid.org/' + orcid]

    s += ['{} documents cited {} times by {} people ({} coauthors)'.format(
        self._ndocuments, self._citation_count, self._ncited_by,
        self._ncoauthors)]
    s += ['#first author papers {0}'.format(self.n_first_author_papers())]
    s += ['#last author papers {0}'.format(self.n_last_author_papers())]
    s += ['h-index: {}'.format(self._hindex) +
          ' AIF(2014) = ' +
          '{0:1.2f}'.format(self.author_impact_factor(2015)[2])]
    s += ['Scopus ID created on {}'.format(self.date_created)]

    # Current Affiliation. Note this is what Scopus thinks is current.
    s += ['\nCurrent affiliation according to Scopus:']
    s += [' ' + (self._current_affiliation or '')]

    # subject areas
    s += ['\nSubject areas']
    s += [textwrap.fill(', '.join(['{0} ({1})'.format(el[0], el[1])
                                   for el in self.categories]),
                        initial_indent=' ',
                        subsequent_indent=' ')]

    # journals published in
    temp_s = [el.find('sourcetitle-abbrev').text for el in self._pub_hist]
    s += ['\nPublishes in:\n' +
          textwrap.fill(', '.join(temp_s),
                        initial_indent=' ',
                        subsequent_indent=' ')]

    # affiliation history
    s += ['\nAffiliation history:']
    for aff in self.affiliation_history:
        s += [str(aff)]

    # print a bibliography
    s += [self.get_document_summary()]

    return '\n'.join(s)
def __init__(self, ISSN, refresh=False):
    ISSN = str(ISSN)
    self.issn = ISSN

    qfile = os.path.join(SCOPUS_ISSN_DIR, ISSN)
    url = ("https://api.elsevier.com/content/serial/title/issn:" + ISSN)
    self.xml = ET.fromstring(get_content(qfile, refresh, url))

    self.publisher = get_encoded_text(self.xml, 'entry/dc:publisher')
    self.title = get_encoded_text(self.xml, 'entry/dc:title')
    self.aggregationType = get_encoded_text(self.xml,
                                            'entry/prism:aggregationType')
    self.prism_url = get_encoded_text(self.xml, 'entry/prism:url')

    # Impact factors
    SNIP = get_encoded_text(self.xml, 'entry/SNIPList/SNIP')
    SNIP_year = self.xml.find('entry/SNIPList/SNIP', ns)
    if SNIP_year is not None:
        SNIP_year = SNIP_year.get('year')
    else:
        SNIP_year = -1

    IPP = get_encoded_text(self.xml, 'entry/IPPList/IPP')
    IPP_year = self.xml.find('entry/IPPList/IPP', ns)
    if IPP_year is not None:
        IPP_year = IPP_year.get('year')
    else:
        IPP_year = -1

    SJR = get_encoded_text(self.xml, 'entry/SJRList/SJR')
    SJR_year = self.xml.find('entry/SJRList/SJR', ns)
    if SJR_year is not None:
        SJR_year = SJR_year.get('year')
    else:
        SJR_year = -1

    if SNIP:
        self.SNIP = float(SNIP)
        self.SNIP_year = int(SNIP_year)
    else:
        self.SNIP = None
        self.SNIP_year = None

    if IPP:
        self.IPP = float(IPP)
        self.IPP_year = int(IPP_year)
    else:
        self.IPP = None
        self.IPP_year = None

    if SJR:
        self.SJR = float(SJR)
        self.SJR_year = int(SJR_year)
    else:
        self.SJR = None
        self.SJR_year = None

    scopus_url = self.xml.find('entry/link[@ref="scopus-source"]')
    if scopus_url is not None:
        self.scopus_url = scopus_url.attrib['href']
    else:
        self.scopus_url = None

    homepage = self.xml.find('entry/link[@ref="homepage"]')
    if homepage is not None:
        self.homepage = homepage.attrib['href']
    else:
        self.homepage = None
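# Hedged usage sketch for the ISSN-based serial lookup above. The class name
# and import path are assumptions (here assumed to be scopus.ScopusJournal);
# the attributes used are set directly in __init__.
from scopus import ScopusJournal  # assumed import path

journal = ScopusJournal('0036-8075')  # ISSN used only as an example
print(journal.title, journal.publisher)
print(journal.SJR, journal.SJR_year)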
def affiliation_id(self):
    """The Scopus ID of the affiliation."""
    return get_encoded_text(self.xml, 'coredata/dc:identifier').split(":")[-1]
def current_affiliation(self):
    """Current affiliation according to scopus."""
    return get_encoded_text(self.xml, 'author-profile/affiliation-current/'
                                      'affiliation/ip-doc/afdispname')
def firstname(self):
    """Author first name."""
    return (get_encoded_text(self.xml,
                             'author-profile/preferred-name/given-name') or '')
def __init__(self, aff_id, refresh=False):
    """Class to represent an Affiliation in Scopus.

    Parameters
    ----------
    aff_id : str or int
        The Scopus Affiliation ID.

    refresh : bool (optional, default=False)
        Whether to refresh the cached file if it exists or not.

    Notes
    -----
    The files are cached in ~/.scopus/affiliation/{aff_id}.
    """
    qfile = os.path.join(SCOPUS_AFFILIATION_DIR, str(aff_id))
    url = ('https://api.elsevier.com/content/affiliation/'
           'affiliation_id/{}'.format(aff_id))
    xml = ET.fromstring(get_content(qfile, url=url, refresh=refresh))

    # coredata
    self._url = xml.find('coredata/link[@rel="scopus-affiliation"]')
    _aff_id = get_encoded_text(xml, 'coredata/dc:identifier')
    self._aff_id = _aff_id.split(":")[-1]
    if self._url is not None:
        self._url = self._url.get('href')
    self._api_url = get_encoded_text(xml, 'coredata/prism:url')
    self._nauthors = get_encoded_text(xml, 'coredata/author-count')
    self._ndocuments = get_encoded_text(xml, 'coredata/document-count')
    self._name = get_encoded_text(xml, 'affiliation-name')
    self._address = get_encoded_text(xml, 'address')
    self._city = get_encoded_text(xml, 'city')
    self._country = get_encoded_text(xml, 'country')

    # institution-profile
    date_created = xml.find('institution-profile/date-created')
    if date_created is not None:
        self._date_created = (int(date_created.attrib['year']),
                              int(date_created.attrib['month']),
                              int(date_created.attrib['day']))
    else:
        self._date_created = (None, None, None)
    self._org_type = get_encoded_text(xml, 'institution-profile/org-type')
    self._org_domain = get_encoded_text(xml,
                                        'institution-profile/org-domain')
    self._org_url = get_encoded_text(xml, 'institution-profile/org-URL')
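# Hedged usage sketch for the affiliation class above. The import path and the
# affiliation ID are placeholders; name, city and country are assumed to be
# properties wrapping the underscore attributes set in __init__.
from scopus import ScopusAffiliation  # assumed import path

AFFILIATION_ID = 60000000  # placeholder Scopus affiliation ID
aff = ScopusAffiliation(AFFILIATION_ID, refresh=False)
print(aff.name, aff.city, aff.country)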