Example #1
    def __init__(self, author_id, refresh=False, refresh_aff=False, level=1):
        """Class to represent a Scopus Author query by the scopus-id.

        Parameters
        ----------
        author_id : str or int
            The ID of the author to search for.  Optionally expressed
            as an Elsevier EID (i.e., in the form 9-s2.0-nnnnnnnn).

        refresh : bool (optional, default=False)
            Whether to refresh the cached file (if it exists) or not.

        refresh_aff : bool (optional, default=False)
            Whether to refresh the cached corresponding affiliation views
            (if they exist) or not.

        level : int (optional, default=1)
            Number of * to print in property __str__.

        Notes
        -----
        The files are cached in ~/.scopus/author/{author_id} (any
        leading '9-s2.0-' is removed).
        """
        author_id = str(int(str(author_id).split('-')[-1]))
        self.level = level

        qfile = os.path.join(SCOPUS_AUTHOR_DIR, author_id)
        url = ('https://api.elsevier.com/content/author/'
               'author_id/{}').format(author_id)
        params = {'author_id': author_id, 'view': 'ENHANCED'}
        self.xml = ET.fromstring(get_content(qfile, url=url, refresh=refresh,
                                             params=params))
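
A minimal usage sketch (hedged: the import path follows the old scopus package layout, a configured API key is assumed, and the ID is only illustrative):

    from scopus import ScopusAuthor  # assumed import path

    au = ScopusAuthor(7004212771)            # plain Scopus author ID
    au = ScopusAuthor("9-s2.0-7004212771")   # EID form; the prefix is stripped
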
Example #2
    def __init__(self, aff_id, refresh=False):
        """Class to represent an Affiliation in Scopus.

        Parameters
        ----------
        aff_id : str or int
            The Scopus Affiliation ID.  Optionally expressed
            as an Elsevier EID (i.e., in the form 10-s2.0-nnnnnnnn).

        refresh : bool (optional, default=False)
            Whether to refresh the cached file if it exists or not.

        Notes
        -----
        The files are cached in ~/.scopus/affiliation/{aff_id}.
        """
        if config.getboolean('Warnings', 'Affiliation'):
            text = config.get('Warnings',
                              'Text').format('ContentAffiliationRetrieval')
            warnings.warn(text, DeprecationWarning)
            config.set('Warnings', 'Affiliation', '0')
        aff_id = str(int(str(aff_id).split('-')[-1]))

        qfile = os.path.join(SCOPUS_AFFILIATION_DIR, aff_id)
        url = ('https://api.elsevier.com/content/affiliation/'
               'affiliation_id/{}'.format(aff_id))

        self.xml = ET.fromstring(get_content(qfile, url=url, refresh=refresh))
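
The affiliation wrapper is called the same way (a sketch; the ID is illustrative):

    aff = ScopusAffiliation("10-s2.0-60027950")      # EID form; '10-s2.0-' is stripped
    aff = ScopusAffiliation(60027950, refresh=True)  # ignore any cached file
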
Example #3
    def __init__(self, aff_id, refresh=False):
        """Class to represent an Affiliation in Scopus.

        Parameters
        ----------
        aff_id : str or int
            The Scopus Affiliation ID.  Optionally expressed
            as an Elsevier EID (i.e., in the form 10-s2.0-nnnnnnnn).

        refresh : bool (optional, default=False)
            Whether to refresh the cached file if it exists or not.

        Notes
        -----
        The files are cached in ~/.scopus/affiliation_retrieval/{aff_id}.
        """
        aff_id = str(int(str(aff_id).split('-')[-1]))

        qfile = join(config.get('Directories', 'ContentAffiliationRetrieval'),
                     aff_id)
        url = ('https://api.elsevier.com/content/affiliation/'
               'affiliation_id/{}'.format(aff_id))

        res = get_content(qfile, url=url, refresh=refresh, accept='json')
        self._json = loads(
            res.decode('utf-8'))['affiliation-retrieval-response']
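
Unlike the XML-based variants, this class caches and parses JSON, so fields are plain dict lookups (a sketch; the key name follows the Scopus affiliation-retrieval JSON schema, and subclass properties normally wrap it):

    aff = ContentAffiliationRetrieval(60027950)
    print(aff._json.get('affiliation-name'))
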
Example #4
    def __init__(self, aff_id, refresh=False):
        """Class to represent an Affiliation in Scopus.

        Parameters
        ----------
        aff_id : str or int
            The Scopus Affiliation ID.  Optionally expressed
            as an Elsevier EID (i.e., in the form 10-s2.0-nnnnnnnn).

        refresh : bool (optional, default=False)
            Whether to refresh the cached file if it exists or not.

        Notes
        -----
        The files are cached in ~/.scopus/affiliation/{aff_id}.
        """
        if config.getboolean('Warnings', 'Affiliation'):
            text = config.get('Warnings', 'Text').format('ContentAffiliationRetrieval')
            warnings.warn(text, DeprecationWarning)
            config.set('Warnings', 'Affiliation', '0')
        aff_id = str(int(str(aff_id).split('-')[-1]))

        qfile = os.path.join(SCOPUS_AFFILIATION_DIR, aff_id)
        url = ('https://api.elsevier.com/content/affiliation/'
               'affiliation_id/{}'.format(aff_id))

        self.xml = ET.fromstring(get_content(qfile, url=url, refresh=refresh))
Example #5
    def __init__(self, aff_id, refresh=False):
        """Class to represent an Affiliation in Scopus.

        Parameters
        ----------
        aff_id : str or int
            The Scopus Affiliation ID.

        refresh : bool (optional, default=False)
            Whether to refresh the cached file if it exists or not.

        Notes
        -----
        The files are cached in ~/.scopus/affiliation/{aff_id}.
        """

        self._affiliation_id = aff_id

        qfile = os.path.join(SCOPUS_AFFILIATION_DIR, str(aff_id))
        url = ('http://api.elsevier.com/content/affiliation/'
               'affiliation_id/{}'.format(aff_id))

        xml = ET.fromstring(get_content(qfile, url=url, refresh=refresh))

        # public url
        self._url = xml.find('coredata/link[@rel="scopus-affiliation"]')
        if self._url is not None:
            self._url = self._url.get('href')
        self.api_url = get_encoded_text(xml, 'coredata/prism:url')
        self._nauthors = get_encoded_text(xml, 'coredata/author-count')
        self._ndocuments = get_encoded_text(xml, 'coredata/document-count')
        self._name = get_encoded_text(xml, 'affiliation-name')
        self._address = get_encoded_text(xml, 'address')
        self._city = get_encoded_text(xml, 'city')
        self._country = get_encoded_text(xml, 'country')
Example #6
    def __init__(self, eid, start, end=datetime.now().year, refresh=False):
        """Class to represent the results from a Scopus Citation Overview.
        See https://api.elsevier.com/documentation/guides/AbstractCitationViews.htm.

        Parameters
        ----------
        eid : str
            The EID of the abstract.

        start : str or int
            The first year for which the citation count should be loaded

        end : str or int (optional, default=datetime.now().year)
            The last year for which the citation count should be loaded.
            Default is the current year.

        refresh : bool (optional, default=False)
            Whether to refresh the cached file if it exists or not.

        Notes
        -----
        The files are cached in ~/.scopus/citation_overview/{eid}.
        Your API Key needs to be approved by Elsevier to access this view.
        """
        # Get file content
        scopus_id = eid.split('0-')[-1]
        qfile = os.path.join(CITATION_OVERVIEW_DIR, eid)
        url = "https://api.elsevier.com/content/abstract/citations/{}".format(
            scopus_id)
        params = {'scopus_id': scopus_id, 'date': '{}-{}'.format(start, end)}
        res = get_content(qfile,
                          url=url,
                          refresh=refresh,
                          params=params,
                          accept='json')
        data = loads(res.decode('utf-8'))['abstract-citations-response']

        self.start = int(start)
        self.end = int(end)

        # citeInfoMatrix
        m = data['citeInfoMatrix']['citeInfoMatrixXML']['citationMatrix'][
            'citeInfo'][0]
        self.citeInfoMatrix = {k.split(":", 1)[-1]: v for k, v in m.items()}
        # h-index
        self.hindex = data['h-index']
        # identifier-legend
        legend = data['identifier-legend']['identifier'][0]
        self.identifierlegend = {k.split(":", 1)[-1]: v
                                 for k, v in legend.items()}
        # citeColumnTotalXML
        self.citeColumnTotalXML = data['citeColumnTotalXML']  # not used
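
A usage sketch (hedged: the EID is illustrative, and the API key must be approved for this view, as the Notes say):

    co = CitationOverview("2-s2.0-84930616647", start=2015, end=2018)
    print(co.hindex)           # h-index reported by the API for this document
    print(co.citeInfoMatrix)   # per-year info, with the 'xx:' key prefixes stripped
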
Example #7
    def __init__(self, EID, view='META_ABS', refresh=False):
        """Class to represent the results from a Scopus abstract.

        Parameters
        ----------
        EID : str
            The Scopus ID (EID) of an abstract.

        view : str (optional, default=META_ABS)
            The view of the file that should be downloaded.  Will not take
            effect for already cached files. Supported values: META, META_ABS,
            FULL, where FULL includes all information of the META_ABS view and
            META_ABS includes all information of the META view.  See
            https://dev.elsevier.com/guides/AbstractRetrievalViews.htm
            for details.

        refresh : bool (optional, default=False)
            Whether to refresh the cached file if it exists or not.

        Notes
        -----
        The files are cached in ~/.scopus/xml/{eid}.
        """
        if config.getboolean('Warnings', 'Abstract'):
            text = config.get('Warnings', 'Text').format('AbstractRetrieval')
            warnings.warn(text, DeprecationWarning)
            config.set('Warnings', 'Abstract', '0')
        allowed_views = ('META', 'META_ABS', 'FULL')
        if view not in allowed_views:
            raise ValueError('view parameter must be one of ' +
                             ', '.join(allowed_views))

        # Get file content
        qfile = os.path.join(SCOPUS_XML_DIR, EID)
        url = "https://api.elsevier.com/content/abstract/eid/{}".format(EID)
        params = {'view': view}
        self.xml = ET.fromstring(get_content(qfile, url=url, refresh=refresh,
                                             params=params))
        # Remove default namespace if present
        remove = u'{http://www.elsevier.com/xml/svapi/abstract/dtd}'
        nsl = len(remove)
        for elem in self.xml.iter():
            if elem.tag.startswith(remove):
                elem.tag = elem.tag[nsl:]

        if self.xml.tag == 'service-error':
            raise Exception('\n{0}\n{1}'.format(EID, self.xml))

        self.coredata = self.xml.find('coredata', ns)
        self.items = self.xml.find('item', ns)
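
The namespace-stripping loop above is worth isolating; a self-contained sketch of the same trick:

    import xml.etree.ElementTree as ET

    doc = ET.fromstring('<a xmlns="http://www.elsevier.com/xml/svapi/abstract/dtd">'
                        '<coredata/></a>')
    prefix = '{http://www.elsevier.com/xml/svapi/abstract/dtd}'
    for elem in doc.iter():  # iter() replaces the deprecated getiterator()
        if elem.tag.startswith(prefix):
            elem.tag = elem.tag[len(prefix):]
    assert doc.find('coredata') is not None  # plain paths now work, no namespace map
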
Example #8
    def __init__(self, EID, view='META_ABS', refresh=False):
        """Class to represent the results from a Scopus abstract.

        Parameters
        ----------
        EID : str
            The Scopus ID (EID) of an abstract.

        view : str (optional, default=META_ABS)
            The view of the file that should be downloaded.  Will not take
            effect for already cached files. Supported values: META, META_ABS,
            FULL, where FULL includes all information of the META_ABS view and
            META_ABS includes all information of the META view.  See
            https://dev.elsevier.com/guides/AbstractRetrievalViews.htm
            for details.

        refresh : bool (optional, default=False)
            Whether to refresh the cached file if it exists or not.

        Notes
        -----
        The files are cached in ~/.scopus/xml/{eid}.
        """
        if config.getboolean('Warnings', 'Abstract'):
            text = config.get('Warnings', 'Text').format('AbstractRetrieval')
            warnings.warn(text, DeprecationWarning)
            config.set('Warnings', 'Abstract', '0')
        allowed_views = ('META', 'META_ABS', 'FULL')
        if view not in allowed_views:
            raise ValueError('view parameter must be one of ' +
                             ', '.join(allowed_views))

        # Get file content
        qfile = os.path.join(SCOPUS_XML_DIR, EID)
        url = "https://api.elsevier.com/content/abstract/eid/{}".format(EID)
        params = {'view': view}
        self.xml = ET.fromstring(
            get_content(qfile, url=url, refresh=refresh, params=params))
        # Remove default namespace if present
        remove = u'{http://www.elsevier.com/xml/svapi/abstract/dtd}'
        nsl = len(remove)
        for elem in self.xml.iter():
            if elem.tag.startswith(remove):
                elem.tag = elem.tag[nsl:]

        if self.xml.tag == 'service-error':
            raise Exception('\n{0}\n{1}'.format(EID, self.xml))

        self.coredata = self.xml.find('coredata', ns)
        self.items = self.xml.find('item', ns)
Example #9
    def __init__(self, aff_id, refresh=False):
        """Class to represent an Affiliation in Scopus.

        Parameters
        ----------
        aff_id : str or int
            The Scopus Affiliation ID.  Optionally expressed
            as an Elsevier EID (i.e., in the form 10-s2.0-nnnnnnnn).

        refresh : bool (optional, default=False)
            Whether to refresh the cached file if it exists or not.

        Notes
        -----
        The files are cached in ~/.scopus/affiliation/{aff_id}.
        """
        aff_id = str(int(str(aff_id).split('-')[-1]))

        qfile = os.path.join(SCOPUS_AFFILIATION_DIR, aff_id)
        url = ('https://api.elsevier.com/content/affiliation/'
               'affiliation_id/{}'.format(aff_id))

        xml = ET.fromstring(get_content(qfile, url=url, refresh=refresh))

        # coredata
        self._url = xml.find('coredata/link[@rel="scopus-affiliation"]')
        _aff_id = get_encoded_text(xml, 'coredata/dc:identifier')
        self._aff_id = _aff_id.split(":")[-1]
        if self._url is not None:
            self._url = self._url.get('href')
        self._api_url = get_encoded_text(xml, 'coredata/prism:url')
        self._nauthors = get_encoded_text(xml, 'coredata/author-count')
        self._ndocuments = get_encoded_text(xml, 'coredata/document-count')

        self._name = get_encoded_text(xml, 'affiliation-name')
        self._address = get_encoded_text(xml, 'address')
        self._city = get_encoded_text(xml, 'city')
        self._country = get_encoded_text(xml, 'country')

        # institution-profile
        date_created = xml.find('institution-profile/date-created')
        if date_created is not None:
            self._date_created = (int(date_created.attrib['year']),
                                  int(date_created.attrib['month']),
                                  int(date_created.attrib['day']))
        else:
            self._date_created = (None, None, None)
        self._org_type = get_encoded_text(xml, 'institution-profile/org-type')
        self._org_domain = get_encoded_text(xml, 'institution-profile/org-domain')
        self._org_url = get_encoded_text(xml, 'institution-profile/org-URL')
Example #10
    def __init__(self, author_id, refresh=False):
        """Class to represent a Scopus Author query by the scopus-id.

        Parameters
        ----------
        author_id : str or int
            The ID of the author to search for.  Optionally expressed
            as an Elsevier EID (i.e., in the form 9-s2.0-nnnnnnnn).

        refresh : bool (optional, default=False)
            Whether to refresh the cached file (if it exists) or not.

        Notes
        -----
        The files are cached in ~/.scopus/author_retrieval/{author_id} (any
        leading '9-s2.0-' is removed).
        """
        self._id = str(int(str(author_id).split('-')[-1]))

        qfile = join(config.get('Directories', 'AuthorRetrieval'), self._id)
        url = ('https://api.elsevier.com/content/author/'
               'author_id/{}').format(self._id)
        params = {'author_id': self._id, 'view': 'ENHANCED'}
        res = get_content(qfile,
                          url=url,
                          refresh=refresh,
                          accept='json',
                          params=params)
        self._json = loads(res.decode('utf-8'))['author-retrieval-response']
        try:
            self._json = self._json[0]
        except KeyError:
            alias_json = self._json['alias']['prism:url']
            if not isinstance(alias_json, list):
                alias_json = [alias_json]
            alias = ', '.join([d['$'].split(':')[-1] for d in alias_json])
            text = 'Author profile with ID {} has been merged and the main '\
                   'profile is now one of {}.  Please update your records '\
                   'manually.  Functionality of this object is '\
                   'reduced.'.format(author_id, alias)
            warn(text, UserWarning)
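
For a merged profile the response is a dict keyed 'alias' rather than a one-element list, which is why the KeyError branch fires. A sketch of surfacing that warning to the caller (the ID is hypothetical):

    import warnings

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        au = AuthorRetrieval(55555555555)
    if any(issubclass(w.category, UserWarning) for w in caught):
        print("Profile was merged; update stored IDs.")
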
Example #11
    def __init__(self, author_id, refresh=False, refresh_aff=False, level=1):
        """Class to represent a Scopus Author query by the scopus-id.

        Parameters
        ----------
        author_id : str or int
            The ID of the author to search for.  Optionally expressed
            as an Elsevier EID (i.e., in the form 9-s2.0-nnnnnnnn).

        refresh : bool (optional, default=False)
            Whether to refresh the cached file (if it exists) or not.

        refresh_aff : bool (optional, default=False)
            Whether to refresh the cached corresponding affiliation views
            (if they exist) or not.

        level : int (optional, default=1)
            Number of * to print in property __str__.

        Notes
        -----
        The files are cached in ~/.scopus/author/{author_id} (any
        leading '9-s2.0-' is removed).
        """
        if config.getboolean('Warnings', 'Author'):
            text = config.get('Warnings', 'Text').format('AuthorRetrieval')
            warnings.warn(text, DeprecationWarning)
            config.set('Warnings', 'Author', '0')
        author_id = str(int(str(author_id).split('-')[-1]))
        self.level = level

        qfile = os.path.join(SCOPUS_AUTHOR_DIR, author_id)
        url = ('https://api.elsevier.com/content/author/'
               'author_id/{}').format(author_id)
        params = {'author_id': author_id, 'view': 'ENHANCED'}
        self.xml = ET.fromstring(get_content(qfile, url=url, refresh=refresh,
                                             params=params))
Example #12
    def __init__(self,
                 query,
                 filepath,
                 url,
                 refresh,
                 count=200,
                 start=0,
                 max_entries=5000):
        """Class intended for use a superclass to perform a search query.

        Parameters
        ----------
        query : str
            A string of the query.

        filepath : str
            The complete filepath and -name of the cached file.

        url : str
            The API access point.

        refresh : bool
            Whether to refresh the cached file if it exists or not.

        count : int (optional, default=200)
            The number of entries to be displayed at once.  A smaller number
            means more queries, each with fewer results.

        start : int (optional, default=0)
            The entry number of the first search item to start with.

        max_entries : int (optional, default=5000)
            Raise error when the number of results is beyond this number.
            The Scopus Search Engine does not allow more than 5000 entries.

        Raises
        ------
        Exception
            If the number of search results exceeds max_entries.
        """
        # Read the file contents if it exists and we are not refreshing.
        if not refresh and exists(filepath):
            self._json = []
            with open(filepath) as f:
                for r in f.readlines():
                    self._json.append(loads(r))
        # If the cached file doesn't exist, or we are refreshing, download the file.
        else:
            # First, we get a count of how many things to retrieve.
            params = {'query': query, 'count': 0, 'start': 0}
            res = get_content(filepath,
                              url=url,
                              refresh=refresh,
                              params=params,
                              accept='json')
            data = loads(res.decode('utf-8'))['search-results']
            N = int(data.get('opensearch:totalResults', 0))
            if N > max_entries:
                raise Exception(('Found {} matches. '
                                 'Set max_entries to a higher number or '
                                 'change your query ({})').format(N, query))

            # Then we download the information in chunks.
            self._json = []
            while N > 0:
                params = {'query': query, 'count': count, 'start': start}
                resp = download(url=url, params=params, accept="json")
                results = resp.json()

                if 'entry' in results.get('search-results', []):
                    for r in results['search-results']['entry']:
                        self._json.append({f: r[f] for f in r.keys()})
                start += count
                N -= count

            # Finally write out the file.
            with open(filepath, 'wb') as f:
                for author in self._json:
                    f.write('{}\n'.format(dumps(author)).encode('utf-8'))
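
The core of this class is the count/start pagination loop. A generic, self-contained sketch of the same pattern (fetch_page is a hypothetical callable standing in for download(...).json()):

    def paginate(fetch_page, url, query, total, count=200):
        entries, start = [], 0
        while start < total:
            page = fetch_page(url, params={'query': query, 'count': count,
                                           'start': start})
            entries.extend(page.get('search-results', {}).get('entry', []))
            start += count
        return entries
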
Example #13
    def __init__(self, EID, view='META_ABS', refresh=False, id_type=None):
        """Class to represent the results from a Scopus abstract.

        Parameters
        ----------
        EID : str
            The Scopus ID (EID) of an abstract.

        view : str (optional, default=META_ABS)
            The view of the file that should be downloaded.  Will not take
            effect for already cached files. Allowed values: META, META_ABS,
            FULL, where FULL includes all information of the META_ABS view and
            META_ABS includes all information of the META view.  See
            https://dev.elsevier.com/guides/AbstractRetrievalViews.htm
            for details.

        refresh : bool (optional, default=False)
            Whether to refresh the cached file if it exists or not.

        id_type : str (optional, default=None)
            The type of the used ID.  Allowed values: None, 'eid', 'pii',
            'scopus_id', 'pubmed_id', 'doi'.  If the value is None, the
            function tries to infer the ID type itself.  Other values manually
            set the ID type to one of the types supported by Scopus.

        Raises
        ------
        ValueError
            If the view or id_type parameter contains invalid entries.

        Notes
        -----
        The files are cached in ~/.scopus/abstract_retrieval/{eid}.

        A DOI always contains a '/' character, which is a path separator on
        some operating systems, so '/' is replaced in the filename for caching.
        """
        EID = str(EID)
        allowed_views = ('META', 'META_ABS', 'FULL')
        if view not in allowed_views:
            raise ValueError('view parameter must be one of ' +
                             ', '.join(allowed_views))

        if id_type is None:
            id_type = detect_id_type(EID)
        else:
            allowed_id_types = ('eid', 'pii', 'scopus_id', 'pubmed_id', 'doi')
            if id_type not in allowed_id_types:
                raise ValueError('id_type parameter must be one of ' +
                                 ', '.join(allowed_id_types))

        qfile = join(config.get('Directories', 'AbstractRetrieval'),
                     EID.replace('/', '_'))
        url = "https://api.elsevier.com/content/abstract/{}/{}".format(
            id_type, EID)
        res = get_content(qfile,
                          url=url,
                          refresh=refresh,
                          accept='json',
                          params={'view': view})
        self._json = loads(res.decode('utf-8'))['abstracts-retrieval-response']
        self._head = self._json.get('item', {}).get('bibrecord',
                                                    {}).get('head', {})
        self._tail = self._json.get('item', {}).get('bibrecord',
                                                    {}).get('tail', {})
        if self._tail is None:
            self._tail = {}
        self._confinfo = self._head.get('source',
                                        {}).get('additional-srcinfo',
                                                {}).get('conferenceinfo', {})
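
Because id_type can be inferred, the class accepts several identifier forms directly (a sketch; the identifiers are illustrative):

    ab = AbstractRetrieval("10.1016/j.softx.2019.100263", id_type="doi")
    ab = AbstractRetrieval("2-s2.0-85068268027")  # id_type inferred as 'eid'
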
Example #14
    def __init__(self, ISSN, refresh=False):
        ISSN = str(ISSN)
        self.issn = ISSN

        qfile = os.path.join(SCOPUS_ISSN_DIR, ISSN)
        url = ("http://api.elsevier.com/content/serial/title/issn:" + ISSN)
        self.xml = ET.fromstring(get_content(qfile, refresh, url))

        self.publisher = get_encoded_text(self.xml, 'entry/dc:publisher')
        self.title = get_encoded_text(self.xml, 'entry/dc:title')
        self.aggregationType = get_encoded_text(self.xml,
                                                'entry/prism:aggregationType')
        self.prism_url = get_encoded_text(self.xml, 'entry/prism:url')

        # Impact factors
        SNIP = get_encoded_text(self.xml, 'entry/SNIPList/SNIP')
        SNIP_year = self.xml.find('entry/SNIPList/SNIP', ns)
        if SNIP_year is not None:
            SNIP_year = SNIP_year.get('year')
        else:
            SNIP_year = -1

        IPP = get_encoded_text(self.xml, 'entry/IPPList/IPP')
        IPP_year = self.xml.find('entry/IPPList/IPP', ns)
        if IPP_year is not None:
            IPP_year = IPP_year.get('year')
        else:
            IPP_year = -1

        SJR = get_encoded_text(self.xml, 'entry/SJRList/SJR')
        SJR_year = self.xml.find('entry/SJRList/SJR', ns)
        if SJR_year is not None:
            SJR_year = SJR_year.get('year')
        else:
            SJR_year = -1
        if SNIP:
            self.SNIP = float(SNIP)
            self.SNIP_year = int(SNIP_year)
        else:
            self.SNIP = None
            self.SNIP_year = None

        if IPP:
            self.IPP = float(IPP)
            self.IPP_year = int(IPP_year)
        else:
            self.IPP = None
            self.IPP_year = None

        if SJR:
            self.SJR = float(SJR)
            self.SJR_year = int(SJR_year)
        else:
            self.SJR = None
            self.SJR_year = None

        scopus_url = self.xml.find('entry/link[@ref="scopus-source"]')
        if scopus_url is not None:
            self.scopus_url = scopus_url.attrib['href']
        else:
            self.scopus_url = None

        homepage = self.xml.find('entry/link[@ref="homepage"]')
        if homepage is not None:
            self.homepage = homepage.attrib['href']
        else:
            self.homepage = None
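
A usage sketch (hedged: the enclosing class is called ScopusJournal here only for illustration, and the ISSN is just an example):

    journal = ScopusJournal("0140-6736")
    print(journal.title, journal.SNIP, journal.SJR)
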
Example #15
    def __init__(self, EID, view='META_ABS', refresh=False):
        """Class to represent the results from a Scopus abstract.

        Parameters
        ----------
        EID : str
            The Scopus ID of an abstract.

        view : str (optional, default=META_ABS)
            The view of the file that should be downloaded.  Currently
            supported values: META, META_ABS, FULL.

        refresh : bool (optional, default=False)
            Whether to refresh the cached file if it exists or not.

        Notes
        -----
        The files are cached in ~/.scopus/xml/{eid}.
        """
        allowed_views = ('META', 'META_ABS', 'FULL')
        if view not in allowed_views:
            raise ValueError('view parameter must be one of ' +
                             ', '.join(allowed_views))

        # Get file content
        qfile = os.path.join(SCOPUS_XML_DIR, EID)
        url = "http://api.elsevier.com/content/abstract/eid/{}".format(EID)
        params = {'view': view}
        xml = ET.fromstring(
            get_content(qfile, url=url, refresh=refresh, params=params))

        self.xml = xml
        if xml.tag == 'service-error':
            raise Exception('\n{0}\n{1}'.format(EID, self.xml))

        # Parse coredata
        coredata = xml.find('dtd:coredata', ns)
        self._url = get_encoded_text(coredata, 'prism:url')
        self.identifier = get_encoded_text(coredata, 'dc:identifier')
        self.eid = get_encoded_text(coredata, 'dtd:eid')
        self._doi = get_encoded_text(coredata, 'prism:doi')
        self._title = get_encoded_text(coredata, 'dc:title')
        self._aggregationType = get_encoded_text(coredata,
                                                 'prism:aggregationType')
        self._publicationName = get_encoded_text(coredata,
                                                 'prism:publicationName')
        self._srctype = get_encoded_text(coredata, 'dtd:srctype')
        self._citedby_count = get_encoded_text(coredata, 'dtd:citedby-count')
        self._publisher = get_encoded_text(coredata, 'dc:publisher')
        self._source_id = get_encoded_text(coredata, 'dtd:source-id')
        self._issn = get_encoded_text(coredata, 'prism:issn')
        self._volume = get_encoded_text(coredata, 'prism:volume')
        self._issueIdentifier = get_encoded_text(coredata,
                                                 'prism:issueIdentifier')
        self._article_number = get_encoded_text(coredata, 'dtd:article-number')
        self._startingPage = get_encoded_text(coredata, 'prism:startingPage')
        self._endingPage = get_encoded_text(coredata, 'prism:endingPage')
        self._pageRange = get_encoded_text(coredata, 'prism:pageRange')
        self._coverDate = get_encoded_text(coredata, 'prism:coverDate')
        self.creator = get_encoded_text(coredata, 'dc:creator')
        self.description = get_encoded_text(coredata, 'dc:description')
        sl = coredata.find('dtd:link[@rel="scopus"]', ns).get('href')
        self_link = coredata.find('dtd:link[@rel="self"]', ns).get('href')
        cite_link = coredata.find('dtd:link[@rel="cited-by"]', ns)
        if cite_link is not None:
            cite_link = cite_link.get('href')
        self.scopus_link = sl
        self.self_link = self_link
        self.cite_link = cite_link

        # Parse subject-areas
        subjectAreas = xml.find('dtd:subject-areas', ns)
        try:
            self._subjectAreas = [a.text for a in subjectAreas]
        except:
            self._subjectAreas = None

        # Parse authors
        authors = xml.find('dtd:authors', ns)
        self._authors = [_ScopusAuthor(author) for author in authors]
        self._affiliations = [
            _ScopusAffiliation(aff)
            for aff in xml.findall('dtd:affiliation', ns)
        ]

        # Parse items
        items = xml.find('item', ns)
        self._website = get_encoded_text(
            items, 'bibrecord/head/source/website/ce:e-address')
        try:
            self._citationType = items.find(
                'bibrecord/head/citation-info/citation-type').get("code")
        except:
            self._citationType = None
        try:
            self._citationLang = items.find(
                'bibrecord/head/citation-info/citation-language').get(
                    "language")
        except:
            self._citationLang = None
        try:
            self._references = items.find('bibrecord/tail/bibliography', ns)
        except:
            self._references = None
Example #16
    def __init__(self, identifier, api, refresh, id_type=None, view=None,
                 date=None):
        """Class intended as superclass to perform retrievals.

        Parameters
        ----------
        identifier : str or int
            The ID of the item to retrieve.

        api : str
            The name of the Scopus API to be accessed.  Allowed values:
            AbstractRetrieval, AuthorRetrieval, CitationOverview,
            ContentAffiliationRetrieval.

        refresh : bool
            Whether to refresh the cached file if it exists or not.

        id_type : str (optional, default=None)
            The type of used ID.
            Note: Will only take effect for the AbstractRetrieval API.

        view : str (optional, default=None)
            The view of the file that should be downloaded.  Will not take
            effect for already cached files.  Allowed values: STANDARD,
            COMPLETE.
            Note: Will only take effect for the AbstractRetrieval API.

        date : str (optional, default=None)
            A string combining two years with a hyphen, for which citations
            should be looked up.
            Note: Will only take effect for the CitationOverview API.

        Raises
        ------
        ValueError
            If the api parameter or view parameter is an invalid entry.
        """
        # Checks
        if api not in URL:
            raise ValueError('api parameter must be one of ' +
                             ', '.join(URL.keys()))
        if not config.has_section('Directories'):
            create_config()

        # Construct parameters
        url = URL[api]
        if api == "AbstractRetrieval":
            url += id_type + "/"
        elif api == "AuthorRetrieval":
            view = 'ENHANCED'
        params = {'view': view}
        if api == 'CitationOverview':
            params.update({'date': date, 'scopus_id': identifier.split('0-')[-1]})
        url += identifier

        # Parse file contents
        qfile = join(config.get('Directories', api),
                     identifier.replace('/', '_'))
        res = get_content(qfile, refresh, url=url, accept='json',
                          params=params)
        self._json = loads(res.decode('utf-8'))
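
A sketch of how a concrete class would delegate to this superclass (assuming it is named Retrieval; the property and JSON keys follow the affiliation-retrieval response schema):

    class ContentAffiliationRetrieval(Retrieval):
        def __init__(self, aff_id, refresh=False):
            super().__init__(identifier=str(aff_id), refresh=refresh,
                             api='ContentAffiliationRetrieval')
            self._json = self._json['affiliation-retrieval-response']

        @property
        def affiliation_name(self):
            return self._json.get('affiliation-name')
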
Example #17
    def __init__(self, author_id, refresh=False, refresh_aff=False, level=1):
        """Class to represent a Scopus Author query by the scopus-id.

        Parameters
        ----------
        author_id : str or int
            The ID of the author to search for.

        refresh : bool (optional, default=False)
            Whether to refresh the cached file (if it exists) or not.

        refresh_aff : bool (optional, default=False)
            Whether to refresh the cached corresponding affiliation views
            (if they exist) or not.

        level : int (optional, default=1)
            Number of * to print in property __str__.

        Notes
        -----
        The files are cached in ~/.scopus/author/{author_id}.
        """
        author_id = str(int(author_id))

        self.level = level

        qfile = os.path.join(SCOPUS_AUTHOR_DIR, author_id)
        url = ('http://api.elsevier.com/content/author/'
               'author_id/{}').format(author_id)
        params = {'author_id': author_id, 'view': 'ENHANCED'}
        xml = ET.fromstring(
            get_content(qfile, url=url, refresh=refresh, params=params))
        self.xml = xml
        self._orcid = get_encoded_text(xml, 'coredata/orcid')
        hindex = get_encoded_text(xml, 'h-index')
        self._hindex = int(hindex) if hindex is not None else 0

        ndocuments = get_encoded_text(xml, 'coredata/document-count')
        self._ndocuments = int(ndocuments) if ndocuments is not None else 0

        _author_id = get_encoded_text(xml, 'coredata/dc:identifier')
        self._author_id = _author_id.split(":")[-1]

        citation_count = get_encoded_text(xml, 'coredata/citation-count')
        self._citation_count = int(
            citation_count) if citation_count is not None else 0

        ncited_by = get_encoded_text(xml, 'coredata/cited-by-count')
        self._ncited_by = int(ncited_by) if ncited_by is not None else 0

        ncoauthors = get_encoded_text(xml, 'coauthor-count')
        self._ncoauthors = int(ncoauthors) if ncoauthors is not None else 0

        self._current_affiliation = get_encoded_text(
            xml,
            'author-profile/affiliation-current/affiliation/ip-doc/afdispname')

        # affiliation history (sort out faulty historic affiliations)
        aff_ids = [
            el.attrib.get('affiliation-id') for el in xml.findall(
                'author-profile/affiliation-history/affiliation')
            if el is not None and len(list(el.find("ip-doc").iter())) > 1
        ]
        affs = [
            ScopusAffiliation(aff_id, refresh=refresh_aff)
            for aff_id in aff_ids
        ]
        self._affiliation_history = affs

        date_created = xml.find('author-profile/date-created', ns)
        if date_created is not None:
            self._date_created = (int(date_created.attrib['year']),
                                  int(date_created.attrib['month']),
                                  int(date_created.attrib['day']))
        else:
            self._date_created = (None, None, None)
        # Research areas
        self._area_elements = xml.findall('subject-areas/subject-area')
        # {code: name}
        d = {int(ae.attrib['code']): ae.text for ae in self._area_elements}

        freqs = xml.findall('author-profile/classificationgroup/'
                            'classifications[@type="ASJC"]/classification')
        # {code: frequency}
        c = {int(cls.text): int(cls.attrib['frequency']) for cls in freqs}
        self._subject_freq = c

        categories = [(d[code], c[code]) for code in d]
        categories.sort(reverse=True, key=itemgetter(1))
        self.categories = categories

        self._firstname = (get_encoded_text(
            xml, 'author-profile/preferred-name/given-name') or '')

        self._lastname = (get_encoded_text(
            xml, 'author-profile/preferred-name/surname') or '')

        self._name = (
            (get_encoded_text(xml, 'author-profile/preferred-name/given-name')
             or '') + ' ' +
            (get_encoded_text(xml, 'author-profile/preferred-name/surname')
             or ''))

        # Real website for the author
        self._scopus_url = xml.find('coredata/link[@rel="scopus-author"]')
        if self._scopus_url is not None:
            self._scopus_url = self._scopus_url.get('href')

        # API URL for coauthors
        self._coauthor_url = xml.find('coredata/link[@rel="coauthor-search"]')
        if self._coauthor_url is not None:
            self._coauthor_url = self._coauthor_url.get('href')

        # Publication history
        pub_hist_elements = self.xml.findall('author-profile/journal-history/')
        self._pub_hist = pub_hist_elements
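
The two dict comprehensions above join subject names and frequencies on the ASJC code; a self-contained sketch of that join:

    d = {1000: 'Multidisciplinary', 2200: 'Engineering'}  # {code: name}
    c = {1000: 3, 2200: 11}                               # {code: frequency}
    categories = sorted(((d[code], c[code]) for code in d),
                        key=lambda t: t[1], reverse=True)
    # -> [('Engineering', 11), ('Multidisciplinary', 3)]
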
Example #18
    def __init__(self,
                 query,
                 filepath,
                 url,
                 refresh,
                 count=200,
                 start=0,
                 max_entries=5000,
                 view='STANDARD'):
        """Class intended as superclass to perform a search query.

        Parameters
        ----------
        query : str
            A string of the query.

        filepath : str
            The complete filepath and -name of the cached file.

        url : str
            The API access point.

        refresh : bool
            Whether to refresh the cached file if it exists or not.

        count : int (optional, default=200)
            The number of entries to be displayed at once.  A smaller number
            means more queries, each with fewer results.

        start : int (optional, default=0)
            The entry number of the first search item to start with.

        max_entries : int (optional, default=5000)
            Raise error when the number of results is beyond this number.
            The Scopus Search Engine does not allow more than 5000 entries.

        view : str (optional, default=STANDARD)
            The view of the file that should be downloaded.  Will not take
            effect for already cached files.  Allowed values: STANDARD,
            COMPLETE.
            Note: Only the Scopus search API additionally uses view COMPLETE.

        Raises
        ------
        Exception
            If the number of search results exceeds max_entries.

        ValueError
            If the view parameter contains invalid entries.
        """
        allowed_views = ('STANDARD', 'COMPLETE')
        if view not in allowed_views:
            raise ValueError('view parameter must be one of ' +
                             ', '.join(allowed_views))
        # Read the file contents if it exists and we are not refreshing
        if not refresh and exists(filepath):
            self._json = []
            with open(filepath) as f:
                for r in f.readlines():
                    self._json.append(loads(r))
        # Download the file if the cached file doesn't exist or we are refreshing
        else:
            # First, get a count of how many things to retrieve
            params = {'query': query, 'count': 0, 'start': 0, 'view': view}
            res = get_content(filepath,
                              url=url,
                              refresh=refresh,
                              params=params,
                              accept='json')
            data = loads(res.decode('utf-8'))['search-results']
            N = int(data.get('opensearch:totalResults', 0))
            if N > max_entries:
                raise Exception(('Found {} matches. '
                                 'Set max_entries to a higher number or '
                                 'change your query ({})').format(N, query))

            # Then download the information in chunks
            self._json = []
            while N > 0:
                params.update({'count': count, 'start': start})
                res = download(url=url, params=params, accept="json")
                results = res.json()

                if 'entry' in results.get('search-results', []):
                    for r in results['search-results']['entry']:
                        self._json.append({f: r[f] for f in r.keys()})
                start += count
                N -= count

            # Finally write out the file
            with open(filepath, 'wb') as f:
                for item in self._json:
                    f.write('{}\n'.format(dumps(item)).encode('utf-8'))
Example #19
    def __init__(self,
                 identifier,
                 api,
                 refresh,
                 id_type=None,
                 view=None,
                 date=None):
        """Class intended as superclass to perform retrievals.

        Parameters
        ----------
        identifier : str or int
            The ID of the item to retrieve.

        api : str
            The name of the Scopus API to be accessed.  Allowed values:
            AbstractRetrieval, AuthorRetrieval, CitationOverview,
            ContentAffiliationRetrieval.

        refresh : bool
            Whether to refresh the cached file if it exists or not.

        id_type : str (optional, default=None)
            The type of used ID.
            Note: Will only take effect for the AbstractRetrieval API.

        view : str (optional, default=None)
            The view of the file that should be downloaded.  Will not take
            effect for already cached files.  Allowed values: STANDARD,
            COMPLETE.
            Note: Will only take effect for the AbstractRetrieval API.

        date : str (optional, default=None)
            A string combining two years with a hyphen, for which citations
            should be looked up.
            Note: Will only take effect for the CitationOverview API.

        Raises
        ------
        ValueError
            If the api parameter or view parameter is an invalid entry.
        """
        # Checks
        if api not in URL:
            raise ValueError('api parameter must be one of ' +
                             ', '.join(URL.keys()))
        if not config.has_section('Directories'):
            create_config()

        # Construct parameters
        url = URL[api]
        if api == "AbstractRetrieval":
            url += id_type + "/"
        elif api == "AuthorRetrieval":
            view = 'ENHANCED'
        params = {'view': view}
        if api == 'CitationOverview':
            params.update({
                'date': date,
                'scopus_id': identifier.split('0-')[-1]
            })
        url += identifier

        # Parse file contents
        qfile = join(config.get('Directories', api),
                     identifier.replace('/', '_'))
        res = get_content(qfile,
                          refresh,
                          url=url,
                          accept='json',
                          params=params)
        self._json = loads(res.decode('utf-8'))
Example #20
    def __init__(self, ISSN, refresh=False):
        ISSN = str(ISSN)
        self.issn = ISSN

        qfile = os.path.join(SCOPUS_ISSN_DIR, ISSN)
        url = ("https://api.elsevier.com/content/serial/title/issn:" + ISSN)
        self.xml = ET.fromstring(get_content(qfile, refresh, url))

        self.publisher = get_encoded_text(self.xml, 'entry/dc:publisher')
        self.title = get_encoded_text(self.xml, 'entry/dc:title')
        self.aggregationType = get_encoded_text(self.xml,
                                                'entry/prism:aggregationType')
        self.prism_url = get_encoded_text(self.xml, 'entry/prism:url')

        # Impact factors
        SNIP = get_encoded_text(self.xml, 'entry/SNIPList/SNIP')
        SNIP_year = self.xml.find('entry/SNIPList/SNIP', ns)
        if SNIP_year is not None:
            SNIP_year = SNIP_year.get('year')
        else:
            SNIP_year = -1

        IPP = get_encoded_text(self.xml, 'entry/IPPList/IPP')
        IPP_year = self.xml.find('entry/IPPList/IPP', ns)
        if IPP_year is not None:
            IPP_year = IPP_year.get('year')
        else:
            IPP_year = -1

        SJR = get_encoded_text(self.xml, 'entry/SJRList/SJR')
        SJR_year = self.xml.find('entry/SJRList/SJR', ns)
        if SJR_year is not None:
            SJR_year = SJR_year.get('year')
        else:
            SJR_year = -1
        if SNIP:
            self.SNIP = float(SNIP)
            self.SNIP_year = int(SNIP_year)
        else:
            self.SNIP = None
            self.SNIP_year = None

        if IPP:
            self.IPP = float(IPP)
            self.IPP_year = int(IPP_year)
        else:
            self.IPP = None
            self.IPP_year = None

        if SJR:
            self.SJR = float(SJR)
            self.SJR_year = int(SJR_year)
        else:
            self.SJR = None
            self.SJR_year = None

        scopus_url = self.xml.find('entry/link[@ref="scopus-source"]')
        if scopus_url is not None:
            self.scopus_url = scopus_url.attrib['href']
        else:
            self.scopus_url = None

        homepage = self.xml.find('entry/link[@ref="homepage"]')
        if homepage is not None:
            self.homepage = homepage.attrib['href']
        else:
            self.homepage = None
Example #21
    def __init__(self, EID, view='META_ABS', refresh=False):
        """Class to represent the results from a Scopus abstract.

        Parameters
        ----------
        EID : str
            The Scopus ID (EID) of an abstract.

        view : str (optional, default=META_ABS)
            The view of the file that should be downloaded.  Will not take
            effect for already cached files. Supported values: META, META_ABS,
            FULL, where FULL includes all information of the META_ABS view and
            META_ABS includes all information of the META view.  See
            https://dev.elsevier.com/guides/AbstractRetrievalViews.htm
            for details.

        refresh : bool (optional, default=False)
            Whether to refresh the cached file if it exists or not.

        Notes
        -----
        The files are cached in ~/.scopus/xml/{eid}.
        """
        allowed_views = ('META', 'META_ABS', 'FULL')
        if view not in allowed_views:
            raise ValueError('view parameter must be one of ' +
                             ', '.join(allowed_views))

        # Get file content
        qfile = os.path.join(SCOPUS_XML_DIR, EID)
        url = "https://api.elsevier.com/content/abstract/eid/{}".format(EID)
        params = {'view': view}
        xml = ET.fromstring(
            get_content(qfile, url=url, refresh=refresh, params=params))
        # Remove default namespace if present
        remove = u'{http://www.elsevier.com/xml/svapi/abstract/dtd}'
        nsl = len(remove)
        for elem in xml.iter():
            if elem.tag.startswith(remove):
                elem.tag = elem.tag[nsl:]

        self.xml = xml
        if xml.tag == 'service-error':
            raise Exception('\n{0}\n{1}'.format(EID, self.xml))

        # Parse coredata
        coredata = xml.find('coredata', ns)
        self._url = get_encoded_text(coredata, 'prism:url')
        self.identifier = get_encoded_text(coredata, 'dc:identifier')
        self.eid = get_encoded_text(coredata, 'eid')
        self._doi = get_encoded_text(coredata, 'prism:doi')
        self._title = get_encoded_text(coredata, 'dc:title')
        self._aggregationType = get_encoded_text(coredata,
                                                 'prism:aggregationType')
        self._publicationName = get_encoded_text(coredata,
                                                 'prism:publicationName')
        self._srctype = get_encoded_text(coredata, 'srctype')
        self._citedby_count = get_encoded_text(coredata, 'citedby-count')
        self._publisher = get_encoded_text(coredata, 'dc:publisher')
        self._source_id = get_encoded_text(coredata, 'source-id')
        self._issn = get_encoded_text(coredata, 'prism:issn')
        self._volume = get_encoded_text(coredata, 'prism:volume')
        self._issueIdentifier = get_encoded_text(coredata,
                                                 'prism:issueIdentifier')
        self._article_number = get_encoded_text(coredata, 'article-number')
        self._startingPage = get_encoded_text(coredata, 'prism:startingPage')
        self._endingPage = get_encoded_text(coredata, 'prism:endingPage')
        self._pageRange = get_encoded_text(coredata, 'prism:pageRange')
        self._coverDate = get_encoded_text(coredata, 'prism:coverDate')
        self.creator = get_encoded_text(coredata, 'dc:creator')
        self._description = get_encoded_text(coredata, 'dc:description')
        self._abstract = get_encoded_text(coredata,
                                          'dc:description/abstract/ce:para')

        self.scopus_link = coredata.find('link[@rel="scopus"]', ns).get('href')
        self.self_link = coredata.find('link[@rel="self"]', ns).get('href')
        cite_link = coredata.find('link[@rel="cited-by"]', ns)
        if cite_link is not None:
            cite_link = cite_link.get('href')
        self.cite_link = cite_link

        # Parse authkeywords
        author_keywords = xml.find('authkeywords', ns)
        try:
            self._authkeywords = [a.text for a in author_keywords]
        except:
            self._authkeywords = None

        # Parse subject-areas
        subjectAreas = xml.find('subject-areas', ns)
        try:
            self._subjectAreas = [a.text for a in subjectAreas]
        except:
            self._subjectAreas = None

        # Parse authors
        authors = xml.find('authors', ns)
        try:
            self._authors = [_ScopusAuthor(author) for author in authors]
        except TypeError:
            self._authors = None
        self._affiliations = [
            _ScopusAffiliation(aff) for aff in xml.findall('affiliation', ns)
        ]

        # Parse items
        items = xml.find('item', ns)
        self._website = get_encoded_text(
            items, 'bibrecord/head/source/website/ce:e-address')
        try:
            self._citationType = items.find(
                'bibrecord/head/citation-info/citation-type').get("code")
        except:
            self._citationType = None
        try:
            self._citationLang = items.find(
                'bibrecord/head/citation-info/citation-language').get(
                    "language")
        except:
            self._citationLang = None
        try:
            self._references = items.find('bibrecord/tail/bibliography', ns)
        except:
            self._references = None