示例#1
0
class TestElsAffil:
    """Test affiliation functionality.

    NOTE(review): the read/write tests share the class-level ``myAff``
    object, on which ``test_read_good_bad_client`` calls ``read``; the tests
    defined after it presumably rely on running in definition order.
    """

    ## Test data
    aff_uri = "https://api.elsevier.com/content/affiliation/affiliation_id/60101411"
    aff_id_int = 60101411
    aff_id_str = "60101411"

    ## Test initialization
    def test_init_uri(self):
        """Test case: uri is set correctly during initialization with uri"""
        myAff = ElsAffil(uri = self.aff_uri)
        assert myAff.uri == self.aff_uri

    def test_init_aff_id_int(self):
        """Test case: uri is set correctly during initialization with
            affiliation id as integer"""
        myAff = ElsAffil(affil_id = self.aff_id_int)
        assert myAff.uri == self.aff_uri

    def test_init_aff_id_str(self):
        """Test case: uri is set correctly during initialization with
            affiliation id as string"""
        myAff = ElsAffil(affil_id = self.aff_id_str)
        assert myAff.uri == self.aff_uri

    ## Test reading/writing affiliation profile data
    bad_client = ElsClient("dummy")
    good_client = ElsClient(config['apikey'], inst_token = config['insttoken'])
    good_client.local_dir = str(test_path)

    myAff = ElsAffil(uri = aff_uri)

    def test_read_good_bad_client(self):
        """Test case: using a well-configured client leads to successful read
            and using a badly-configured client does not."""
        assert self.myAff.read(self.bad_client) == False
        assert self.myAff.read(self.good_client) == True

    def test_json_to_dict(self):
        """Test case: the JSON read by the affiliation object from the API is
            parsed into a Python dictionary"""
        assert isinstance(self.myAff.data, dict)

    def test_name_getter(self):
        """Test case: the name attribute is returned as a non-empty string"""
        name = self.myAff.name
        assert isinstance(name, str) and name != ''

    def test_write(self):
        """Test case: the affiliation object's data is written to a file with
            the affiliation ID in the filename"""
        self.myAff.write()
        # 'dc:identifier' is split on ':' and the part after it is the ID
        affil_id = self.myAff.data['coredata']['dc:identifier'].split(':')[1]
        assert util.file_exist_with_id(affil_id)

    def test_read_docs(self):
        """Test case: the number of documents read equals the document count
            stored in the affiliation's core data"""
        self.myAff.read_docs()
        assert len(self.myAff.doc_list) == int(self.myAff.data['coredata']['document-count'])
示例#2
0
class TestFullDoc:
    """Test ScienceDirect article functionality.

    NOTE(review): the read/write tests share the class-level ``myFullDoc``
    object, on which ``test_read_good_bad_client`` calls ``read``; the tests
    defined after it presumably rely on running in definition order.
    """

    ## Test data
    full_pii_uri = "https://api.elsevier.com/content/article/pii/S1674927814000082"
    sd_pii = 'S1674927814000082'
    full_doi_uri = "https://api.elsevier.com/content/article/doi/10.1016/S1525-1578(10)60571-5"
    doi = '10.1016/S1525-1578(10)60571-5'

    ## Test initialization
    def test_init_uri(self):
        """Test case: uri is set correctly during initialization with uri"""
        myFullDoc = FullDoc(uri = self.full_pii_uri)
        assert myFullDoc.uri == self.full_pii_uri

    def test_init_sd_pii(self):
        """Test case: uri is set correctly during initialization with
            ScienceDirect PII"""
        myFullDoc = FullDoc(sd_pii = self.sd_pii)
        assert myFullDoc.uri == self.full_pii_uri

    def test_init_doi(self):
        """Test case: uri is set correctly during initialization with DOI"""
        myFullDoc = FullDoc(doi = self.doi)
        assert myFullDoc.uri == self.full_doi_uri

    ## Test reading/writing full-text article data
    bad_client = ElsClient("dummy")
    good_client = ElsClient(config['apikey'], inst_token = config['insttoken'])
    good_client.local_dir = str(test_path)

    myFullDoc = FullDoc(uri = full_pii_uri)

    def test_read_good_bad_client(self):
        """Test case: using a well-configured client leads to successful read
            and using a badly-configured client does not."""
        assert self.myFullDoc.read(self.bad_client) == False
        assert self.myFullDoc.read(self.good_client) == True

    def test_json_to_dict(self):
        """Test case: the JSON read by the full article object from the
            API is parsed into a Python dictionary"""
        assert isinstance(self.myFullDoc.data, dict)

    def test_title_getter(self):
        """Test case: the title attribute is returned as a non-empty string"""
        title = self.myFullDoc.title
        assert isinstance(title, str) and title != ''

    def test_write(self):
        """Test case: the full article object's data is written to a file
            with the ID in the filename"""
        self.myFullDoc.write()
        # Strip '-', '(' and ')' from the PII in a single pass before the
        # file lookup (replaces the former chain of str.replace calls).
        pii = self.myFullDoc.data['coredata']['pii']
        assert util.file_exist_with_id(pii.translate(str.maketrans('', '', '-()')))
示例#3
0
class TestAbsDoc:
    """Test Scopus document functionality.

    NOTE(review): the read/write tests share the class-level ``myAbsDoc``
    object, on which ``test_read_good_bad_client`` calls ``read``; the tests
    defined after it presumably rely on running in definition order.
    """

    ## Test data
    abs_uri = "https://api.elsevier.com/content/abstract/scopus_id/84872135457"
    scp_id_int = 84872135457
    scp_id_str = "84872135457"

    ## Test initialization
    def test_init_uri(self):
        """Test case: uri is set correctly during initialization with uri"""
        myAbsDoc = AbsDoc(uri = self.abs_uri)
        assert myAbsDoc.uri == self.abs_uri

    def test_init_scp_id_int(self):
        """Test case: uri is set correctly during initialization with Scopus
            id as integer"""
        myAbsDoc = AbsDoc(scp_id = self.scp_id_int)
        assert myAbsDoc.uri == self.abs_uri

    def test_init_scp_id_str(self):
        """Test case: uri is set correctly during initialization with Scopus
            id as string"""
        myAbsDoc = AbsDoc(scp_id = self.scp_id_str)
        assert myAbsDoc.uri == self.abs_uri

    ## Test reading/writing abstract document data
    bad_client = ElsClient("dummy")
    good_client = ElsClient(config['apikey'], inst_token = config['insttoken'])
    good_client.local_dir = str(test_path)

    myAbsDoc = AbsDoc(uri = abs_uri)

    def test_read_good_bad_client(self):
        """Test case: using a well-configured client leads to successful read
            and using a badly-configured client does not."""
        assert self.myAbsDoc.read(self.bad_client) == False
        assert self.myAbsDoc.read(self.good_client) == True

    def test_json_to_dict(self):
        """Test case: the JSON read by the abstract document object from the
            API is parsed into a Python dictionary"""
        assert isinstance(self.myAbsDoc.data, dict)

    def test_title_getter(self):
        """Test case: the title attribute is returned as a non-empty string"""
        title = self.myAbsDoc.title
        assert isinstance(title, str) and title != ''

    def test_write(self):
        """Test case: the abstract document object's data is written to a
            file with the Scopus ID in the filename"""
        self.myAbsDoc.write()
        # 'dc:identifier' is split on ':' and the part after it is the ID
        scopus_id = self.myAbsDoc.data['coredata']['dc:identifier'].split(':')[1]
        assert util.file_exist_with_id(scopus_id)
def get_pubs_org_from_api(org_id: str, api_key=None) -> Optional[list]:
    """Load an organization's publications from Scopus via the API.

    Runs the Scopus search ``(AF-ID(<org_id>))`` with ``get_all=True``, then
    fetches the ``author-affiliation`` link of every publication that has
    one and stores the fetched value under the publication's ``'authors'``
    key.

    Args:
        org_id: value placed inside ``AF-ID(...)`` in the search query.
        api_key: key passed to ``ElsClient`` and to ``get_header``.

    Returns:
        The search results (``search.results``) with ``'authors'`` added
        where available, or ``None`` when the client's last request status
        code is not 200.
    """
    client = ElsClient(api_key)
    search = ElsSearch(f"(AF-ID({org_id}))", 'scopus')  # AND PUBYEAR > 2019
    # TODO: rewrite in asynchronous mode
    search.execute(client,
                   get_all=True)  # load the organization's publications
    if client.req_status['status_code'] != 200:
        return None
    pubs = search.results

    logging.info(f'{len(pubs)} publications received')

    # Build the download tasks for author data:
    # publication index -> URL of its 'author-affiliation' link.
    tasks = {}
    for i, res in enumerate(pubs):
        for authors_link in res['link']:
            if authors_link['@ref'] == 'author-affiliation':
                tasks[i] = authors_link['@href']
                break

    header = get_header(api_key)
    result = async_fetch_urls(tasks.values(), header)
    # Results are paired with the task keys positionally; this assumes
    # async_fetch_urls returns them in request order — TODO confirm.
    for i, j in zip(tasks, result):
        pubs[i]['authors'] = j

    return pubs
示例#5
0
    def search(self):
        """Run every query in ``self.queries`` and save each result set.

        One client is created from the first key produced by
        ``self.key_generator()``. For each query a name is taken from
        ``self.names``, the search is executed with ``get_all=True`` and the
        results are handed to ``self.write_json`` together with the output
        folder ``result/<subject>_<YYYYMMDD>``. Failures are written to
        ``error_log`` and do not stop the remaining queries, except a
        failure to build the name, which ends the loop.
        """
        # initialize the keys
        keygen = self.key_generator()
        init_key = next(keygen)

        # Initialize the elsapy client
        client = ElsClient(init_key, view=self.view)
        count = 0

        folder = Path('result') / f'{self.subject}_{time.strftime("%Y%m%d")}'
        # exist_ok avoids the race between a separate exists() check and mkdir
        folder.mkdir(parents=True, exist_ok=True)

        for query in self.queries:

            try:
                name = next(self.names)
                name = '_'.join(name)
            except Exception:
                # this could happen if your file name contains unexpected characters
                # (Exception rather than a bare except, so Ctrl-C still works)
                error_log.info(f'Name error at {query}.')
                break

            try:
                srch = ElsSearch(query, index=self.subject, keygen=keygen)
                srch.execute(client, get_all=True)
                count += 1
                print(f'Progress: {count}/{self.length}, {query}')
                if srch.status_code == 400:
                    error_log.info(f'Bad query: {name}')
                else:
                    search_log.info(f'Results found: {name}, # of results: {len(srch.results)}')
                    self.write_json(srch.results, name, folder)
            except Exception as e:
                error_log.info(f'Search error: {name}, {str(e)}')
示例#6
0
 def test_init_apikey_insttoken_path(self):
     """Test case: the API key, institutional token and local directory
     passed to the constructor are the ones the client reports back"""
     expected_dir = '\\TEMP'
     client = ElsClient(
         config['apikey'],
         inst_token = config['insttoken'],
         local_dir = expected_dir)
     api_key, inst_token = config['apikey'], config['insttoken']
     assert client.api_key == api_key
     assert client.inst_token == inst_token
     # local_dir is compared through str(), as the client may not store a str
     assert str(client.local_dir) == expected_dir
示例#7
0
 def test_set_apikey_insttoken(self):
     """Test case: assigning ``api_key`` and ``inst_token`` on an existing
     client makes the client report the assigned values"""
     client = ElsClient("dummy")
     api_key, inst_token = config['apikey'], config['insttoken']
     client.api_key = api_key
     client.inst_token = inst_token
     assert client.api_key == api_key
     assert client.inst_token == inst_token
示例#8
0
def search_my_query(my_query):
    '''
    Function to search a query in scopus

    The API key is read from the ``APIKey`` entry of ``config.json`` in the
    current working directory.

    :param my_query: string of query desired to be searched in scopus
    :return: resultant dataframe with query from scopus (``results_df`` of
        the executed search), or None when ``my_query`` is not a string
    '''
    if not isinstance(my_query, str):
        print('the query must be a string. no searches run...')
        return None

    ## Load configuration
    # the context manager closes the file even if json.load raises
    with open("config.json") as con_file:
        config = json.load(con_file)

    ## Initialize client
    client = ElsClient(config['APIKey'])

    ## Initialize doc search object using Scopus and execute search, retrieving all results
    print('......Searching Scopus......')
    # Use the parameter: the previous code referenced an undefined name
    # ``query`` here and in the ElsSearch call below.
    print('......for..... ' + my_query + ' ....')
    doc_srch = ElsSearch(my_query, 'scopus')
    doc_srch.execute(client, get_all=True)
    print("doc_srch has", len(doc_srch.results), "results.")

    return doc_srch.results_df
 def initClient(self):
     """Create ``self.client`` from ``self.config`` on a best-effort basis.

     Reads the ``'apikey'`` and ``'insttoken'`` entries of ``self.config``.
     Never raises: if anything goes wrong the failure is logged as a warning
     and ``self.client`` is left in whatever state it had reached.
     """
     import logging  # local import so the block does not depend on file-level imports

     ## Initialize client
     try:
         self.client = ElsClient(self.config['apikey'])
         self.client.inst_token = self.config['insttoken']
     except Exception:
         # Keep the original best-effort contract, but leave a trace instead
         # of silently discarding the error.
         logging.getLogger(__name__).warning(
             "Could not initialize the Elsevier client", exc_info=True)
示例#10
0
 def auth(self):
     """Create ``self.client`` from the JSON file at ``self.config_path``.

     The file must provide the ``'apikey'`` and ``'insttoken'`` entries;
     the latter is assigned to the client's ``inst_token``.
     """
     # the context manager closes the file even if json.load raises
     with open(self.config_path) as con_file:
         config = json.load(con_file)

     self.client = ElsClient(config['apikey'])
     self.client.inst_token = config['insttoken']
示例#11
0
    def __init__(self, con_path):
        """Load the JSON configuration at ``con_path`` and build the client.

        The parsed configuration is kept on ``self.config``; its ``'apikey'``
        entry is used to create ``self.client``.
        """
        from elsapy.elsclient import ElsClient
        from elsapy.elssearch import ElsSearch

        with open(con_path) as config_handle:
            self.config = json.load(config_handle)

        api_key = self.config['apikey']
        self.client = ElsClient(api_key)
    def __init__(self):
        """Create ``self.client`` from ``config.json`` in the working directory.

        The file must provide the ``'apikey'`` and ``'insttoken'`` entries.
        """
        ## Load configuration
        # the context manager closes the file even if json.load raises
        with open("config.json") as con_file:
            config = json.load(con_file)

        ## Initialize client
        self.client = ElsClient(config['apikey'])
        self.client.inst_token = config['insttoken']
示例#13
0
    def __init__(self, path):
        """Store ``path`` and create ``self.client`` from ``config.json``.

        ``config.json`` is read from the working directory and must provide
        the ``'apikey'`` and ``'insttoken'`` entries.
        """
        self.path = path
        # the context manager closes the file even if json.load raises
        with open("config.json") as con_file:
            config = json.load(con_file)

        ## Initialize client
        self.client = ElsClient(config['apikey'])
        self.client.inst_token = config['insttoken']
def initialiseScopus():
    """Return an ``ElsClient`` built from the ``'apikey'`` entry of
    ``config.json`` in the working directory."""
    ## Load configuration
    # the context manager closes the file even if json.load raises
    with open("config.json") as con_file:
        config = json.load(con_file)

    ## Initialize client
    client = ElsClient(config['apikey'])
    return client
    def __init__(self):
        """Create the API client and the per-author-count country sets.

        ``config.json`` is read from the working directory and must provide
        the ``'apikey'`` and ``'insttoken'`` entries.
        """
        ## Load configuration
        # the context manager closes the file even if json.load raises
        with open("config.json") as con_file:
            config = json.load(con_file)

        ## Initialize client
        self.client = ElsClient(config['apikey'])
        self.client.inst_token = config['insttoken']
        self.psa = ParseSpecialAuthors()
        # one independent empty set per index in range(max_num_authors)
        self.countries_by_num_authors = [set() for _ in range(max_num_authors)]
示例#16
0
def main():
    """Read the author IDs and the API configuration, then collect metrics.

    ``authors.json`` supplies the ``'ids'`` list and ``config.json`` the
    ``'apikey'`` / ``'insttoken'`` entries; both are handed on to
    ``get_metrics`` (the latter two through an ``ElsClient``).
    """
    # Author IDs to process
    with open('authors.json', 'r', encoding='utf-8') as authors_file:
        author_ids = json.load(authors_file)['ids']

    # API credentials
    with open("config.json") as config_file:
        settings = json.load(config_file)

    els_client = ElsClient(settings['apikey'])
    els_client.inst_token = settings['insttoken']

    get_metrics(els_client, author_ids)
    def search(self, query_name):
        """Execute the named query against the Scopus index.

        Args:
            query_name (str): identifies the query; it is passed unchanged
                to ``self._load_query``. The parameter is required.

        Raises:
            FileNotFoundError: presumably raised by ``_load_query`` when the
                query file cannot be found — confirm against that helper.

        Returns:
            The ``results`` of the executed search.
        """
        query_text = self._load_query(query_name)
        els_client = ElsClient(self.api_key)
        scopus_search = ElsSearch(query_text, "scopus")
        scopus_search.execute(els_client)
        return scopus_search.results
def getInfoAboutTeacher(person):
    """Return the Scopus core data of the author identified by ``person``.

    Credentials (``'apikey'`` and ``'insttoken'``) are read from the JSON
    file named by ``SCOPUS_CREDENTIAL_FILE``.

    Args:
        person: object whose ``scopusId`` attribute is appended to the
            author URI.

    Returns:
        The ``'coredata'`` entry of the author's data, or ``None`` when
        reading the author fails (a message is printed in that case).
    """
    # Load configuration
    # the context manager closes the file even if json.load raises
    with open(SCOPUS_CREDENTIAL_FILE) as con_file:
        config = json.load(con_file)

    # Initialize client
    client = ElsClient(config['apikey'])
    client.inst_token = config['insttoken']

    # Initialize author with uri
    my_auth = ElsAuthor(
        uri='https://api.elsevier.com/content/author/author_id/' +
        str(person.scopusId))

    # Read author data (nothing is written to disk here)
    if not my_auth.read(client):
        print("Read author failed.")
        return None
    return my_auth.data['coredata']
示例#19
0
class TestSearch:
    """Test search functionality"""

    ## Test data
    base_url = u'https://api.elsevier.com/content/search/'
    search_types = [
     {"query" : "authlast(keuskamp)", "index" : "author"},
     {"query" : "affil(amsterdam)", "index" : "affiliation"},
     {"query" : "AFFIL(dartmouth) AND AUTHOR-NAME(lewis) AND PUBYEAR > 2011",
              "index" : "scopus"},
     {"query" : "star trek vs star wars", "index" : "sciencedirect"}
    ]

    # One search object per entry of search_types, in the same order
    searches = [ ElsSearch(search_type["query"], search_type["index"])
        for search_type in search_types]

    good_client = ElsClient(config['apikey'], inst_token = config['insttoken'])


    ## Test initialization
    def test_init_uri(self):
        """Test case: query, index and uri are set correctly during
        initialization"""
        # Asserting per search (instead of folding everything into one flag)
        # makes the failure report show which search and which field differ.
        for search, search_type in zip(self.searches, self.search_types):
            assert search.query == search_type['query']
            assert search.index == search_type['index']
            assert search.uri == (self.base_url +
                                  search_type['index'] +
                                  '?query=' +
                                  url_encode(search_type['query']))

    def test_execution(self):
        '''Test case: all searches are executed without raising an exception.'''
        # No assertion is needed: an exception from execute fails the test.
        for search in self.searches:
            search.execute(self.good_client)
示例#20
0
def find_articles(year=None,
                  issn=None,
                  get_all=True,
                  id_type="doi",
                  apikey=None):
    """
    Returns a list of the DOI's for all articles published in the specified year and journal.

    Args:
        year (str): year of publication
        issn (str): ISSN (or EISSN) of journal
        get_all (bool): Whether all results should be returned or just the 1st result. Default is True.
        id_type: (str) Return document eids or dois. Default is doi. Any
            value other than "doi" is used directly as the result key.
        apikey: (str) API key for a dedicated client. When omitted, the
            module-level ``CLIENT`` is used (assumed to be defined elsewhere
            in this module — confirm).

    Returns:
        ids (list): The eids/dois for all articles published in corresponding
            journal in the specified year. Results lacking the requested key
            are skipped.

    """

    query = build_scopus_query(year=year, issn=issn)
    # Bind to a distinct local name: assigning to ``CLIENT`` inside the
    # function made it a local variable, so a call without ``apikey`` always
    # failed with UnboundLocalError instead of using the module-level client.
    if apikey:
        client = ElsClient(apikey, num_res=10000)
    else:
        client = CLIENT
    search = ElsSearch(
        query,
        index='scopus',
    )
    search.execute(els_client=client, get_all=get_all)
    key = 'prism:doi' if id_type == "doi" else id_type
    ids = []
    for r in search.results:
        try:
            ids.append(r[key])
        except (KeyError, TypeError):
            # result without the requested identifier
            continue
    return ids
    def get_doc(self, dtype, identity):
        """
        This method retrieves a 'Doc' object from the Elsevier API. The doc object contains metadata and full-text information
        about a publication associated with a given PII.

        The document is read with a client built from the first entry of
        ``self.API_list``; on success it is also written out via ``doc.write()``,
        otherwise a message is printed. The document object is returned in both cases.

        Parameters:
        -----------
        dtype(str,required): The type of identification string being used to access the document,
            either 'pii' or 'doi'. (Almost always PII in our case.)

        identity: The actual identification string/ PII that will be used to query.

        Raises:
        -------
        ValueError: if ``dtype`` is neither 'pii' nor 'doi'.
        """
        if dtype == 'pii':
            doc = FullDoc(sd_pii = identity)
        elif dtype == 'doi':
            doc = FullDoc(doi = identity)
        else:
            # Previously ``doc`` stayed unbound here and the next statement
            # failed with an unrelated UnboundLocalError.
            raise ValueError("Unsupported dtype %r: expected 'pii' or 'doi'" % (dtype,))

        if doc.read(ElsClient(self.API_list[0])):
            #print ("doc.title: ", doc.title)
            doc.write()
        else:
            print ("Read document failed.")

        return doc
示例#22
0
class TestElsAuthor:
    """Test author object functionality.

    NOTE(review): the read/write tests share the class-level ``myAuth``
    object, on which ``test_read_good_bad_client`` calls ``read``; the tests
    defined after it presumably rely on running in definition order.
    """

    ## Test data
    auth_uri = "https://api.elsevier.com/content/author/author_id/55070335500"
    auth_id_int = 55070335500
    auth_id_str = "55070335500"

    ## Test initialization
    def test_init_uri(self):
        """Test case: uri is set correctly during initialization with uri"""
        myAuth = ElsAuthor(uri = self.auth_uri)
        assert myAuth.uri == self.auth_uri

    def test_init_auth_id_int(self):
        """Test case: uri is set correctly during initialization with author
            id as integer"""
        myAuth = ElsAuthor(author_id = self.auth_id_int)
        assert myAuth.uri == self.auth_uri

    def test_init_auth_id_str(self):
        """Test case: uri is set correctly during initialization with author
            id as string"""
        myAuth = ElsAuthor(author_id = self.auth_id_str)
        assert myAuth.uri == self.auth_uri

    ## Test reading/writing author profile data
    bad_client = ElsClient("dummy")
    good_client = ElsClient(config['apikey'], inst_token = config['insttoken'])
    good_client.local_dir = str(test_path)

    myAuth = ElsAuthor(uri = auth_uri)

    def test_read_good_bad_client(self):
        """Test case: using a well-configured client leads to successful read
            and using a badly-configured client does not."""
        assert self.myAuth.read(self.bad_client) == False
        assert self.myAuth.read(self.good_client) == True

    def test_json_to_dict(self):
        """Test case: the JSON read by the author object from the API is parsed
            into a Python dictionary"""
        assert isinstance(self.myAuth.data, dict)

    def test_name_getter(self):
        """Test case: the full name attribute is returned as a non-empty string"""
        full_name = self.myAuth.full_name
        assert isinstance(full_name, str) and full_name != ''

    def test_write(self):
        """Test case: the author object's data is written to a file with the author
            ID in the filename"""
        self.myAuth.write()
        # 'dc:identifier' is split on ':' and the part after it is the ID
        author_id = self.myAuth.data['coredata']['dc:identifier'].split(':')[1]
        assert util.file_exist_with_id(author_id)

    def test_read_docs(self):
        """Test case: reading the author's documents yields a non-empty list"""
        self.myAuth.read_docs()
        assert len(self.myAuth.doc_list) > 0
        ## TODO: once author metrics inconsistency is resolved, change to:
        # assert len(self.myAuth.doc_list) == int(self.myAuth.data['coredata']['document-count'])

    def test_read_metrics_new_author(self):
        """Test case: reading metrics on a freshly created author object
            fills in the count fields and the h-index"""
        myAuth = ElsAuthor(uri = self.auth_uri)
        myAuth.read_metrics(self.good_client)
        assert (
            myAuth.data['coredata']['citation-count'] and
            myAuth.data['coredata']['cited-by-count'] and
            myAuth.data['coredata']['document-count'] and
            myAuth.data['h-index'])

    def test_read_metrics_existing_author(self):
        """Test case: reading metrics on the shared author object fills in
            the count fields and the h-index"""
        self.myAuth.read_metrics(self.good_client)
        assert (
            self.myAuth.data['coredata']['citation-count'] and
            self.myAuth.data['coredata']['cited-by-count'] and
            self.myAuth.data['coredata']['document-count'] and
            self.myAuth.data['h-index'])
示例#23
0
import pandas as pd
from elsapy.elsclient import ElsClient
from elsapy.elssearch import ElsSearch
import json
# Affiliation ID used in the AF-ID(...) Scopus query below
fefu_id = '60103811'
# Load the API configuration; the context manager closes the file even if
# json.load raises.
with open("config.json") as con_file:
    config = json.load(con_file)
# view = 'COMPLETE' -- to access more fields
client = ElsClient(config['apikey'], num_res = 25)

# Search Scopus for documents of the affiliation and keep the results
search = ElsSearch('AF-ID( ' + fefu_id + ' )', 'scopus')
search.execute(client)
sr = search.results

result = []
res = {}

authorname = ''
authid = ''

# Result keys listed separately from the plain field mapping defined next
special_fields = ['authname', 'authid', 'prism:coverDisplayDate',
                  'prism:pageRange', 'openaccessFlag', 'link', 'prism:coverDate']
fields = {
        'authname'                  : 'Authors',
        'authid'                    : 'Author(s) ID',
        'dc:title'                  : 'Title', 
        'prism:coverDate'           : 'Year', 
        'prism:publicationName'     : 'Source title',
        'prism:volume'              : 'Volume',
        'prism:doi'                 : 'DOI',
示例#24
0
    def auth_query(auth_last, auth_first):
        """Look up an author by name and return id and current affiliation.

        The author index is searched with
        ``authlast(<auth_last>)+AND+authfirst(<auth_first>)``. The search is
        first run with the ``client`` from the enclosing/module scope; if
        that raises, a client built from ``config2.json`` is used instead.
        Only the first result is inspected.

        Returns:
            ``[auth_last, auth_first, author_id, curr_affil]`` where the last
            two entries are ``"CNE"`` when the field could not be extracted,
            ``"DNE"`` when the first result carries an ``'error'`` entry, and
            ``"none"`` when the result list is empty.
        """
        auth_data = [auth_last, auth_first]
        print("Searching for author %s, %s" % (auth_last, auth_first))
        # Initialize search object and execute search under the author index
        query = 'authlast(%s)+AND+authfirst(%s)' % (auth_last, auth_first)

        try:
            auth_srch = ElsSearch(query, 'author')
            auth_srch.execute(client, get_all=False)

        except Exception:
            # Load other configuration with new API Key
            with open("config2.json") as con_file:
                config = json.load(con_file)

            # Initialize new client. It must NOT be bound to the name
            # ``client``: that assignment would make ``client`` local to the
            # whole function, so the first attempt above could never succeed.
            fallback_client = ElsClient(config['apikey'])
            fallback_client.inst_token = config['insttoken']

            auth_srch = ElsSearch(query, 'author')
            auth_srch.execute(fallback_client, get_all=False)

        if (len(auth_srch.results) == 1):
            print("auth_srch has", len(auth_srch.results), "result.")
        else:
            print("auth_srch has", len(auth_srch.results), "results.")

        # Check for an empty result list BEFORE indexing into it
        if not auth_srch.results:
            print("very bad error @ length of auth_srch.results <= 0")
            auth_data.append("none")
            auth_data.append("none")
            return auth_data

        first_result = auth_srch.results[0]

        # checking if no results at all
        error_message = first_result.get('error')
        if error_message:
            # this could be a false positive! the author name could be in the name-variant field
            # I redo the query down below in the next function
            auth_data.append("DNE")
            auth_data.append("DNE")
            print(error_message)
            return auth_data

        # grabs the author_id from the search data
        # this assumes that the wanted author is the first one in results
        # check this out later
        try:
            string_author_id = first_result.get('dc:identifier')
            # this line cuts the author id string from the end of AUTHOR_ID
            # to the end of the id digits
            author_id = string_author_id[10:]
            print("author_id : %s" % author_id)
            auth_data.append(author_id)
        except (AttributeError, TypeError):
            # TypeError: slicing None when 'dc:identifier' is missing
            print("Could not extract auth_id field for %s, %s" % (auth_last, auth_first))
            auth_data.append("CNE")

        # grabs the curr_affil from the search data
        # appends it to auth_data
        try:
            dict_curr_affil = first_result.get('affiliation-current')
            curr_affil = dict_curr_affil.get('affiliation-name')
            print("curr_affil : %s" % curr_affil)
            auth_data.append(curr_affil)
        except AttributeError:
            print("Could not extract curr_affil field for %s, %s" % (auth_last, auth_first))
            auth_data.append("CNE")

        return auth_data
示例#25
0
    def from_database(self, time_constraint):
        """Text-mine ScienceDirect articles for technique keywords.

        Builds search-word lists from the classes found in ``MLTechniques``
        whose ``TECHNIQUE_TYPE`` equals ``self.analysis_type``, runs one
        ScienceDirect search per combination of ``self.queries`` and the
        technique words, downloads every distinct article once and scans it
        with ``TEXTPROCESS.findkeywords``.

        Args:
            time_constraint: level 1-5 selecting the per-query result limit
                (100/250/500/750/1000); values above 1 also admit classes
                whose ``ISDEEP`` is true.

        Returns:
            ``(keywords, keyword_scores, searchwords)`` where the first two
            come from ``self.adjust_output``.
        """
        # the context manager closes the file even if json.load raises
        with open("config.json") as con_file:
            config = json.load(con_file)
        client = ElsClient(config['apikey'])
        ###TODO: add year back in??
        searchwords = {'category': [], 'specific': []}

        # NOTE(review): tech_words stays undefined for any other
        # analysis_type, which fails at generate_combinations below.
        if self.analysis_type == 'supervised':
            tech_words = ["machine learning"]

        elif self.analysis_type == 'unsupervised':
            tech_words = ["clustering"]

        for _name, obj in inspect.getmembers(MLTechniques):
            if (inspect.isclass(obj)
                    and obj.TECHNIQUE_TYPE == self.analysis_type
                    and (not obj.ISDEEP or time_constraint > 1)):
                searchwords['specific'].append(obj.get_name())
                searchwords['category'].append(obj.get_category())

        print(searchwords['category'])
        textmine_results = {'words': [], 'scores': [], 'allwords': []}

        print("-----UNKNOWN DATA DETECTED: INITIATING TEXT MINING-----")
        print()
        # URLs already downloaded; a set keeps the membership test O(1)
        allurls = set()

        combos = self.generate_combinations(self.queries, tech_words)

        # Result limit per supported time_constraint level.
        # NOTE(review): query_size stays undefined for any other level.
        limits = {1: 100, 2: 250, 3: 500, 4: 750, 5: 1000}
        if time_constraint in limits:
            query_size = set_query_number(combos, limits[time_constraint])

        i = 0
        for n, combo in enumerate(combos):
            print("SEARCH QUERY " + str(n + 1) + ":")
            print(combo)
            print()

            # every word is followed by a space, including the last one
            string = "".join(word + " " for word in combo)

            doc_srch = ElsSearch(string, 'sciencedirect')
            results = TEXTMINE.execute_modified(doc_srch.uri,
                                                client,
                                                get_all=True,
                                                set_limit=query_size)

            if results != 0:
                print("SUCCESSFUL QUERY")
                for res in results:

                    DOI = res['prism:doi']
                    # NOTE(review): the API key travels in the query string
                    URL = 'https://api.elsevier.com/content/article/DOI/' + str(
                        DOI) + "?APIkey=" + str(config['apikey'])
                    if URL in allurls:
                        continue

                    r = requests.get(URL)
                    allurls.add(URL)

                    # The article is staged in a scratch file named after
                    # the user id; the with-block closes it.
                    with open(str(self.user_id), 'w') as f:
                        f.write(r.text)

                    foundwords, allwords = TEXTPROCESS.findkeywords(
                        str(self.user_id), searchwords, self.user_keywords)
                    if foundwords:
                        i += 1
                        print(i)
                    textmine_results['words'].extend(
                        list(foundwords.keys()))
                    textmine_results['scores'].extend(
                        list(foundwords.values()))
                    textmine_results['allwords'].extend(allwords)
                    os.remove(str(self.user_id))

        print("------MINING COMPLETE: SEARCHING FOR KEYWORDS-----")
        keywords, keyword_scores = self.adjust_output(textmine_results)
        return keywords, keyword_scores, searchwords
示例#26
0
 def test_init_apikey_insttoken(self):
     """Test case: the API key and institutional token passed to the
     constructor are the ones the client reports back"""
     client = ElsClient(config['apikey'], inst_token = config['insttoken'])
     api_key, inst_token = config['apikey'], config['insttoken']
     assert client.api_key == api_key
     assert client.inst_token == inst_token
示例#27
0
def detailed_auth_query(auth_last, auth_first):
    """Search for an author and look for a UR affiliation among the results.

    The author index is searched with
    ``authlast(<auth_last>)+AND+authfirst(<auth_first>)``. The search is
    first run with the module-level ``client``; if that raises, a client
    built from ``config2.json`` is used instead. Every result is examined:
    when its current affiliation, or an entry of the affiliation history
    returned by ``auth_id_query``, satisfies ``isUR``, the function returns
    immediately.

    Returns:
        ``[auth_last, auth_first, author_id, affiliation]``. The last entry
        is ``'max'`` when 25 or more results were examined without a match,
        ``'na'`` when fewer were, ``'DNE'`` (with the id) when the first
        result carries an ``'error'`` entry, and ``'NONE'`` (with the id)
        when the result list is empty. ``"CNE"`` marks a field that could
        not be extracted.
    """
    auth_data = [auth_last, auth_first, '', '']
    print("Searching for author %s, %s" % (auth_last, auth_first))
    # Initialize search object and execute search under the author index
    query = 'authlast(%s)+AND+authfirst(%s)' % (auth_last, auth_first)

    try:
        auth_srch = ElsSearch(query, 'author')
        auth_srch.execute(client, get_all=False)

    except Exception:
        # Load other configuration with new API Key
        with open("config2.json") as con_file:
            config = json.load(con_file)

        # Initialize new client. It must NOT be bound to the name ``client``:
        # that assignment would make ``client`` local to the whole function,
        # so the first attempt above could never succeed.
        fallback_client = ElsClient(config['apikey'])
        fallback_client.inst_token = config['insttoken']

        auth_srch = ElsSearch(query, 'author')
        auth_srch.execute(fallback_client, get_all=False)

    if (len(auth_srch.results) == 1):
        print("auth_srch has", len(auth_srch.results), "result.")
    else:
        print("auth_srch has", len(auth_srch.results), "results.")

    # Check for an empty result list BEFORE indexing into it
    if (len(auth_srch.results) > 0):

        # checking if no results at all
        error_message = auth_srch.results[0].get('error')

        if (not error_message):

            print("Into the results...")

            # grabs the author_id from the search data
            for i in range(len(auth_srch.results)):

                try:
                    string_author_id = auth_srch.results[i].get('dc:identifier')
                    # this line cuts the author id string from the end of AUTHOR_ID
                    # to the end of the id digits
                    author_id = string_author_id[10:]
                    print("author_id : %s" % author_id)
                    auth_data[2] = author_id

                except (AttributeError, TypeError):
                    # TypeError: slicing None when 'dc:identifier' is missing
                    print("Could not extract auth_id field for %s, %s" % (auth_last, auth_first))
                    auth_data[2] = "CNE"

                # grabs the curr_affil from the search data
                # appends it to auth_data
                # NOTE(review): when this extraction fails, curr_affil keeps
                # the value from the previous result (or is undefined on the
                # first one) and is still used below — confirm the intent.
                try:
                    dict_curr_affil = auth_srch.results[i].get('affiliation-current')
                    curr_affil = dict_curr_affil.get('affiliation-name')
                    print("curr_affil : %s" % curr_affil)

                except AttributeError:
                    print("Could not extract curr_affil field for %s, %s" % (auth_last, auth_first))
                    auth_data[3] = "CNE"

                try:
                    # if UR not current affil go on and search history
                    if (not isUR(curr_affil)):

                        affil_hist = auth_id_query(auth_data[2])

                        try:
                            if (len(affil_hist) > 1):
                                for institution in affil_hist:
                                    try:
                                        affil_instance = institution['ip-doc']['preferred-name']['$']
                                        # if UR affil is found, return immediately
                                        if (isUR(affil_instance)):
                                            curr_affil = affil_instance
                                            auth_data[3] = curr_affil
                                            return auth_data
                                    except Exception:
                                        print("Affiliation instance data for %s,%s wasn't structured correctly." % (auth_data[0], auth_data[1]))
                                        # print(institution)
                            else:
                                try:
                                    affil_instance = affil_hist['ip-doc']['preferred-name']['$']
                                    try:
                                        # if UR affil is found, return immediately
                                        if (isUR(affil_instance)):
                                            curr_affil = affil_instance
                                            auth_data[3] = curr_affil
                                            return auth_data
                                    except TypeError:
                                        print("isUR error")
                                        print(affil_instance)
                                except Exception:
                                    print("Affiliation instance data for %s,%s wasn't structured correctly." % (auth_data[0], auth_data[1]))
                                    # print(institution)

                        except TypeError:
                            print("Type Error occured for affil_hist of %s,%s" % (auth_data[0], auth_data[1]))
                            print(affil_hist)

                    # but if it is then return immediately
                    else:
                        print("Returned with curr_affil : '%s' for %s,%s" % (curr_affil, auth_data[0], auth_data[1]))
                        auth_data[3] = curr_affil
                        return auth_data

                except Exception:
                    print("Something wrong within the returned profile data of %s,%s" % (auth_data[0], auth_data[1]))

            # this is the case of hitting the cap of 25, too many people down the list
            if (len(auth_srch.results) >= 25):
                print("Results CAP of 25 was hit for the %d results of %s,%s" % (len(auth_srch.results), auth_data[0], auth_data[1]))
                auth_data[3] = 'max'
                return auth_data

            # this covers the case of no UR affils found at all
            elif (len(auth_srch.results) < 25):
                print("EXHAUSTED results list of %d results for %s,%s" % (len(auth_srch.results), auth_data[0], auth_data[1]))
                auth_data[3] = 'na'
                return auth_data

        # this could be a false positive! the author name could be in the name-variant field
        # I redo the query down below in the next function
        else:
            auth_data[2] = 'DNE'
            auth_data[3] = 'DNE'
            print(error_message)

    else:
        print("very bad error @ length of auth_srch.results <= 0")
        auth_data[2] = 'NONE'
        auth_data[3] = 'NONE'

    return auth_data
示例#28
0
# -------------------------------------------------------------------------------

#import elsapy

from elsapy.elsclient import ElsClient
from elsapy.elsprofile import ElsAuthor
from elsapy.elssearch import ElsSearch
import pandas as pd

# Script: for every person listed in a CSV file, run an author search built
# from the first and last word of their name.
# NOTE(review): this script is cut off in this file right after the loop's
# `try:` line, so what happens to each search result is not visible here.

# Input CSV; the code below reads its "Name" column.
names_csv = 'sustainability-persons_no_sir_sub.csv'
# NOTE(review): `error_bad_lines` was deprecated in pandas 1.3 and removed in
# pandas 2.0 (superseded by `on_bad_lines`) -- confirm the pandas version this
# script is expected to run on.
df = pd.read_csv(names_csv, error_bad_lines=False)

# NOTE(review): the API key is hard-coded in source; consider loading it from
# a config file or environment variable instead.
API_KEY = 'd54807cb12735c3d461f169c0ae75a2e'

## Initialize client
client = ElsClient(API_KEY)

# Search template; the two %s slots are filled with (first, last) in the loop
# below. The trailing comment is a disabled affiliation-id filter.
query = 'AUTHFIRST(%s) AND AUTHLASTNAME(%s)'  # AND AF-ID(60003892)'

#name_list = df["Name"][86].split()
#first,last = name_list[0],name_list[len(name_list)-1]
# NOTE(review): in the visible code this value is never read -- `name` is
# rebound by the `for` loop below. The lookup itself still requires the
# "Name" column to have at least 27 rows.
name = df["Name"].iloc[26]

# Nothing is appended to this list in the visible part of the script.
profile_urls = []
for name in df["Name"]:
    # Split on whitespace and keep only the first and last tokens; any middle
    # tokens are dropped. Presumably every entry is a non-empty string --
    # an empty one would fail on name_list[0]; verify against the CSV.
    name_list = name.split()
    first, last = name_list[0], name_list[len(name_list) - 1]
    # One 'author' search per person, executed with the shared client.
    auth_srch = ElsSearch(query % (first, last), 'author')
    auth_srch.execute(client)
    #print ("auth_srch has", len(auth_srch.results), "results.")
    try:
# Script: run a ScienceDirect search described by config.json and, when
# requested, save the full text of every hit.
# Keys read from the config: 'get_all', 'full_text', 'open_access', 'query'
# and 'api_key'.
with open('config.json') as fh:
    config = json.load(fh)

# False fetches a single chunk (25 results); True fetches all, up to the max (5000).
GET_ALL = config['get_all']
# Whether to save the full text of each hit.
FULL_TEXT = config['full_text']
# Restrict the search to open-access documents (so the full text can be fetched).
OPEN_ACCESS = config['open_access']

# Example query:
# "public policy AND (impact OR result OR evaluation OR evidence) AND (climate OR environment)"
query = config['query']
# Prefix the open-access filter only when it was asked for.
query = ("openaccess(1) AND " + query) if OPEN_ACCESS else query

client = ElsClient(config['api_key'])

doc_srch = ElsSearch(query, 'sciencedirect')
doc_srch.execute(client, get_all=GET_ALL)

for result in doc_srch.results:
    # The hit's 'dc:identifier' field is echoed and then used as the DOI.
    doi = result['dc:identifier']
    print(doi)

    if not FULL_TEXT:
        continue

    # ScienceDirect (full-text) document lookup by DOI.
    full_doc = FullDoc(doi=doi)
    if not full_doc.read(client):
        print("Read full-text failed for DOI", doi)
        continue
    full_doc.write()
示例#30
0
def _prompt_author_index(num_results):
    """Ask the user which of the listed search results is the right author.

    Keeps prompting until the reply is an integer between 1 and
    ``num_results`` so that a typo cannot crash the run and ``0`` or a
    negative number cannot silently select an entry from the end of the list.

    Args:
        num_results: Number of candidates that were listed (numbered from 1).

    Returns:
        The zero-based index of the chosen candidate.
    """
    while True:
        reply = input('\nChoose correct author: ')
        try:
            choice = int(reply)
        except ValueError:
            choice = 0  # falls through to the out-of-range message below
        if 1 <= choice <= num_results:
            return choice - 1
        print(f'Please enter a number between 1 and {num_results}.')


def main():
    """Resolve author names to author IDs, save them, and fetch metrics.

    Steps:
      1. Read the list of names from ``authors.json`` (key ``'names'``; each
         entry is indexed as ``[first_name, last_name]``).
      2. Read ``'apikey'`` and ``'insttoken'`` from ``config.json`` and build
         the API client.
      3. Run an author search per name. When more than one result comes
         back, list the candidates and let the user pick one interactively.
      4. Write the collected IDs, together with the original names, back to
         ``authors.json`` under the keys ``'ids'`` and ``'names'``.
      5. Hand the IDs to ``get_author_by_id.get_metrics``.
    """
    # Load author names list
    with open('authors.json', 'r', encoding='utf-8') as fp:
        search_list = json.load(fp)['names']

    # Load configuration (context manager closes the file even if parsing fails)
    with open('config.json', encoding='utf-8') as con_file:
        config = json.load(con_file)

    # Initialize client
    client = ElsClient(config['apikey'])
    client.inst_token = config['insttoken']

    # Run search for each author name in the list and collect the IDs
    auth_id_list = []
    # Profile link of the most recently resolved author; stays None when the
    # names list is empty so the summary print below cannot hit an unbound name.
    link = None
    for author in search_list:
        # Either half of the name may be empty; only non-empty parts are
        # added to the query.
        search_query = ""
        if author[0]:
            search_query += f"authfirst({author[0]}) "
        if author[1]:
            search_query += f"authlast({author[1]})"

        auth_srch = ElsSearch(search_query, 'author')
        auth_srch.execute(client)
        print(
            f'\n{author[0]} {author[1]}: {len(auth_srch.results)} results found!\n'
        )

        # If more than one author matches the search, display the candidates
        if len(auth_srch.results) > 1:
            for i, search_result in enumerate(auth_srch.results):
                first_name = search_result['preferred-name']['given-name']
                surname = search_result['preferred-name']['surname']
                try:
                    affiliation = search_result['affiliation-current'][
                        'affiliation-name']
                    affiliation_country = search_result['affiliation-current'][
                        'affiliation-country']
                except KeyError:
                    # No current affiliation on record: show blanks instead.
                    affiliation = ''
                    affiliation_country = ''
                print(
                    f"[{i+1}] {first_name} {surname}, {affiliation} ({affiliation_country})"
                )

            # Choose desired author
            desired_author_index = _prompt_author_index(len(auth_srch.results))

        else:
            desired_author_index = 0

        # Get author ID
        # NOTE(review): if the search yields no usable entry, the lookups
        # below raise. Left unchanged because it is unclear whether such an
        # author should be skipped or should abort the run.
        desired_author = auth_srch.results[desired_author_index]
        link = desired_author['link'][0]['@href']
        # 'dc:identifier' is split on ':' and the part after the first colon
        # is kept as the ID.
        auth_id = desired_author['dc:identifier'].split(':')[1]
        auth_id_list.append(auth_id)

    # Save author IDs to JSON (the names are written back alongside them)
    with open('authors.json', 'w', encoding='utf-8') as fp:
        data = {'ids': auth_id_list, 'names': search_list}
        json.dump(data, fp, indent=4, sort_keys=True)

    # Only the last resolved author's link is reported.
    if link is not None:
        print(link)
    print('\n-----------\n')
    print('Grabbing author metrics...')

    get_author_by_id.get_metrics(client, auth_id_list)