class TestElsAffil:
    """Test affiliation functionality."""

    ## Test data
    aff_uri = "https://api.elsevier.com/content/affiliation/affiliation_id/60101411"
    aff_id_int = 60101411
    aff_id_str = "60101411"

    ## Test initialization
    def test_init_uri(self):
        """Test case: uri is set correctly during initialization with uri."""
        myAff = ElsAffil(uri=self.aff_uri)
        assert myAff.uri == self.aff_uri

    def test_init_aff_id_int(self):
        """Test case: uri is set correctly during initialization with
        affiliation id as integer."""
        myAff = ElsAffil(affil_id=self.aff_id_int)
        assert myAff.uri == self.aff_uri

    def test_init_aff_id_str(self):
        """Test case: uri is set correctly during initialization with
        affiliation id as string."""
        myAff = ElsAffil(affil_id=self.aff_id_str)
        assert myAff.uri == self.aff_uri

    ## Test reading/writing affiliation profile data
    # NOTE(review): the tests below share the class-level ``myAff`` object, so
    # they presumably rely on the read test running first -- confirm.
    bad_client = ElsClient("dummy")
    good_client = ElsClient(config['apikey'], inst_token=config['insttoken'])
    good_client.local_dir = str(test_path)

    myAff = ElsAffil(uri=aff_uri)

    def test_read_good_bad_client(self):
        """Test case: using a well-configured client leads to successful read
        and using a badly-configured client does not."""
        assert self.myAff.read(self.bad_client) == False
        assert self.myAff.read(self.good_client) == True

    def test_json_to_dict(self):
        """Test case: the JSON read by the affiliation object from the API is
        parsed into a Python dictionary."""
        assert isinstance(self.myAff.data, dict)

    def test_name_getter(self):
        """Test case: the name attribute is returned as a non-empty string."""
        assert isinstance(self.myAff.name, str) and self.myAff.name != ''

    def test_write(self):
        """Test case: the affiliation object's data is written to a file with
        the affiliation ID in the filename."""
        self.myAff.write()
        assert util.file_exist_with_id(
            self.myAff.data['coredata']['dc:identifier'].split(':')[1])

    def test_read_docs(self):
        """Test case: the number of documents read equals the document-count
        reported in the affiliation's coredata."""
        self.myAff.read_docs()
        assert len(self.myAff.doc_list) == int(
            self.myAff.data['coredata']['document-count'])
class TestFullDoc:
    """Test ScienceDirect article functionality."""

    ## Test data
    full_pii_uri = "https://api.elsevier.com/content/article/pii/S1674927814000082"
    sd_pii = 'S1674927814000082'
    full_doi_uri = "https://api.elsevier.com/content/article/doi/10.1016/S1525-1578(10)60571-5"
    doi = '10.1016/S1525-1578(10)60571-5'

    ## Test initialization
    def test_init_uri(self):
        """Test case: uri is set correctly during initialization with uri."""
        myFullDoc = FullDoc(uri=self.full_pii_uri)
        assert myFullDoc.uri == self.full_pii_uri

    def test_init_sd_pii(self):
        """Test case: uri is set correctly during initialization with
        ScienceDirect PII."""
        myFullDoc = FullDoc(sd_pii=self.sd_pii)
        assert myFullDoc.uri == self.full_pii_uri

    def test_init_doi(self):
        """Test case: uri is set correctly during initialization with DOI."""
        myFullDoc = FullDoc(doi=self.doi)
        assert myFullDoc.uri == self.full_doi_uri

    ## Test reading/writing full-text article data
    # NOTE(review): the tests below share the class-level ``myFullDoc`` object,
    # so they presumably rely on the read test running first -- confirm.
    bad_client = ElsClient("dummy")
    good_client = ElsClient(config['apikey'], inst_token=config['insttoken'])
    good_client.local_dir = str(test_path)

    myFullDoc = FullDoc(uri=full_pii_uri)

    def test_read_good_bad_client(self):
        """Test case: using a well-configured client leads to successful read
        and using a badly-configured client does not."""
        assert self.myFullDoc.read(self.bad_client) == False
        assert self.myFullDoc.read(self.good_client) == True

    def test_json_to_dict(self):
        """Test case: the JSON read by the full article object from the API is
        parsed into a Python dictionary."""
        assert isinstance(self.myFullDoc.data, dict)

    def test_title_getter(self):
        """Test case: the title attribute is returned as a non-empty string."""
        assert (isinstance(self.myFullDoc.title, str)
                and self.myFullDoc.title != '')

    def test_write(self):
        """Test case: the full article object's data is written to a file with
        the ID in the filename."""
        self.myFullDoc.write()
        # Strip '-', '(' and ')' from the PII in a single pass.
        pii_id = self.myFullDoc.data['coredata']['pii'].translate(
            str.maketrans('', '', '-()'))
        assert util.file_exist_with_id(pii_id)
class TestAbsDoc:
    """Test Scopus document functionality."""

    ## Test data
    abs_uri = "https://api.elsevier.com/content/abstract/scopus_id/84872135457"
    scp_id_int = 84872135457
    scp_id_str = "84872135457"

    ## Test initialization
    def test_init_uri(self):
        """Test case: uri is set correctly during initialization with uri."""
        myAbsDoc = AbsDoc(uri=self.abs_uri)
        assert myAbsDoc.uri == self.abs_uri

    def test_init_scp_id_int(self):
        """Test case: uri is set correctly during initialization with Scopus
        id as integer."""
        myAbsDoc = AbsDoc(scp_id=self.scp_id_int)
        assert myAbsDoc.uri == self.abs_uri

    def test_init_scp_id_str(self):
        """Test case: uri is set correctly during initialization with Scopus
        id as string."""
        myAbsDoc = AbsDoc(scp_id=self.scp_id_str)
        assert myAbsDoc.uri == self.abs_uri

    ## Test reading/writing abstract document data
    # NOTE(review): the tests below share the class-level ``myAbsDoc`` object,
    # so they presumably rely on the read test running first -- confirm.
    bad_client = ElsClient("dummy")
    good_client = ElsClient(config['apikey'], inst_token=config['insttoken'])
    good_client.local_dir = str(test_path)

    myAbsDoc = AbsDoc(uri=abs_uri)

    def test_read_good_bad_client(self):
        """Test case: using a well-configured client leads to successful read
        and using a badly-configured client does not."""
        assert self.myAbsDoc.read(self.bad_client) == False
        assert self.myAbsDoc.read(self.good_client) == True

    def test_json_to_dict(self):
        """Test case: the JSON read by the abstract document object from the
        API is parsed into a Python dictionary."""
        assert isinstance(self.myAbsDoc.data, dict)

    def test_title_getter(self):
        """Test case: the title attribute is returned as a non-empty string."""
        assert (isinstance(self.myAbsDoc.title, str)
                and self.myAbsDoc.title != '')

    def test_write(self):
        """Test case: the abstract document object's data is written to a file
        with the Scopus ID in the filename."""
        self.myAbsDoc.write()
        assert util.file_exist_with_id(
            self.myAbsDoc.data['coredata']['dc:identifier'].split(':')[1])
def get_pubs_org_from_api(org_id: str, api_key=None) -> Optional[list]:
    """Load data on an organization's publications from Scopus via the API.

    Runs an ``(AF-ID(<org_id>))`` search on the ``scopus`` index, then
    fetches the ``author-affiliation`` link of every publication that has one
    and stores the fetched value under that publication's ``'authors'`` key.

    Args:
        org_id: Affiliation id interpolated into the ``AF-ID(...)`` query.
        api_key: Key passed to ``ElsClient`` and to ``get_header``.

    Returns:
        ``search.results`` with the ``'authors'`` entries added, or ``None``
        when the client's last request status code is not 200.
    """
    client = ElsClient(api_key)
    search = ElsSearch(f"(AF-ID({org_id}))", 'scopus')  # AND PUBYEAR > 2019
    # TODO: rewrite in asynchronous mode
    search.execute(client, get_all=True)  # load the organization's publications
    if client.req_status['status_code'] != 200:
        return None
    pubs = search.results
    logging.info(f'{len(pubs)} publications received')

    # Build the download tasks for author data: publication index -> URL of
    # that publication's first 'author-affiliation' link.
    tasks = {}
    for i, res in enumerate(pubs):
        for authors_link in res['link']:
            if authors_link['@ref'] == 'author-affiliation':
                tasks[i] = authors_link['@href']
                break

    header = get_header(api_key)
    # NOTE(review): assumes async_fetch_urls returns its results in the same
    # order as the URLs it was given -- confirm against its implementation.
    result = async_fetch_urls(tasks.values(), header)
    for i, j in zip(tasks.keys(), result):
        pubs[i]['authors'] = j
    return pubs
def search(self):
    """Run every query in ``self.queries`` and save each result set as JSON.

    An ``ElsClient`` is created from the first key yielded by
    ``self.key_generator()``; the same generator is handed to each
    ``ElsSearch``. Results are written through ``self.write_json`` into
    ``result/<subject>_<YYYYMMDD>``. Failures are logged to ``error_log``
    rather than raised; a failure to obtain the next name stops the loop.
    """
    # initialize the keys
    keygen = self.key_generator()
    init_key = next(keygen)

    # Initialize the elsapy client
    client = ElsClient(init_key, view=self.view)
    count = 0

    folder = Path('result') / f'{self.subject}_{time.strftime("%Y%m%d")}'
    # exist_ok avoids the race between checking for and creating the folder.
    folder.mkdir(parents=True, exist_ok=True)

    for query in self.queries:
        try:
            name = next(self.names)
            name = '_'.join(name)
        except Exception:
            # this could happen if your file name contains unexpected
            # characters (or the names iterator is exhausted)
            error_log.info(f'Name error at {query}.')
            break

        try:
            srch = ElsSearch(query, index=self.subject, keygen=keygen)
            srch.execute(client, get_all=True)
            count += 1
            print(f'Progress: {count}/{self.length}, {query}')
            if srch.status_code == 400:
                error_log.info(f'Bad query: {name}')
            else:
                search_log.info(
                    f'Results found: {name}, # of results: {len(srch.results)}')
                self.write_json(srch.results, name, folder)
        except Exception as e:
            error_log.info(f'Search error: {name}, {str(e)}')
def test_init_apikey_insttoken_path(self):
    """Test case: the API key, institutional token and local directory
    passed to the ElsClient constructor are readable back from the client."""
    expected_dir = '\\TEMP'
    expected_key = config['apikey']
    expected_token = config['insttoken']

    els_client = ElsClient(
        expected_key,
        inst_token=expected_token,
        local_dir=expected_dir,
    )

    assert els_client.api_key == expected_key
    assert els_client.inst_token == expected_token
    assert str(els_client.local_dir) == expected_dir
def test_set_apikey_insttoken(self):
    """Test case: values assigned through the api_key and inst_token
    setters are readable back from the client."""
    expected_key = config['apikey']
    expected_token = config['insttoken']

    els_client = ElsClient("dummy")
    els_client.api_key = expected_key
    els_client.inst_token = expected_token

    assert els_client.api_key == expected_key
    assert els_client.inst_token == expected_token
def search_my_query(my_query):
    '''
    Function to search a query in scopus

    The API key is read from the ``APIKey`` entry of ``config.json`` in the
    current working directory.

    :param my_query: string of query desired to be searched in scopus
    :return: resultant dataframe with query from scopus, or ``None`` (after
        printing a message) when ``my_query`` is not a string
    '''
    if not isinstance(my_query, str):
        print('the query must be a string. no searches run...')
        return None

    ## Load configuration
    with open("config.json") as con_file:
        config = json.load(con_file)

    ## Initialize client
    client = ElsClient(config['APIKey'])

    ## Initialize doc search object using Scopus and execute search,
    ## retrieving all results
    print('......Searching Scopus......')
    print('......for..... ' + my_query + ' ....')
    doc_srch = ElsSearch(my_query, 'scopus')
    doc_srch.execute(client, get_all=True)
    print("doc_srch has", len(doc_srch.results), "results.")

    return doc_srch.results_df
def initClient(self):
    """Best-effort creation of ``self.client`` from ``self.config``.

    Builds an ``ElsClient`` from ``self.config['apikey']`` and then sets its
    ``inst_token`` from ``self.config['insttoken']``. Any ordinary error is
    logged as a warning and otherwise ignored, so ``self.client`` may be
    left unset (or without an institutional token) after the call.
    """
    ## Initialize client
    try:
        self.client = ElsClient(self.config['apikey'])
        self.client.inst_token = self.config['insttoken']
    except Exception as exc:
        # Stay best-effort, but leave a trace and let KeyboardInterrupt /
        # SystemExit propagate.
        import logging
        logging.getLogger(__name__).warning(
            "Could not initialise ElsClient: %r", exc)
def auth(self):
    """Create ``self.client`` from the JSON config at ``self.config_path``.

    The config must provide ``apikey`` and ``insttoken`` entries; the latter
    is assigned to the client's ``inst_token``.
    """
    # The context manager closes the file even when JSON parsing fails.
    with open(self.config_path) as con_file:
        config = json.load(con_file)

    self.client = ElsClient(config['apikey'])
    self.client.inst_token = config['insttoken']
def __init__(self, con_path):
    """Load the JSON configuration at ``con_path`` and create the client.

    The parsed configuration is kept on ``self.config``; ``self.client`` is
    an ``ElsClient`` built from its ``apikey`` entry.
    """
    from elsapy.elsclient import ElsClient
    from elsapy.elssearch import ElsSearch

    with open(con_path) as config_handle:
        loaded = json.load(config_handle)
    self.config = loaded

    api_key = self.config['apikey']
    self.client = ElsClient(api_key)
def __init__(self):
    """Create ``self.client`` from ``config.json`` in the working directory.

    The config must provide ``apikey`` and ``insttoken`` entries; the latter
    is assigned to the client's ``inst_token``.
    """
    ## Load configuration
    # The context manager closes the file even when JSON parsing fails.
    with open("config.json") as con_file:
        config = json.load(con_file)

    ## Initialize client
    self.client = ElsClient(config['apikey'])
    self.client.inst_token = config['insttoken']
def __init__(self, path):
    """Store ``path`` and create ``self.client`` from ``config.json``.

    Args:
        path: Stored unchanged on ``self.path``.

    The config file is read from the current working directory and must
    provide ``apikey`` and ``insttoken`` entries.
    """
    self.path = path

    # The context manager closes the file even when JSON parsing fails.
    with open("config.json") as con_file:
        config = json.load(con_file)

    ## Initialize client
    self.client = ElsClient(config['apikey'])
    self.client.inst_token = config['insttoken']
def initialiseScopus():
    """Return an ``ElsClient`` built from the ``apikey`` in ``config.json``.

    The config file is read from the current working directory.
    """
    ## Load configuration
    # The context manager closes the file even when JSON parsing fails.
    with open("config.json") as con_file:
        config = json.load(con_file)

    ## Initialize client
    client = ElsClient(config['apikey'])
    return client
def __init__(self):
    """Create the API client and the per-instance helper state.

    Reads ``apikey`` and ``insttoken`` from ``config.json`` in the working
    directory, then sets up ``self.psa`` and ``self.countries_by_num_authors``
    (one empty set per index below ``max_num_authors``).
    """
    ## Load configuration
    # The context manager closes the file even when JSON parsing fails.
    with open("config.json") as con_file:
        config = json.load(con_file)

    ## Initialize client
    self.client = ElsClient(config['apikey'])
    self.client.inst_token = config['insttoken']

    self.psa = ParseSpecialAuthors()
    self.countries_by_num_authors = [set() for _ in range(max_num_authors)]
def main():
    """Read the author ids and API credentials, then fetch author metrics.

    Author ids come from the ``ids`` entry of ``authors.json``; ``apikey``
    and ``insttoken`` come from ``config.json``. Both are handed to
    ``get_metrics`` via a freshly created ``ElsClient``.
    """
    # Author ID list
    with open('authors.json', 'r', encoding='utf-8') as authors_file:
        author_list = json.load(authors_file)['ids']

    # API configuration
    with open("config.json") as config_file:
        settings = json.load(config_file)

    # Client carrying both the API key and the institutional token
    els_client = ElsClient(settings['apikey'])
    els_client.inst_token = settings['insttoken']

    get_metrics(els_client, author_list)
def search(self, query_name):
    """Run the named query against the ``scopus`` index.

    Args:
        query_name (str): name handed to ``self._load_query`` to obtain the
            query text. The parameter is required; the signature defines no
            default.

    Raises:
        Whatever ``self._load_query`` raises when the query cannot be
        loaded (presumably FileNotFoundError for a missing file -- confirm).

    Returns:
        The ``results`` attribute of the executed search.
    """
    query_text = self._load_query(query_name)
    els_client = ElsClient(self.api_key)
    scopus_search = ElsSearch(query_text, "scopus")
    scopus_search.execute(els_client)
    return scopus_search.results
def getInfoAboutTeacher(person):
    """Return the Scopus author ``coredata`` for ``person``.

    Credentials (``apikey``, ``insttoken``) are read from the JSON file named
    by ``SCOPUS_CREDENTIAL_FILE``. The author URI is built from
    ``person.scopusId``.

    Returns:
        ``my_auth.data['coredata']`` when the read succeeds; otherwise prints
        a message and returns ``None``.
    """
    # Load configuration; the context manager closes the file even when
    # JSON parsing fails.
    with open(SCOPUS_CREDENTIAL_FILE) as con_file:
        config = json.load(con_file)

    # Initialize client
    client = ElsClient(config['apikey'])
    client.inst_token = config['insttoken']

    # Initialize author with uri
    my_auth = ElsAuthor(
        uri='https://api.elsevier.com/content/author/author_id/'
        + str(person.scopusId))

    # Read author data and hand back its core data
    if my_auth.read(client):
        return my_auth.data['coredata']

    print("Read author failed.")
    return None
class TestSearch:
    """Test search functionality."""

    ## Test data
    base_url = 'https://api.elsevier.com/content/search/'
    search_types = [
        {"query": "authlast(keuskamp)", "index": "author"},
        {"query": "affil(amsterdam)", "index": "affiliation"},
        {"query": "AFFIL(dartmouth) AND AUTHOR-NAME(lewis) AND PUBYEAR > 2011",
         "index": "scopus"},
        {"query": "star trek vs star wars", "index": "sciencedirect"}
    ]

    # One search object per entry of search_types, in the same order.
    searches = [ElsSearch(search_type["query"], search_type["index"])
                for search_type in search_types]

    good_client = ElsClient(config['apikey'], inst_token=config['insttoken'])

    ## Test initialization
    def test_init_uri(self):
        """Test case: query, index and uri are set correctly during
        initialization."""
        for search, search_type in zip(self.searches, self.search_types):
            expected_uri = (self.base_url
                            + search_type['index']
                            + '?query='
                            + url_encode(search_type['query']))
            # Separate asserts show which search and which field mismatched.
            assert search.query == search_type['query']
            assert search.index == search_type['index']
            assert search.uri == expected_uri

    def test_execution(self):
        '''Test case: all searches are executed without raising an
        exception.'''
        # Completing the loop without an exception is the pass condition.
        for search in self.searches:
            search.execute(self.good_client)
def find_articles(year=None, issn=None, get_all=True, id_type="doi", apikey=None):
    """
    Returns a list of the DOI's for all articles published in the specified
    year and journal.

    Args:
        year (str): year of publication
        issn (str): ISSN (or EISSN) of journal
        get_all (bool): Whether all results should be returned or just the
            1st result. Default is True.
        id_type: (str) Return document eids or dois. Default is doi. Any
            value other than "doi" is used directly as the result key.
        apikey: (str) When given, a dedicated client is created from it;
            otherwise the module-level ``CLIENT`` is used.

    Returns:
        ids (list): The eids/dois for all articles published in the
            corresponding journal in the specified year. Results lacking the
            requested key are skipped.
    """
    query = build_scopus_query(year=year, issn=issn)

    # Use a separate local name: assigning to ``CLIENT`` inside this function
    # would make it a local variable and raise UnboundLocalError whenever
    # ``apikey`` is not supplied.
    # NOTE(review): presumes a module-level CLIENT exists -- confirm.
    client = ElsClient(apikey, num_res=10000) if apikey else CLIENT

    search = ElsSearch(query, index='scopus')
    search.execute(els_client=client, get_all=get_all)

    key = 'prism:doi' if id_type == "doi" else id_type

    ids = []
    for r in search.results:
        try:
            ids.append(r[key])
        except (KeyError, TypeError):
            # This result does not carry the requested identifier.
            continue
    return ids
def get_doc(self, dtype, identity):
    """
    This method retrieves a 'Doc' object from the Elsevier API. The doc object
    contains metadata and full-text information about a publication associated
    with a given PII.

    Parameters:
    -----------
    dtype(str,required): The type of identification string being used to
    access the document: 'pii' or 'doi'. (Almost always PII in our case.)

    identity: The actual identification string/ PII that will be used to
    query.

    Returns:
    --------
    The ``FullDoc`` object. It is returned even when reading failed, in which
    case "Read document failed." is printed and nothing is written.

    Raises:
    -------
    ValueError: if ``dtype`` is neither 'pii' nor 'doi'.
    """
    if dtype == 'pii':
        doc = FullDoc(sd_pii=identity)
    elif dtype == 'doi':
        doc = FullDoc(doi=identity)
    else:
        # Without this branch ``doc`` would be unbound below.
        raise ValueError(
            "Unsupported dtype {!r}; expected 'pii' or 'doi'.".format(dtype))

    # The client is built from the first entry of self.API_list.
    if doc.read(ElsClient(self.API_list[0])):
        #print ("doc.title: ", doc.title)
        doc.write()
    else:
        print ("Read document failed.")

    return doc
class TestElsAuthor:
    """Test author object functionality."""

    ## Test data
    auth_uri = "https://api.elsevier.com/content/author/author_id/55070335500"
    auth_id_int = 55070335500
    auth_id_str = "55070335500"

    ## Test initialization
    def test_init_uri(self):
        """Test case: uri is set correctly during initialization with uri."""
        myAuth = ElsAuthor(uri=self.auth_uri)
        assert myAuth.uri == self.auth_uri

    def test_init_auth_id_int(self):
        """Test case: uri is set correctly during initialization with author
        id as integer."""
        myAuth = ElsAuthor(author_id=self.auth_id_int)
        assert myAuth.uri == self.auth_uri

    def test_init_auth_id_str(self):
        """Test case: uri is set correctly during initialization with author
        id as string."""
        myAuth = ElsAuthor(author_id=self.auth_id_str)
        assert myAuth.uri == self.auth_uri

    ## Test reading/writing author profile data
    # NOTE(review): the tests below share the class-level ``myAuth`` object,
    # so they presumably rely on the read test running first -- confirm.
    bad_client = ElsClient("dummy")
    good_client = ElsClient(config['apikey'], inst_token=config['insttoken'])
    good_client.local_dir = str(test_path)

    myAuth = ElsAuthor(uri=auth_uri)

    def test_read_good_bad_client(self):
        """Test case: using a well-configured client leads to successful read
        and using a badly-configured client does not."""
        assert self.myAuth.read(self.bad_client) == False
        assert self.myAuth.read(self.good_client) == True

    def test_json_to_dict(self):
        """Test case: the JSON read by the author object from the API is
        parsed into a Python dictionary."""
        assert isinstance(self.myAuth.data, dict)

    def test_name_getter(self):
        """Test case: the full name attribute is returned as a non-empty
        string."""
        assert (isinstance(self.myAuth.full_name, str)
                and self.myAuth.full_name != '')

    def test_write(self):
        """Test case: the author object's data is written to a file with the
        author ID in the filename."""
        self.myAuth.write()
        assert util.file_exist_with_id(
            self.myAuth.data['coredata']['dc:identifier'].split(':')[1])

    def test_read_docs(self):
        """Test case: reading the author's documents yields a non-empty
        document list."""
        self.myAuth.read_docs()
        assert len(self.myAuth.doc_list) > 0
        ## TODO: once author metrics inconsistency is resolved, change to:
        # assert len(self.myAuth.doc_list) == int(self.myAuth.data['coredata']['document-count'])

    def test_read_metrics_new_author(self):
        """Test case: reading metrics on a freshly created author object
        populates the citation, cited-by and document counts and h-index."""
        myAuth = ElsAuthor(uri=self.auth_uri)
        myAuth.read_metrics(self.good_client)
        assert (
            myAuth.data['coredata']['citation-count'] and
            myAuth.data['coredata']['cited-by-count'] and
            myAuth.data['coredata']['document-count'] and
            myAuth.data['h-index'])

    def test_read_metrics_existing_author(self):
        """Test case: reading metrics on the shared author object populates
        the citation, cited-by and document counts and h-index."""
        self.myAuth.read_metrics(self.good_client)
        assert (
            self.myAuth.data['coredata']['citation-count'] and
            self.myAuth.data['coredata']['cited-by-count'] and
            self.myAuth.data['coredata']['document-count'] and
            self.myAuth.data['h-index'])
import pandas as pd from elsapy.elsclient import ElsClient from elsapy.elssearch import ElsSearch import json fefu_id = '60103811' con_file = open("config.json") config = json.load(con_file) con_file.close() # view = 'COMPLETE' -- to access more fields client = ElsClient(config['apikey'], num_res = 25) search = ElsSearch('AF-ID( ' + fefu_id + ' )', 'scopus') search.execute(client) sr = search.results result = [] res = {} authorname = '' authid = '' special_fields = ['authname', 'authid', 'prism:coverDisplayDate', 'prism:pageRange', 'openaccessFlag', 'link', 'prism:coverDate'] fields = { 'authname' : 'Authors', 'authid' : 'Author(s) ID', 'dc:title' : 'Title', 'prism:coverDate' : 'Year', 'prism:publicationName' : 'Source title', 'prism:volume' : 'Volume', 'prism:doi' : 'DOI',
def auth_query(auth_last, auth_first):
    """Look up an author by name and return their id and current affiliation.

    The search runs on the ``author`` index with the outer-scope ``client``.
    If that attempt raises, credentials from ``config2.json`` are used for a
    single retry with a new client.

    Args:
        auth_last: Author last name, interpolated into ``authlast(...)``.
        auth_first: Author first name, interpolated into ``authfirst(...)``.

    Returns:
        ``[auth_last, auth_first, author_id, curr_affil]``, taken from the
        FIRST search result. A field that cannot be extracted is ``"CNE"``;
        both fields are ``"DNE"`` when the first result carries an ``error``
        entry, and ``"none"`` when the result list is empty.
    """
    auth_data = [auth_last, auth_first]
    print("Searching for author %s, %s" % (auth_last, auth_first))

    # Initialize search object and execute search under the author index
    query = 'authlast(%s)+AND+authfirst(%s)' % (auth_last, auth_first)
    try:
        auth_srch = ElsSearch(query, 'author')
        auth_srch.execute(client, get_all=False)
    except Exception:
        # Load other configuration with new API Key
        with open("config2.json") as con_file:
            config = json.load(con_file)

        # Initialize new client. It gets its own name: assigning to
        # ``client`` here would make ``client`` local to this function and
        # make the first attempt above fail every time.
        fallback_client = ElsClient(config['apikey'])
        fallback_client.inst_token = config['insttoken']
        auth_srch = ElsSearch(query, 'author')
        auth_srch.execute(fallback_client, get_all=False)

    if (len(auth_srch.results) == 1):
        print("auth_srch has", len(auth_srch.results), "result.")
    else:
        print("auth_srch has", len(auth_srch.results), "results.")

    # checking if no results at all -- must happen before indexing results[0]
    if not auth_srch.results:
        print("very bad error @ length of auth_srch.results <= 0")
        auth_data.append("none")
        auth_data.append("none")
        return auth_data

    first_result = auth_srch.results[0]
    error_message = first_result.get('error')
    if error_message:
        # this could be a false positive! the author name could be in the
        # name-variant field; the query is redone by another function
        auth_data.append("DNE")
        auth_data.append("DNE")
        print(error_message)
        return auth_data

    # grabs the author_id from the search data
    # this assumes that the wanted author is the first one in results
    try:
        string_author_id = first_result.get('dc:identifier')
        # drops the first 10 characters (the length of an "AUTHOR_ID:"
        # prefix), keeping the id digits
        author_id = string_author_id[10:]
        print("author_id : %s" % author_id)
        auth_data.append(author_id)
    except (AttributeError, TypeError):
        # TypeError covers a missing identifier (slicing None)
        print("Could not extract auth_id field for %s, %s" % (auth_last, auth_first))
        auth_data.append("CNE")

    # grabs the curr_affil from the search data and appends it to auth_data
    try:
        dict_curr_affil = first_result.get('affiliation-current')
        curr_affil = dict_curr_affil.get('affiliation-name')
        print("curr_affil : %s" % curr_affil)
        auth_data.append(curr_affil)
    except AttributeError:
        print("Could not extract curr_affil field for %s, %s" % (auth_last, auth_first))
        auth_data.append("CNE")

    return auth_data
def from_database(self, time_constraint):
    """Text-mine ScienceDirect articles for machine-learning keywords.

    Builds search-word lists from the classes in ``MLTechniques`` whose
    ``TECHNIQUE_TYPE`` matches ``self.analysis_type``, runs one ScienceDirect
    search per query combination, downloads each distinct article once and
    scans it with ``TEXTPROCESS.findkeywords``.

    Args:
        time_constraint: Integer 1-5 selecting the limit passed to
            ``set_query_number`` (100, 250, 500, 750 or 1000). Techniques
            flagged ``ISDEEP`` are only included when it is greater than 1.

    Returns:
        ``(keywords, keyword_scores, searchwords)`` where the first two come
        from ``self.adjust_output`` and ``searchwords`` is the dict of
        ``'category'`` and ``'specific'`` word lists.

    Raises:
        ValueError: if ``self.analysis_type`` is neither ``'supervised'`` nor
            ``'unsupervised'``, or ``time_constraint`` is not in 1-5.
    """
    # The context manager closes the file even when JSON parsing fails.
    with open("config.json") as con_file:
        config = json.load(con_file)

    client = ElsClient(config['apikey'])

    ###TODO: add year back in??
    searchwords = {'category': [], 'specific': []}

    if self.analysis_type == 'supervised':
        tech_words = ["machine learning"]
    elif self.analysis_type == 'unsupervised':
        tech_words = ["clustering"]
    else:
        # Otherwise tech_words would be unbound further down.
        raise ValueError(
            "Unsupported analysis_type: {!r}".format(self.analysis_type))

    for _name, obj in inspect.getmembers(MLTechniques):
        if not inspect.isclass(obj):
            continue
        if obj.TECHNIQUE_TYPE != self.analysis_type:
            continue
        if not obj.ISDEEP or time_constraint > 1:
            searchwords['specific'].append(obj.get_name())
            searchwords['category'].append(obj.get_category())
    print(searchwords['category'])

    textmine_results = {'words': [], 'scores': [], 'allwords': []}

    print("-----UNKNOWN DATA DETECTED: INITIATING TEXT MINING-----")
    print()
    allurls = []

    combos = self.generate_combinations(self.queries, tech_words)

    # time_constraint -> limit handed to set_query_number
    query_limits = {1: 100, 2: 250, 3: 500, 4: 750, 5: 1000}
    if time_constraint not in query_limits:
        raise ValueError(
            "Unsupported time_constraint: {!r}".format(time_constraint))
    query_size = set_query_number(combos, query_limits[time_constraint])

    i = 0  # number of articles in which keywords were found
    for n, combo in enumerate(combos):
        print("SEARCH QUERY " + str(n + 1) + ":")
        print(combo)
        print()

        # Every word is followed by a space, including the last one.
        string = "".join(word + " " for word in combo)

        doc_srch = ElsSearch(string, 'sciencedirect')
        results = TEXTMINE.execute_modified(
            doc_srch.uri, client, get_all=True, set_limit=query_size)

        if results != 0:
            print("SUCCESSFUL QUERY")
            for res in results:
                DOI = res['prism:doi']
                # NOTE(review): the API key travels in the URL query string
                # and is kept in ``allurls``; consider sending it in a
                # request header instead.
                URL = 'https://api.elsevier.com/content/article/DOI/' + str(
                    DOI) + "?APIkey=" + str(config['apikey'])

                if URL not in allurls:
                    r = requests.get(URL)
                    allurls.append(URL)

                    # The article text goes through a scratch file named
                    # after the user id; it is removed again below.
                    with open(str(self.user_id), 'w') as f:
                        f.write(r.text)

                    foundwords, allwords = TEXTPROCESS.findkeywords(
                        str(self.user_id), searchwords, self.user_keywords)

                    if len(list(foundwords.keys())):
                        i += 1
                        print(i)
                        textmine_results['words'].extend(
                            list(foundwords.keys()))
                        textmine_results['scores'].extend(
                            list(foundwords.values()))
                        textmine_results['allwords'].extend(allwords)

                    os.remove(str(self.user_id))

    print("------MINING COMPLETE: SEARCHING FOR KEYWORDS-----")

    keywords, keyword_scores = self.adjust_output(textmine_results)
    return keywords, keyword_scores, searchwords
def test_init_apikey_insttoken(self):
    """Test case: the API key and institutional token passed to the
    ElsClient constructor are readable back from the client."""
    expected_key = config['apikey']
    expected_token = config['insttoken']

    els_client = ElsClient(expected_key, inst_token=expected_token)

    assert els_client.api_key == expected_key
    assert els_client.inst_token == expected_token
def detailed_auth_query(auth_last, auth_first):
    """Search all name matches for an author with a UR affiliation.

    The search runs on the ``author`` index with the outer-scope ``client``.
    If that attempt raises, credentials from ``config2.json`` are used for a
    single retry with a new client. Every result is then inspected in order:
    its current affiliation is tested with ``isUR`` and, failing that, the
    entries returned by ``auth_id_query`` for its author id are tested.

    Args:
        auth_last: Author last name, interpolated into ``authlast(...)``.
        auth_first: Author first name, interpolated into ``authfirst(...)``.

    Returns:
        ``[auth_last, auth_first, author_id, affiliation]``. The function
        returns as soon as a result passes ``isUR``. Otherwise the
        affiliation is ``'max'`` when 25 or more results were inspected and
        ``'na'`` when fewer were. Both fields are ``'DNE'`` when the first
        result carries an ``error`` entry and ``'NONE'`` when the result list
        is empty; ``"CNE"`` marks a field that could not be extracted.
    """
    auth_data = [auth_last, auth_first, '', '']
    print("Searching for author %s, %s" % (auth_last, auth_first))

    # Initialize search object and execute search under the author index
    query = 'authlast(%s)+AND+authfirst(%s)' % (auth_last, auth_first)
    try:
        auth_srch = ElsSearch(query, 'author')
        auth_srch.execute(client, get_all=False)
    except Exception:
        # Load other configuration with new API Key
        with open("config2.json") as con_file:
            config = json.load(con_file)

        # Initialize new client. It gets its own name: assigning to
        # ``client`` here would make ``client`` local to this function and
        # make the first attempt above fail every time.
        fallback_client = ElsClient(config['apikey'])
        fallback_client.inst_token = config['insttoken']
        auth_srch = ElsSearch(query, 'author')
        auth_srch.execute(fallback_client, get_all=False)

    if (len(auth_srch.results) == 1):
        print("auth_srch has", len(auth_srch.results), "result.")
    else:
        print("auth_srch has", len(auth_srch.results), "results.")

    # checking if no results at all -- must happen before indexing results[0]
    if not auth_srch.results:
        print("very bad error @ length of auth_srch.results <= 0")
        auth_data[2] = 'NONE'
        auth_data[3] = 'NONE'
        return auth_data

    error_message = auth_srch.results[0].get('error')
    if error_message:
        # this could be a false positive! the author name could be in the
        # name-variant field; the query is redone by another function
        auth_data[2] = 'DNE'
        auth_data[3] = 'DNE'
        print(error_message)
        return auth_data

    print("Into the results...")
    for result in auth_srch.results:
        # grabs the author_id from the search data
        try:
            string_author_id = result.get('dc:identifier')
            # drops the first 10 characters (the length of an "AUTHOR_ID:"
            # prefix), keeping the id digits
            author_id = string_author_id[10:]
            print("author_id : %s" % author_id)
            auth_data[2] = author_id
        except (AttributeError, TypeError):
            # TypeError covers a missing identifier (slicing None)
            print("Could not extract auth_id field for %s, %s" % (auth_last, auth_first))
            auth_data[2] = "CNE"

        # grabs the curr_affil from the search data
        try:
            dict_curr_affil = result.get('affiliation-current')
            curr_affil = dict_curr_affil.get('affiliation-name')
            print("curr_affil : %s" % curr_affil)
        except AttributeError:
            print("Could not extract curr_affil field for %s, %s" % (auth_last, auth_first))
            auth_data[3] = "CNE"

        try:
            # if UR not current affil go on and search history
            if (not isUR(curr_affil)):
                affil_hist = auth_id_query(auth_data[2])
                try:
                    if (len(affil_hist) > 1):
                        for institution in affil_hist:
                            try:
                                affil_instance = institution['ip-doc']['preferred-name']['$']
                                # if UR affil is found, return immediately
                                if (isUR(affil_instance)):
                                    curr_affil = affil_instance
                                    auth_data[3] = curr_affil
                                    return auth_data
                            except Exception:
                                print("Affiliation instance data for %s,%s wasn't structured correctly." % (auth_data[0], auth_data[1]))
                                # print(institution)
                    else:
                        try:
                            affil_instance = affil_hist['ip-doc']['preferred-name']['$']
                            try:
                                # if UR affil is found, return immediately
                                if (isUR(affil_instance)):
                                    curr_affil = affil_instance
                                    auth_data[3] = curr_affil
                                    return auth_data
                            except TypeError:
                                print("isUR error")
                                print(affil_instance)
                        except Exception:
                            print("Affiliation instance data for %s,%s wasn't structured correctly." % (auth_data[0], auth_data[1]))
                            # print(institution)
                except TypeError:
                    print("Type Error occured for affil_hist of %s,%s" % (auth_data[0], auth_data[1]))
                    print(affil_hist)
            # but if it is then return immediately
            else:
                print("Returned with curr_affil : '%s' for %s,%s" % (curr_affil, auth_data[0], auth_data[1]))
                auth_data[3] = curr_affil
                return auth_data
        except Exception:
            print("Something wrong within the returned profile data of %s,%s" % (auth_data[0], auth_data[1]))

    # this is the case of hitting the cap of 25, too many people down the list
    if (len(auth_srch.results) >= 25):
        print("Results CAP of 25 was hit for the %d results of %s,%s" % (len(auth_srch.results), auth_data[0], auth_data[1]))
        auth_data[3] = 'max'
        return auth_data

    # this covers the case of no UR affils found at all
    print("EXHAUSTED results list of %d results for %s,%s" % (len(auth_srch.results), auth_data[0], auth_data[1]))
    auth_data[3] = 'na'
    return auth_data
# ------------------------------------------------------------------------------- #import elsapy from elsapy.elsclient import ElsClient from elsapy.elsprofile import ElsAuthor from elsapy.elssearch import ElsSearch import pandas as pd names_csv = 'sustainability-persons_no_sir_sub.csv' df = pd.read_csv(names_csv, error_bad_lines=False) API_KEY = 'd54807cb12735c3d461f169c0ae75a2e' ## Initialize client client = ElsClient(API_KEY) query = 'AUTHFIRST(%s) AND AUTHLASTNAME(%s)' # AND AF-ID(60003892)' #name_list = df["Name"][86].split() #first,last = name_list[0],name_list[len(name_list)-1] name = df["Name"].iloc[26] profile_urls = [] for name in df["Name"]: name_list = name.split() first, last = name_list[0], name_list[len(name_list) - 1] auth_srch = ElsSearch(query % (first, last), 'author') auth_srch.execute(client) #print ("auth_srch has", len(auth_srch.results), "results.") try:
# Script: run one ScienceDirect search configured by config.json and,
# optionally, download the full text of every hit.
with open('config.json') as config_file:
    config = json.load(config_file)

# False gets one chunk (25) True gets all or max (5000)
GET_ALL = config['get_all']
# Save fulltext
FULL_TEXT = config['full_text']
# Search only openaccess documents (so we can get the full text)
OPEN_ACCESS = config['open_access']

# Example query:
# "public policy AND (impact OR result OR evaluation OR evidence) AND (climate OR environment)"
query = config['query']
if OPEN_ACCESS:
    query = "openaccess(1) AND " + query

client = ElsClient(config['api_key'])

doc_srch = ElsSearch(query, 'sciencedirect')
doc_srch.execute(client, get_all=GET_ALL)

for doc in doc_srch.results:
    doi = doc['dc:identifier']
    print(doi)

    if not FULL_TEXT:
        continue

    ## ScienceDirect (full-text) document example using DOI
    doi_doc = FullDoc(doi=doi)
    if not doi_doc.read(client):
        print("Read full-text failed for DOI", doi)
        continue
    doi_doc.write()
def main():
    """Resolve author names to Scopus author ids, then fetch their metrics.

    Names are read from the ``names`` entry of ``authors.json`` as
    ``[first, last]`` pairs. Each is searched on the ``author`` index; when a
    search returns more than one result the candidates are listed and the
    user is asked to pick one. After every author the id list is written
    back to ``authors.json`` together with the names. Finally
    ``get_author_by_id.get_metrics`` is called with all collected ids.
    """
    # Load author names list
    with open('authors.json', 'r', encoding='utf-8') as fp:
        data = json.load(fp)
    search_list = data['names']

    # Load configuration; the context manager closes the file even when
    # JSON parsing fails.
    with open("config.json") as con_file:
        config = json.load(con_file)

    # Initialize client
    client = ElsClient(config['apikey'])
    client.inst_token = config['insttoken']

    # Run search for each author names in list and get IDs
    auth_id_list = []
    for author in search_list:
        search_query = ""
        if len(author[0]) > 0:
            search_query += f"authfirst({author[0]}) "
        if len(author[1]) > 0:
            search_query += f"authlast({author[1]})"

        auth_srch = ElsSearch(search_query, 'author')
        auth_srch.execute(client)
        print(
            f'\n{author[0]} {author[1]}: {len(auth_srch.results)} results found!\n'
        )

        # If there are more than one author that matches the search,
        # display search results
        if len(auth_srch.results) > 1:
            for i, search_result in enumerate(auth_srch.results):
                first_name = search_result['preferred-name']['given-name']
                surname = search_result['preferred-name']['surname']
                try:
                    affiliation = search_result['affiliation-current'][
                        'affiliation-name']
                    affiliation_country = search_result['affiliation-current'][
                        'affiliation-country']
                except KeyError:
                    affiliation = ''
                    affiliation_country = ''
                print(
                    f"[{i+1}] {first_name} {surname}, {affiliation} ({affiliation_country})"
                )

            # Choose desired author (the list shown is 1-based)
            desired_author_index = int(input('\nChoose correct author: ')) - 1
        else:
            desired_author_index = 0

        # Get author ID
        desired_author = auth_srch.results[desired_author_index]
        link = desired_author['link'][0]['@href']
        auth_id = desired_author['dc:identifier'].split(':')[1]
        auth_id_list.append(auth_id)

        # Save author ID to JSON
        with open('authors.json', 'w', encoding='utf-8') as fp:
            data = {'ids': auth_id_list, 'names': search_list}
            json.dump(data, fp, indent=4, sort_keys=True)

        print(link)
        print('\n-----------\n')

    print('Grabbing author metrics...')
    get_author_by_id.get_metrics(client, auth_id_list)