def search_my_query(my_query):
    """Search a query in Scopus.

    :param my_query: string of query desired to be searched in scopus
    :return: resultant dataframe with query results from scopus, or None
        when ``my_query`` is not a string
    """
    if isinstance(my_query, str):
        ## Load configuration (API key) from the local config file.
        with open("config.json") as con_file:
            config = json.load(con_file)

        ## Initialize client
        client = ElsClient(config['APIKey'])

        ## Initialize doc search object using Scopus and execute search,
        ## retrieving all results
        print('......Searching Scopus......')
        # BUG FIX: the original referenced an undefined name `query` here
        # and below; the parameter is `my_query`.
        print('......for..... ' + my_query + ' ....')
        doc_srch = ElsSearch(my_query, 'scopus')
        doc_srch.execute(client, get_all=True)
        print("doc_srch has", len(doc_srch.results), "results.")
        return doc_srch.results_df
    else:
        print('the query must be a string. no searches run...')
        return
def get_pubs_org_from_api(org_id: str, api_key=None) -> Optional[list]:
    """Load and return data on publications of an organization from Scopus via API.

    :param org_id: Scopus affiliation ID of the organization
    :param api_key: Elsevier API key passed to the client
    :return: list of publication records, each augmented with an ``authors``
        entry fetched from the author-affiliation link, or None when the
        search request does not return HTTP 200
    """
    # NOTE: the original annotated the return type as Optional[json.dumps],
    # which is a function, not a type; corrected to Optional[list].
    client = ElsClient(api_key)
    search = ElsSearch(f"(AF-ID({org_id}))", 'scopus')  # AND PUBYEAR > 2019
    # TODO: rewrite in asynchronous mode
    search.execute(client, get_all=True)  # load the organization's publication data
    if client.req_status['status_code'] != 200:
        return None
    pubs = search.results
    logging.info(f'{len(pubs)} publications received')
    # Build the task list for downloading author data (one URL per publication
    # that exposes an author-affiliation link).
    tasks = defaultdict(list)
    for i, res in enumerate(pubs):
        for authors_link in res['link']:
            if authors_link['@ref'] == 'author-affiliation':
                tasks[i] = authors_link['@href']
                break
    header = get_header(api_key)
    result = async_fetch_urls(tasks.values(), header)
    # Attach the fetched author payload to its publication record.
    for i, j in zip(tasks.keys(), result):
        pubs[i]['authors'] = j
    return pubs
def find_articles(year=None, issn=None, get_all=True):
    """
    Returns a list of the DOI's for all articles published in the specified
    year and journal.

    Args:
        year (str): year of publication
        issn (str): ISSN (or EISSN) of journal
        get_all (bool): Whether all results should be returned or just the
            1st result. Default is True.

    Returns:
        dois (list of str): The dois for all articles published in the
        corresponding journal in the specified year
    """
    query = build_scopus_query(year=year, issn=issn)
    search = ElsSearch(
        query,
        index='scopus',
    )
    search.execute(els_client=CLIENT, get_all=get_all)
    dois = []
    for r in search.results:
        # Some records (errata, letters, ...) carry no DOI; skip just those
        # instead of swallowing every exception with a bare except.
        try:
            dois.append(r['prism:doi'])
        except KeyError:
            continue
    return dois
def search(self):
    """Run every query in ``self.queries`` against the search API and write
    each result set as JSON into a dated folder under ``result/``.

    Failures are routed to ``error_log``; successes to ``search_log``.
    """
    # initialize the keys (rotating key generator)
    keygen = self.key_generator()
    init_key = next(keygen)
    # Initialize the elsapy client
    client = ElsClient(init_key, view=self.view)
    count = 0
    folder = Path('result') / f'{self.subject}_{time.strftime("%Y%m%d")}'
    if not folder.exists():
        folder.mkdir(parents=True)
    for query in self.queries:
        try:
            name = next(self.names)
            name = '_'.join(name)
        except (StopIteration, TypeError):
            # this could happen if your file name contains unexpected characters
            error_log.info(f'Name error at {query}.')
            break
        try:
            srch = ElsSearch(query, index=self.subject, keygen=keygen)
            srch.execute(client, get_all=True)
            count += 1
            print(f'Progress: {count}/{self.length}, {query}')
            if srch.status_code == 400:
                error_log.info(f'Bad query: {name}')
            else:
                search_log.info(f'Results found: {name}, # of results: {len(srch.results)}')
                self.write_json(srch.results, name, folder)
        except Exception as e:
            error_log.info(f'Search error: {name}, {str(e)}')
def search(self):
    """Count hits per year on ScienceDirect for the title and its similar
    variants, appending one ``title;year;count`` line per year to the
    instance's output file.
    """
    # OR together the main title and every similar title.
    base_query = f"({self._title})"
    for alt in self._similar:
        base_query += f" OR ({alt})"
    for yr in range(self._begin_year, self._end_year + 1):
        yearly_query = base_query + f" AND PUBYEAR = {yr}"
        srch = ElsSearch(yearly_query, 'sciencedirect')
        srch.execute(client, get_all=False)
        # Append this year's total hit count as a CSV-ish line.
        with open(self._filename, "a") as out:
            out.write(f"{self._new_title};{yr};{srch.tot_num_res}\n")
def get_relevant_papers(patient_id):
    """Search Scopus for each of the patient's condition terms and return
    the 5 most cited papers among all collected hits.

    :param patient_id: identifier passed to get_search_terms
    :return: whatever get_n_most_cited returns for the collected records
    """
    collected = {}
    idx = 0
    for term in get_search_terms(patient_id):
        srch = ElsSearch(term, 'scopus')
        srch.execute(client, get_all=False)
        for record in srch.results:
            collected[str(idx)] = record
            idx += 1
        print("doc_srch for ", term, " has", len(srch.results), "results.")
    return get_n_most_cited(5, collected)  #CHANGE TO NUMBER OF WANTED PAPERS
def doi(self, doi):
    """Look up a single Scopus record by DOI.

    :param doi: the DOI to search for
    :return: dict with keys ``doi``, ``title`` and the full ``record``,
        or None when no error-free result is found
    """
    # NOTE: the original also imported ElsClient here but never used it;
    # the unused import has been removed.
    from elsapy.elssearch import ElsSearch
    aff_srch = ElsSearch("DOI ( %s )" % doi, 'scopus', maxResults=1)
    aff_srch.execute(self.client, get_all=True)
    # Filter out error placeholder entries returned by the API.
    rs = [x for x in aff_srch.results if "error" not in x]
    if rs:
        doi = rs[0]['prism:doi']
        return {"doi": doi, "title": rs[0]["dc:title"], "record": rs[0]}
    else:
        return None
def get_docs_by_author(author_id):
    """
    Yield documents published by an author.

    :param author_id: Scopus author ID
    :return: generator over the search result records
    """
    logger.info(f'Searching docs by author {author_id}')
    scopus_key = key_manager.get_key('scopus_search')
    els = elsclient.ElsClient(scopus_key)
    srch = ElsSearch(f'au-id({author_id})', 'scopus')
    srch.execute(els, True)
    yield from srch.results
def get_author_by_name(last_name, first_name, affiliation_id='60005248'):
    """
    Search an author by first name, last name and affiliation.

    :param last_name: author surname
    :param first_name: author given name
    :param affiliation_id: Scopus affiliation ID filter
    :return: generator yielding a single dict with the raw results
    """
    logger.info('searching authors by name and affiliation')
    scopus_key = key_manager.get_key('scopus_search')
    els = elsclient.ElsClient(scopus_key)
    srch = ElsSearch(
        f'authlast({last_name}) and authfirst({first_name}) and af-id({affiliation_id})',
        'author')
    srch.execute(els)
    yield {
        'first_name': first_name,
        'last_name': last_name,
        'results': srch.results,
    }
def search(self, query="A Lightweight Autoencoder"):
    """Search ScienceDirect for ``query`` and record title/year/link/abstract
    of each readable document into ``self.data`` under a time-derived id.

    :param query: search string (default "A Lightweight Autoencoder")
    """
    doc_srch = ElsSearch(query, 'sciencedirect')
    doc_srch.execute(self.client, get_all=False)
    for _, doc in doc_srch.results_df.iterrows():
        pii_doc = FullDoc(sd_pii=doc['pii'])
        if pii_doc.read(self.client):
            # Skip records with missing or oddly-shaped metadata instead of
            # the original bare except that hid every possible error.
            try:
                abstract = " ".join(pii_doc.data['coredata']['dc:description'].split()[1:])
                doc_id = str(hex(time.time().as_integer_ratio()[0]))
                title = doc['dc:title']
                pdf_link = doc['link']['scidir']
                dates = doc['load-date'].split('-')[0]
                self.data[doc_id] = {"title": title, "year": dates,
                                     "link": pdf_link, "Abstract": abstract}
            except (KeyError, AttributeError, TypeError):
                pass
        else:
            print("Doc Skipped!!")
def search(self, query_name):
    """Do a search.

    Args:
        query_name (str): the name of the file in the ./queries/ directory
            that contains the query. Defaults to "query.txt".

    Raises:
        FileNotFoundError if the query file can not be found.

    Returns:
        list: The results.
    """
    loaded_query = self._load_query(query_name)
    els_client = ElsClient(self.api_key)
    srch = ElsSearch(loaded_query, "scopus")
    srch.execute(els_client)
    return srch.results
def author_score(fname, lname):
    """Sum the h-indices of the authors given by parallel name lists.

    :param fname: list of first names
    :param lname: list of last names, parallel to ``fname``
    :return: total h-index across all matched author profiles
    """
    client = elsevier_auth()
    num = len(fname)
    count = 0
    total = 0
    score = 0
    for first, last in zip(fname, lname):
        start = time.time()
        print(first, last)
        myDocSrch = ElsSearch(
            'AUTHLASTNAME(' + last + ') AND AUTHFIRST(' + first + ')', 'author')
        myDocSrch.execute(client)
        for x in myDocSrch.results:
            # Entries without an identifier (e.g. error records) are skipped.
            try:
                a_id = x['dc:identifier']
            except (KeyError, TypeError):
                continue
            auth_id = a_id.replace('AUTHOR_ID:', '')
            author = ElsAuthor(author_id=auth_id)
            if author.read_metrics(client):
                h_index = author.data['h-index']
                score += h_index
                print(first, last, " ID:", auth_id, " h-index:", h_index)
            else:
                # No metrics available: contributes nothing to the score.
                print("no data")
        # Progress / ETA bookkeeping.
        end = time.time()
        total += end - start
        count += 1
        num -= 1
        avg = total / count
        est = (num * avg) / 60
        print("time used for this author:", end - start, "s")
        print(num, "authors, estimated time left:", est, "minutes")
        print()
    return score
def retrivePublicationForAuthor(author, client, output_file):
    """Search Scopus for the author's publications after 2018 and write the
    ones whose title contains any of the module-level ``key_words`` to
    ``output_file`` as tab-separated rows.

    :param author: author name for the AUTHOR-NAME() Scopus field
    :param client: ElsClient instance
    :param output_file: writable file object receiving one TSV row per match
    """
    doc_srch = ElsSearch("AUTHOR-NAME({}) AND PUBYEAR > 2018".format(author), 'scopus')
    doc_srch.execute(client, get_all=True)
    print(author)
    print("doc_srch has", len(doc_srch.results), "results.")
    for res in doc_srch.results:
        if any(i in res['dc:title'] for i in key_words):
            # Prefer the DOI; fall back to the Scopus URL, else leave blank.
            try:
                doi = res['prism:doi']
            except KeyError:
                doi = res.get('prism:url', '')
            output_file.write('{}\t{}\t{}\t{}\t{}\n'.format(
                res['dc:title'], doi, res['prism:coverDate'],
                res['subtypeDescription'], res['prism:publicationName']))
def apiCall(query):
    """Query ScienceDirect and return up to 5 results that have abstracts.

    :param query: search string
    :return: list of dicts with abstract/author/title/source/sourceURL;
        empty when the search itself fails
    """
    results = []
    searchResult = ElsSearch(query, 'sciencedirect')
    # Check if API returns valid results.
    try:
        searchResult.execute(client, get_all=False)
    except Exception:
        # Search failure (bad key, network, malformed query): no results.
        return results
    for result in searchResult.results:
        # Attempt to retrieve the abstract; skip records missing any field
        # or whose abstract request fails, instead of a bare except.
        try:
            DOI = result["prism:doi"]
            abstractResp = requests.get(url=abstractURL + DOI, headers=API_headers)
            abstractData = json.loads(abstractResp.text)
            # If valid abstract found then extract required data.
            abstract = abstractData["abstracts-retrieval-response"][
                "coredata"]["dc:description"]
            author = result["dc:creator"]
            title = result["dc:title"]
            source = result["prism:publicationName"]
            sourceURL = result["prism:url"]
            results.append({
                "abstract": abstract,
                "author": author,
                "title": title,
                "source": source,
                "sourceURL": sourceURL
            })
        except (KeyError, ValueError, requests.RequestException):
            pass
        if len(results) == 5:
            break
    return results
def find_articles(year=None, issn=None, get_all=True, id_type="doi", apikey=None):
    """
    Returns a list of the DOI's for all articles published in the specified
    year and journal.

    Args:
        year (str): year of publication
        issn (str): ISSN (or EISSN) of journal
        get_all (bool): Whether all results should be returned or just the
            1st result. Default is True.
        id_type: (str) Return document eids or dois. Default is doi.
        apikey: optional API key; when given a dedicated client is built,
            otherwise the module-level CLIENT is used.

    Returns:
        ids (list of str): The eids/dois for all articles published in
        corresponding journal in the specified year
    """
    query = build_scopus_query(year=year, issn=issn)
    # BUG FIX: the original assigned to CLIENT inside the `if`, making
    # CLIENT function-local; calling without an apikey then raised
    # UnboundLocalError. Fall back to the module-level CLIENT instead.
    els_client = ElsClient(apikey, num_res=10000) if apikey else CLIENT
    search = ElsSearch(
        query,
        index='scopus',
    )
    search.execute(els_client=els_client, get_all=get_all)
    key = 'prism:doi' if id_type == "doi" else id_type
    ids = []
    for r in search.results:
        # Records lacking the requested identifier are skipped.
        try:
            ids.append(r[key])
        except KeyError:
            continue
    return ids
def query(self, country, keywords=words, after_year=None):
    """Yield Scopus records whose title/abstract/keywords match any of the
    given keywords and whose affiliation country matches ``country``.

    :param country: value for the AFFILCOUNTRY() Scopus field
    :param keywords: keyword list OR-ed into TITLE-ABS-KEY clauses
    :param after_year: if given, restrict to PUBYEAR AFT this year
    :return: generator over the result records
    """
    from elsapy.elssearch import ElsSearch
    # Build: TITLE-ABS-KEY ("w1") OR TITLE-ABS-KEY ("w2") ...
    clauses = ['TITLE-ABS-KEY ("' + kw + '") ' for kw in keywords]
    kwfilter = "OR ".join(clauses)
    year_clause = "AND PUBYEAR AFT " + str(after_year) if after_year else ""
    full_query = ("((" + kwfilter + ") AND AFFILCOUNTRY ( " + country + " ) "
                  + year_clause + " )")
    srch = ElsSearch(full_query, 'scopus', maxResults=10000)
    srch.execute(self.client, get_all=True)
    yield from srch.results
def get_docs_by_year(year, affl_id='60005248', get_all=False):
    """
    Get documents by year.

    :param year: publication year to filter on
    :param affl_id: Scopus affiliation ID
    :param get_all: whether to page through all results
    :return: list of result records
    """
    logger.info(f'Searching docs for year {year} and affiliation {affl_id}')
    scopus_key = key_manager.get_key('scopus_search')
    els = elsclient.ElsClient(scopus_key)
    # Split the search since for recent years JHU has publications more
    # than 5,000 each year.
    first_search = ElsSearch(f'af-id({affl_id})', 'scopus', {'date': year})
    first_search.execute(els, get_all)
    combined_results = first_search.results
    # # search_two = ElsSearch('af-id(60005248) AND NOT subjarea(MEDI)', 'scopus', {'date': year})
    # search_two.execute(client, get_all)
    # all_results += search_two.results
    return combined_results
def scidir_search(search_terms, database):
    """
    Initialize a doc search object using ScienceDirect and/or Scopus,
    execute the search retrieving all results, and dump them to
    ``data/<database>.csv``.

    parameter
    ---------
    search_terms (str): The string to search
    database: database name to search

    result
    ------
    None; results are written to disk as CSV.
    """
    print("Running scidir_search...")
    print("Searching: {}".format(database))
    srch = ElsSearch(search_terms, database)
    srch.execute(client, get_all=True)
    print(
        "Retrieved {} from {}. Writing to file for further processing".format(
            len(srch.results), database))
    srch.results_df.to_csv('data/' + str(database) + '.csv', index=None)
def auth_query(auth_last, auth_first):
    """Search the Scopus author index by last/first name.

    Returns ``[last, first, author_id, current_affiliation]`` where the
    last two entries may be sentinel strings: "CNE" (could not extract),
    "DNE" (error result), "none" (no results at all).
    """
    auth_data = [auth_last, auth_first]
    print("Searching for author %s, %s" % (auth_last, auth_first))
    # Initialize search object and execute search under the author index
    query = 'authlast(%s)+AND+authfirst(%s)' % (auth_last, auth_first)
    # BUG FIX: the original assigned to `client` inside the except branch,
    # which made `client` local to the entire function, so the first
    # execute() always raised UnboundLocalError and every call fell through
    # to the fallback key. Use a separate local name so the module-level
    # client is genuinely tried first.
    try:
        els_client = client
        auth_srch = ElsSearch(query, 'author')
        auth_srch.execute(els_client, get_all=False)
    except Exception:
        # Load other configuration with new API Key
        with open("config2.json") as con_file:
            config = json.load(con_file)
        # Initialize new client
        els_client = ElsClient(config['apikey'])
        els_client.inst_token = config['insttoken']
        auth_srch = ElsSearch(query, 'author')
        auth_srch.execute(els_client, get_all=False)
    if (len(auth_srch.results) == 1):
        print("auth_srch has", len(auth_srch.results), "result.")
    else:
        print("auth_srch has", len(auth_srch.results), "results.")
    # checking if no results at all
    error_message = auth_srch.results[0].get('error')
    if (len(auth_srch.results) > 0):
        if (not error_message):
            # grabs the author_id from the search data;
            # this assumes that the wanted author is the first one in
            # results — check this out later
            try:
                string_author_id = auth_srch.results[0].get('dc:identifier')
                # this line cuts the author id string from the end of
                # AUTHOR_ID to the end of the id digits
                author_id = string_author_id[10:]
                print("author_id : %s" % author_id)
                auth_data.append(author_id)
            except AttributeError:
                print("Could not extract auth_id field for %s, %s" %
                      (auth_last, auth_first))
                auth_data.append("CNE")
            # grabs the curr_affil from the search data and appends it
            try:
                dict_curr_affil = auth_srch.results[0].get('affiliation-current')
                curr_affil = dict_curr_affil.get('affiliation-name')
                print("curr_affil : %s" % curr_affil)
                auth_data.append(curr_affil)
            except AttributeError:
                print("Could not extract curr_affil field for %s, %s" %
                      (auth_last, auth_first))
                auth_data.append("CNE")
            # this could be a false positive! the author name could be in
            # the name-variant field; the query is redone in the next function
        else:
            auth_data.append("DNE")
            auth_data.append("DNE")
            print(error_message)
    else:
        print("very bad error @ length of auth_srch.results <= 0")
        auth_data.append("none")
        auth_data.append("none")
    return auth_data
def main():
    """Resolve Scopus author IDs for the names in authors.json, then fetch
    their metrics.

    Reads names from authors.json, queries the Scopus author index for each,
    asks the user to disambiguate when several profiles match, and rewrites
    authors.json with the collected IDs (saved progressively so a crash
    mid-run loses nothing).
    """
    # Load author names list
    with open('authors.json', 'r', encoding='utf-8') as fp:
        data = json.load(fp)
    search_list = data['names']
    # Load configuration
    con_file = open("config.json")
    config = json.load(con_file)
    con_file.close()
    # Initialize client
    client = ElsClient(config['apikey'])
    client.inst_token = config['insttoken']
    # Run search for each author names in list and get IDs
    auth_id_list = []
    for author in search_list:
        # author is a (first, last) pair; either part may be empty.
        search_query = ""
        if len(author[0]) > 0:
            search_query += f"authfirst({author[0]}) "
        if len(author[1]) > 0:
            search_query += f"authlast({author[1]})"
        auth_srch = ElsSearch(search_query, 'author')
        auth_srch.execute(client)
        print(
            f'\n{author[0]} {author[1]}: {len(auth_srch.results)} results found!\n'
        )
        # If there are more than one author that matches the search, display search results
        if len(auth_srch.results) > 1:
            for i, search_result in enumerate(auth_srch.results):
                first_name = search_result['preferred-name']['given-name']
                surname = search_result['preferred-name']['surname']
                # Affiliation fields may be absent for some profiles.
                try:
                    affiliation = search_result['affiliation-current'][
                        'affiliation-name']
                    affiliation_country = search_result['affiliation-current'][
                        'affiliation-country']
                except KeyError:
                    affiliation = ''
                    affiliation_country = ''
                print(
                    f"[{i+1}] {first_name} {surname}, {affiliation} ({affiliation_country})"
                )
            # Choose desired author (1-based prompt, 0-based index)
            desired_author_index = int(input('\nChoose correct author: ')) - 1
        else:
            desired_author_index = 0
        # Get author ID
        desired_author = auth_srch.results[desired_author_index]
        link = desired_author['link'][0]['@href']
        auth_id = desired_author['dc:identifier'].split(':')[1]
        auth_id_list.append(auth_id)
        # Save author ID to JSON
        # NOTE(review): rewritten every iteration — presumably deliberate
        # progressive saving; confirm against the original layout.
        with open('authors.json', 'w', encoding='utf-8') as fp:
            data = {'ids': auth_id_list, 'names': search_list}
            json.dump(data, fp, indent=4, sort_keys=True)
        print(link)
        print('\n-----------\n')
    print('Grabbing author metrics...')
    get_author_by_id.get_metrics(client, auth_id_list)
# Script: run a configured ScienceDirect search and optionally download
# full text for each hit.
# NOTE(review): `config_file` is opened earlier in the file (not visible here).
config = json.load(config_file)
GET_ALL = config[
    'get_all']  # False gets one chunk (25) True gets all or max (5000)
FULL_TEXT = config['full_text']  # Save fulltext
OPEN_ACCESS = config[
    'open_access']  # Search only openaccess documents (so we can get the full text)
# "public policy AND (impact OR result OR evaluation OR evidence) AND (climate OR environment)"
query = config['query']
if OPEN_ACCESS:
    # Prepend the open-access filter so full text is retrievable.
    query = "openaccess(1) AND " + query
client = ElsClient(config['api_key'])
doc_srch = ElsSearch(query, 'sciencedirect')
doc_srch.execute(client, get_all=GET_ALL)
for doc in doc_srch.results:
    doi = doc['dc:identifier']
    print(doi)
    if FULL_TEXT:
        ## ScienceDirect (full-text) document example using DOI
        doi_doc = FullDoc(doi=doi)
        if doi_doc.read(client):
            doi_doc.write()
        else:
            print("Read full-text failed for DOI", doi)
print("# Found", len(doc_srch.results), "results.")
# elsapy example flow — `my_auth`, `my_aff`, and `client` are created
# earlier in the file (not visible in this chunk).
if my_auth.read_docs(client):
    print ("my_auth.doc_list has " + str(len(my_auth.doc_list)) + " items.")
    my_auth.write_docs()
else:
    print ("Read docs for author failed.")

## Read all documents for example affiliation, then write to disk
if my_aff.read_docs(client):
    print ("my_aff.doc_list has " + str(len(my_aff.doc_list)) + " items.")
    my_aff.write_docs()
else:
    print ("Read docs for affiliation failed.")

## Initialize author search object and execute search
auth_srch = ElsSearch('authlast(keuskamp)','author')
auth_srch.execute(client)
print ("auth_srch has", len(auth_srch.results), "results.")

## Initialize affiliation search object and execute search
aff_srch = ElsSearch('affil(amsterdam)','affiliation')
aff_srch.execute(client)
print ("aff_srch has", len(aff_srch.results), "results.")

## Initialize doc search object using Scopus and execute search, retrieving
# all results
doc_srch = ElsSearch("AFFIL(dartmouth) AND AUTHOR-NAME(lewis) AND PUBYEAR > 2011",'scopus')
doc_srch.execute(client, get_all = True)
print ("doc_srch has", len(doc_srch.results), "results.")

## Initialize doc search object using ScienceDirect and execute search,
# retrieving all results
import pandas as pd
from elsapy.elsclient import ElsClient
from elsapy.elssearch import ElsSearch
import json

# Scopus affiliation ID — presumably FEFU (Far Eastern Federal
# University), given the variable name; TODO confirm.
fefu_id = '60103811'

# Load API configuration from the local config file.
con_file = open("config.json")
config = json.load(con_file)
con_file.close()

# view = 'COMPLETE' -- to access more fields
client = ElsClient(config['apikey'], num_res = 25)

# Search for all publications affiliated with the organization.
search = ElsSearch('AF-ID( ' + fefu_id + ' )', 'scopus')
search.execute(client)
sr = search.results

# Accumulators for flattening the raw results into tabular records.
result = []
res = {}
authorname = ''
authid = ''
# Fields that need special (non-direct) handling when flattening.
special_fields = ['authname', 'authid', 'prism:coverDisplayDate',
                  'prism:pageRange', 'openaccessFlag', 'link', 'prism:coverDate']
# Mapping from Scopus response keys to output column names.
# NOTE(review): this dict literal continues beyond the visible chunk.
fields = {
    'authname' : 'Authors',
    'authid' : 'Author(s) ID',
    'dc:title' : 'Title',
    'prism:coverDate' : 'Year',
    'prism:publicationName' : 'Source title',
    'prism:volume' : 'Volume',
    'prism:doi' : 'DOI',
# print(test.text, file=output_xml_file) #subj_areas = ["AGRI","ARTS","BIOC","BUSI","CENG","CHEM","COMP","DECI","DENT", # "EART","ECON","ENER","ENGI","ENVI","HEAL","IMMU","MATE","MATH", #"MEDI","NEUR","NURS","PHAR","PHYS","PSYC","SOCI","VETE","MULT"] #Stop-Words listed in the Sci-Dir. Expert search (deemed non-distinct) DOI_List = [] Data_Headers = ["Subj_Area", "DOI"] for subj in subj_areas: for word in comm_words: subj_srch = ElsSearch("{" + word + "}" + '+SUBJAREA(' + subj + ')', 'scidir') subj_srch.execute(client) for article in subj_srch.results: if "dc:identifier" in article: DOI_List.append((subj, article["dc:identifier"])) print(subj, ": ", article["dc:identifier"]) #output data as csv table = pd.DataFrame(data=DOI_List, columns=Data_Headers) table.to_csv('Article_DOIs.csv', index=False) """doi_doc = FullDoc(doi = TEST_DOI) if doi_doc.read(client): print ("doi_doc.title: ", doi_doc.title) doi_doc.write() print(doi_doc.data['originalText']) else:
## Initialize client client = ElsClient(API_KEY) query = 'AUTHFIRST(%s) AND AUTHLASTNAME(%s)' # AND AF-ID(60003892)' #name_list = df["Name"][86].split() #first,last = name_list[0],name_list[len(name_list)-1] name = df["Name"].iloc[26] profile_urls = [] for name in df["Name"]: name_list = name.split() first, last = name_list[0], name_list[len(name_list) - 1] auth_srch = ElsSearch(query % (first, last), 'author') auth_srch.execute(client) #print ("auth_srch has", len(auth_srch.results), "results.") try: url = auth_srch.results[0]['prism:url'] print(name, url) profile_urls.append([name, url]) except: print('Author: ' + name + ' not found.') profile_urls.append([name, '']) scopus_urls = pd.DataFrame(data=profile_urls, columns=['Name', 'URL']) scopus_urls.to_csv('scopus_urls_no_affil_full.csv', index=False) len(scopus_urls['Name'].unique()) len(scopus_urls['URL'].unique()) # scopus_urls_back = scopus_urls
import csv
from elsapy.elsclient import ElsClient
from elsapy.elssearch import ElsSearch
import json
import pandas as pd

# Interactive script: prompt for a search term, run a ScienceDirect
# search, then convert the dumped results (dump.json, written by elsapy)
# to CSV and on to XML.
se=input("Enter the Word You Want to Search about: ")
print("If you requested full Data it will take along time and also you want to have a good internet connection because it's a big data ")
# 0/1 answer is used directly as the get_all flag (truthy int).
fi=int(input("enter 0 for 25 line from data and 1 for full data: "))

# Load API configuration.
con_file = open("config.json")
config = json.load(con_file)
con_file.close()
client = ElsClient(config['apikey'])

doc_srch = ElsSearch(se,'sciencedirect')
doc_srch.execute(client, get_all =fi)
print ("doc_srch has", len(doc_srch.results), "results.")

# Reshape the columns of interest from the elsapy dump.
df = pd.read_json (r'dump.json')
df = df[['load-date','dc:title','dc:creator','prism:publicationName']]
df.columns = ['Date','Title','Creator','Publication Name']
df.to_csv (r'output.csv', index = [])

# Convert the CSV into a simple XML document.
csvFile = 'output.csv'
xmlFile = 'output.xml'
csvData = csv.reader(open(csvFile))
xmlData = open(xmlFile, 'w')
xmlData.write('<?xml version="1.0"?>' + "\n")
xmlData.write('<csv_data>' + "\n")
rowNum = 0
csvData=csvData
for row in csvData:
    # First row holds the column tags.
    if rowNum == 0:
        tags = row
        # NOTE(review): loop body continues beyond the visible chunk.
        for i in range(len(tags)):
# NOTE(review): fragment — `commer`, `thisComm`, `authDF`, `client`,
# `me_auth`, and `cb_auth` are defined earlier in the file (not visible
# here), and the final statement is truncated at the chunk boundary.
print(commer, thisComm['lastname'])
for _, auth in thisComm.iterrows():
    # All Scopus author IDs recorded for this last name.
    idList = authDF[authDF['lastname'] == auth['lastname']]
    commString = ''
    for _, aID in idList.iterrows():
        if len(commString):
            commString = commString + '+OR'
        commString = commString + '+AU-ID(' + str(aID['aid']) + ')'
    #u_id(24588214300)
    ## Initialize doc search object and execute search, retrieving all results
    #doc_srch = ElsSearch('collective+movement+ecology+AU-ID(24588214300)+AU-ID(23479355600)','scopus')
    # Count "collective"-titled vs "ecology"-titled papers for these IDs.
    doc_srch = ElsSearch('title("collective")+' + commString, 'scopus')
    doc_srch.execute(client, get_all=True)
    cbCount = len(doc_srch.results)
    # title("neuropsychological evidence")
    doc_srch = ElsSearch('title("ecology")+' + commString, 'scopus')
    doc_srch.execute(client, get_all=True)
    meCount = len(doc_srch.results)
    print(meCount, cbCount)
    # Vote this author toward movement-ecology (ME) or collective-behaviour
    # (CB) — presumed meaning of the abbreviations; TODO confirm.
    if meCount > cbCount:
        me_auth += 1
        print(auth['lastname'] + ' ME!')
    if meCount < cbCount:
        cb_auth += 1
        print(auth['lastname'] + ' CB!')
if me_auth > cb_auth:
    authDF.loc[authDF.isin(thisComm['lastname'].values)['lastname'],
def detailed_auth_query(auth_last, auth_first):
    """Search the Scopus author index for a name and try to resolve a "UR"
    affiliation (presumably a specific university, judging by isUR — TODO
    confirm) from the current affiliation or the affiliation history.

    Returns [last, first, author_id, affiliation]; the last two slots may
    hold sentinels: 'CNE' could-not-extract, 'DNE' error result, 'NONE'
    no results, 'max' 25-result cap hit, 'na' no UR affiliation found.

    NOTE(review): assigning to `client` inside the except branch makes
    `client` local to the whole function, so the first execute() raises
    UnboundLocalError and every call falls through to the config2 key.
    """
    auth_data = [auth_last, auth_first, '', '']
    print("Searching for author %s, %s" % (auth_last, auth_first))
    # Initialize search object and execute search under the author index
    query = 'authlast(%s)+AND+authfirst(%s)' % (auth_last, auth_first)
    try:
        auth_srch = ElsSearch(query, 'author')
        auth_srch.execute(client, get_all=False)
    except:
        # Load other configuration with new API Key
        con_file = open("config2.json")
        config = json.load(con_file)
        con_file.close()
        # Initialize new client
        client = ElsClient(config['apikey'])
        client.inst_token = config['insttoken']
        auth_srch = ElsSearch(query, 'author')
        auth_srch.execute(client, get_all=False)
    if (len(auth_srch.results) == 1):
        print("auth_srch has", len(auth_srch.results), "result.")
    else:
        print("auth_srch has", len(auth_srch.results), "results.")
    # checking if no results at all
    error_message = auth_srch.results[0].get('error')
    if (len(auth_srch.results) > 0):
        if (not error_message):
            print("Into the results...")
            # grabs the author_id from the search data
            for i in range(len(auth_srch.results)):
                try:
                    string_author_id = auth_srch.results[i].get('dc:identifier')
                    # this line cuts the author id string from the end of AUTHOR_ID
                    # to the end of the id digits
                    author_id = string_author_id[10:]
                    print("author_id : %s" % author_id)
                    auth_data[2] = author_id
                except AttributeError:
                    print("Could not extract auth_id field for %s, %s" %
                          (auth_last, auth_first))
                    auth_data[2] = "CNE"
                # grabs the curr_affil from the search data
                # appends it to auth_data
                try:
                    dict_curr_affil = auth_srch.results[i].get('affiliation-current')
                    curr_affil = dict_curr_affil.get('affiliation-name')
                    print("curr_affil : %s" % curr_affil)
                except AttributeError:
                    print("Could not extract curr_affil field for %s, %s" %
                          (auth_last, auth_first))
                    auth_data[3] = "CNE"
                try:
                    # if UR not current affil go on and search history
                    if (not isUR(curr_affil)):
                        affil_hist = auth_id_query(auth_data[2])
                        try:
                            # A list means multiple affiliation records.
                            if (len(affil_hist) > 1):
                                for institution in affil_hist:
                                    try:
                                        affil_instance = institution['ip-doc']['preferred-name']['$']
                                        # if UR affil is found, return immediately
                                        if (isUR(affil_instance)):
                                            curr_affil = affil_instance
                                            auth_data[3] = curr_affil
                                            return auth_data
                                    except:
                                        print("Affiliation instance data for %s,%s wasn't structured correctly." % (auth_data[0], auth_data[1]))
                                        # print(institution)
                            else:
                                # Single affiliation record shape.
                                try:
                                    affil_instance = affil_hist['ip-doc']['preferred-name']['$']
                                    try:
                                        # if UR affil is found, return immediately
                                        if (isUR(affil_instance)):
                                            curr_affil = affil_instance
                                            auth_data[3] = curr_affil
                                            return auth_data
                                    except TypeError:
                                        print("isUR error")
                                        print(affil_instance)
                                except:
                                    print("Affiliation instance data for %s,%s wasn't structured correctly." % (auth_data[0], auth_data[1]))
                                    # print(institution)
                        except TypeError:
                            print("Type Error occured for affil_hist of %s,%s" % (auth_data[0], auth_data[1]))
                            print(affil_hist)
                    # but if it is then return immediately
                    else:
                        print("Returned with curr_affil : '%s' for %s,%s" % (curr_affil, auth_data[0], auth_data[1]))
                        auth_data[3] = curr_affil
                        return auth_data
                except:
                    print("Something wrong within the returned profile data of %s,%s" % (auth_data[0], auth_data[1]))
            # this is the case of hitting the cap of 25, too many people down the list
            if (len(auth_srch.results) >= 25):
                print("Results CAP of 25 was hit for the %d results of %s,%s" % (len(auth_srch.results), auth_data[0], auth_data[1]))
                auth_data[3] = 'max'
                return auth_data
            # this covers the case of no UR affils found at all
            elif (len(auth_srch.results) < 25):
                print("EXHAUSTED results list of %d results for %s,%s" % (len(auth_srch.results), auth_data[0], auth_data[1]))
                auth_data[3] = 'na'
                return auth_data
        # this could be a false positive! the author name could be in the
        # name-variant field; I redo the query down below in the next function
        else:
            auth_data[2] = 'DNE'
            auth_data[3] = 'DNE'
            print(error_message)
    else:
        print("very bad error @ length of auth_srch.results <= 0")
        auth_data[2] = 'NONE'
        auth_data[3] = 'NONE'
    return auth_data
def get_search(keywords, client):
    """Run a Scopus search for ``keywords`` and return the results as a
    dataframe.

    :param keywords: query string for Scopus
    :param client: ElsClient instance used to execute the search
    :return: pandas DataFrame with all retrieved results
    """
    srch = ElsSearch(keywords, 'scopus')
    srch.execute(client, get_all=True)
    print("doc_srch has", len(srch.results), "results.")
    results_frame = srch.results_df
    return results_frame
string_2 = 'REF' + '%28' + quote_plus(string_2) + '%29' # Search for publications from this year. If the final search of the year is not exactly at the end of the year, then some publications could be missed. Therefore, we use a different PUBYEAR string in January (month == 1) of the following year (assuming that searches are done at least once per month). if month == 1: year = year - 2 year = 'PUBYEAR > ' + str(year) else: year = 'PUBYEAR = ' + str(year) year = quote_plus(year) encoded_search_string = string_1 + '+OR+' + string_2 + '+AND+' + year # Initialize doc search object and execute search, retrieving <=25 results if # get_all=False or <=5000 results if get_all=True. search = ElsSearch(encoded_search_string, 'scopus') search.execute(client, get_all=True) # Save the results as a new record in the Search table results = search.results source = Source.objects.get(source='Scopus') topic = search_string.topic record = Search( topic=topic, search_string=search_string, source=source, results=results, ) record.save() event = 'search_scopus.py'