def search_for_citing_articles(UID, SID):
    """Download and cache the raw XML of the articles citing ``UID``.

    The XML is stored in
    ``citing articles search results xml/<UID[4:]>.txt``. When that file
    already exists, no request is made and the cached path is returned.

    Args:
        UID: Record identifier. Its first four characters are dropped when
            the cache file name is built (presumably a ``WOS:`` prefix --
            confirm against callers).
        SID: Session identifier handed to ``wok_soap``.

    Returns:
        ``[filename, counter]``. ``counter`` starts at 0 and is replaced by
        whatever ``counter_check`` returns after each request.

    Raises:
        Exception: If the number of parsed records differs from the count
            reported by the service.
    """
    directory = "citing articles search results xml"
    # exist_ok avoids the check-then-create race of the explicit test.
    os.makedirs(directory, exist_ok=True)
    filename = directory + "/" + UID[4:] + ".txt"
    counter = 0

    # Nothing to do when the citing-article data has already been saved.
    if os.path.exists(filename):
        return [filename, counter]

    print(UID + " citing articles")

    # Search on WOS
    results = wok_soap.citingArticles(UID, SID)
    # NOTE(review): counter_check may hand back a different SID; it is used
    # for the remaining requests here but is not returned to the caller.
    [counter, SID] = counter_check(counter, SID)
    queryId = results[0]
    results_count = results[1]

    # Raw search results are stored in the 5th item of the result object.
    results_unicode = results[4]

    # Results arrive 100 at a time; fetch the remaining pages, if any.
    extra_pages = (results_count - 1) // 100 if results_count > 100 else 0
    for page in range(extra_pages):
        start_count = (100 * page) + 101
        more_results = wok_soap.retrieve(queryId, SID, start_count, "Fields")
        [counter, SID] = counter_check(counter, SID)
        # Splice the pages into one document: drop the last 10 characters
        # of what we have (presumably the closing "</records>" tag) and the
        # first 82 characters of the new page (presumably its XML header
        # and opening tag).
        results_unicode = results_unicode[:-10] + more_results[0][82:]

    # Sanity check: the root element must hold one child per record.
    root = ET.fromstring(results_unicode)
    length = len(root)
    if length != results_count:
        raise Exception(
            "citing articles for %s: parsed %s records but the service "
            "reported %s" % (UID, length, results_count)
        )

    # Write raw search results to txt file. The encoding is explicit so
    # non-ASCII records do not depend on the platform default.
    with open(filename, "w", encoding="utf-8") as f:
        f.write(results_unicode)

    return [filename, counter]
def search_for_citing_articles(UID, SID):
    """Download and cache the raw XML of the articles citing ``UID``.

    The XML is stored, UTF-8 encoded, in
    ``citing articles search results xml/<UID[4:]>.txt``. When that file
    already exists, no request is made and the cached path is returned.

    Args:
        UID: Record identifier. Its first four characters are dropped when
            the cache file name is built (presumably a ``WOS:`` prefix --
            confirm against callers).
        SID: Session identifier handed to ``wok_soap``.

    Returns:
        ``[filename, counter]``. ``counter`` starts at 0 and is replaced by
        whatever ``counter_check`` returns after each request.

    Raises:
        Exception: If the number of parsed records differs from the count
            reported by the service.
    """
    directory = "citing articles search results xml"
    if not os.path.exists(directory):
        os.makedirs(directory)
    filename = "citing articles search results xml/" + UID[4:] + ".txt"
    counter = 0

    # Save file with citing article data if it hasn't been saved yet
    if not os.path.exists(filename):
        # print() with a single argument behaves the same on Python 2 and 3;
        # the old print statement is a SyntaxError on Python 3.
        print(UID + " citing articles")

        # Search on WOS
        results = wok_soap.citingArticles(UID, SID)
        # NOTE(review): counter_check may hand back a different SID; it is
        # used for the remaining requests but not returned to the caller.
        [counter, SID] = counter_check(counter, SID)
        queryId = results[0]
        results_count = results[1]
        print(results_count)

        # Raw search results are stored in the 5th item of the result
        # object; from here on they are handled as UTF-8 bytes.
        results_bytes = results[4].encode('utf-8')

        if results_count > 100:
            # Results arrive 100 at a time; work out how many more pages
            # are needed after the first one.
            retrieve_count = results_count // 100
            if results_count % 100 == 0:
                retrieve_count -= 1
            for hundred in range(retrieve_count):
                start_count = (100 * hundred) + 101
                more_results = wok_soap.retrieve(
                    queryId, SID, start_count, "Fields")
                [counter, SID] = counter_check(counter, SID)
                more_bytes = more_results[0].encode('utf-8')
                # Splice the pages: drop the last 10 bytes of what we have
                # (presumably the closing "</records>" tag) and the first
                # 82 bytes of the new page (presumably its XML header and
                # opening tag).
                results_bytes = results_bytes[:-10] + more_bytes[82:]

        # Sanity check: the root element must hold one child per record.
        root = ET.fromstring(results_bytes)
        length = len(root)
        if length != results_count:
            raise Exception(
                "citing articles for %s: parsed %s records but the service "
                "reported %s" % (UID, length, results_count)
            )

        # Write raw search results to txt file. Binary mode is required
        # because the payload is already encoded; a text-mode file rejects
        # bytes on Python 3.
        with open(filename, "wb") as f:
            f.write(results_bytes)

    return [filename, counter]
def search_by_grant(csv_file, SID):
    """Search WOS for every grant number in a CSV file and cache the raw XML.

    The first column of ``csv_file`` is read as the list of grant numbers
    (blank rows are skipped). Each grant is turned into an ``FT = ...``
    query, and the raw result XML is stored, UTF-8 encoded, under
    ``grant search results xml/``. Queries whose result file already exists
    are not requested again.

    Args:
        csv_file: Path of the CSV file holding grant numbers in column one.
        SID: Session identifier handed to ``wok_soap``.

    Returns:
        ``[grant_list, file_list, counter]``: the grant numbers read, the
        result file path for each of them (same order), and the counter as
        last returned by ``counter_check`` (0 if no request was made).

    Raises:
        RuntimeError: If the number of parsed records differs from the count
            reported by the service. (This was a bare ``raise`` with no
            active exception, which could only fail with an unrelated
            error message.)
    """
    directory = "grant search results xml"
    os.makedirs(directory, exist_ok=True)

    # On Python 3 the csv module needs a text-mode file opened with
    # newline=""; the former "rb" mode makes csv.reader fail.
    with open(csv_file, newline="") as h:
        grant_list = [row[0] for row in csv.reader(h) if row]

    file_list = []
    counter = 0

    for grant_number_full in grant_list:
        # Define query
        if grant_number_full[0:2] == "DE":
            # Search the two-character prefix and the number both joined
            # and separated by a space.
            prefix = grant_number_full[3:5]
            grant_number = grant_number_full[5:]
            query = ("FT = " + prefix + grant_number
                     + " OR FT = " + prefix + " " + grant_number)
        else:
            query = "FT = " + grant_number_full

        # "/" cannot appear in a file name, so strip it for every query
        # (previously only the non-DE branch did this).
        filename = "grant search results xml/" + query.replace("/", "") + ".txt"
        file_list.append(filename)

        if os.path.exists(filename):
            continue

        print(query)

        # Search on WOS
        results = wok_soap.search(query, SID)
        [counter, SID] = counter_check(counter, SID)
        queryId = results[0]
        results_count = results[1]

        # Raw search results are stored in the 4th item of the result
        # object; from here on they are handled as UTF-8 bytes.
        results_bytes = results[3].encode('utf-8')

        if results_count > 100:
            # Results arrive 100 at a time; work out how many more pages
            # are needed after the first one.
            retrieve_count = results_count // 100
            if results_count % 100 == 0:
                retrieve_count -= 1
            for hundred in range(retrieve_count):
                start_count = (100 * hundred) + 101
                more_results = wok_soap.retrieve(
                    queryId, SID, start_count, "FullRecord")
                [counter, SID] = counter_check(counter, SID)
                more_bytes = more_results[0].encode('utf-8')
                # Splice the pages: drop the last 10 bytes of what we have
                # (presumably the closing "</records>" tag) and the first
                # 86 bytes of the new page (presumably its XML header and
                # opening tag).
                results_bytes = results_bytes[:-10] + more_bytes[86:]

        # Sanity check: the root element must hold one child per record.
        root = ET.fromstring(results_bytes)
        length = len(root)
        if length != results_count:
            raise RuntimeError(
                "query %r: parsed %s records but the service reported %s"
                % (query, length, results_count)
            )

        # Write raw search results to txt file. Binary mode is required
        # because the payload is already encoded.
        with open(filename, "wb") as f:
            f.write(results_bytes)

    return [grant_list, file_list, counter]
def search_by_grant(csv_file, SID):
    """Search WOS for every grant number in a CSV file and cache the raw XML.

    The first column of ``csv_file`` is read as the list of grant numbers
    (blank rows are skipped). Each grant is turned into an ``FT = ...``
    query, and the raw result XML is stored under
    ``grant search results xml/``. Queries whose result file already exists
    are not requested again.

    Args:
        csv_file: Path of the CSV file holding grant numbers in column one.
        SID: Session identifier handed to ``wok_soap``.

    Returns:
        ``[grant_list, file_list, counter]``: the grant numbers read, the
        result file path for each of them (same order), and the counter as
        last returned by ``counter_check`` (0 if no request was made).

    Raises:
        RuntimeError: If the number of parsed records differs from the count
            reported by the service. (This was a bare ``raise``, which on
            Python 3 also surfaces as ``RuntimeError`` but without any
            useful message.)
    """
    directory = "grant search results xml"
    os.makedirs(directory, exist_ok=True)

    # newline="" is what the csv module asks for on text-mode files.
    with open(csv_file, newline="") as h:
        grant_list = [row[0] for row in csv.reader(h) if row]

    file_list = []
    counter = 0

    for grant_number_full in grant_list:
        # Define query
        if grant_number_full[0:2] == "DE":
            # Search the two-character prefix and the number both joined
            # and separated by a space.
            prefix = grant_number_full[3:5]
            grant_number = grant_number_full[5:]
            query = ("FT = " + prefix + grant_number
                     + " OR FT = " + prefix + " " + grant_number)
        else:
            query = "FT = " + grant_number_full

        # "/" cannot appear in a file name, so strip it for every query
        # (previously only the non-DE branch did this).
        filename = "grant search results xml/" + query.replace("/", "") + ".txt"
        file_list.append(filename)

        if os.path.exists(filename):
            continue

        print(query)

        # Search on WOS
        results = wok_soap.search(query, SID)
        [counter, SID] = counter_check(counter, SID)
        queryId = results[0]
        results_count = results[1]

        # Raw search results are stored in the 4th item of the result object.
        results_unicode = results[3]

        # Results arrive 100 at a time; fetch the remaining pages, if any.
        extra_pages = (results_count - 1) // 100 if results_count > 100 else 0
        for page in range(extra_pages):
            start_count = (100 * page) + 101
            more_results = wok_soap.retrieve(
                queryId, SID, start_count, "FullRecord")
            [counter, SID] = counter_check(counter, SID)
            # Keep every page as text: encoding the later pages to bytes
            # made the concatenation below fail with TypeError.
            # Splice the pages: drop the last 10 characters of what we have
            # (presumably the closing "</records>" tag) and the first 86
            # characters of the new page (presumably its XML header and
            # opening tag).
            results_unicode = results_unicode[:-10] + more_results[0][86:]

        # Sanity check: the root element must hold one child per record.
        root = ET.fromstring(results_unicode)
        length = len(root)
        if length != results_count:
            raise RuntimeError(
                "query %r: parsed %s records but the service reported %s"
                % (query, length, results_count)
            )

        # Write raw search results to txt file. The encoding is explicit so
        # non-ASCII records do not depend on the platform default.
        with open(filename, "w", encoding="utf-8") as f:
            f.write(results_unicode)

    return [grant_list, file_list, counter]
def searchByGrantOrDOI(csv_file, searchType):
    """Search WOS by grant number or by DOI/WOS id and cache the raw XML.

    The first column of ``csv_file`` supplies the identifiers (blank rows
    are skipped and a leading byte-order mark is removed). One query is
    built per identifier and its raw result XML is stored under
    ``search by grant or doi xml/``. Queries whose result file already
    exists are not requested again.

    Args:
        csv_file: Path of the CSV file holding the identifiers in column one.
        searchType: ``"grant"`` or ``"doi"`` (case-insensitive).

    Returns:
        ``[column1List, file_list, counter]``: the identifiers read, the
        result file path for each query (same order), and the counter as
        last returned by ``counter_check`` (0 if no request was made).

    Raises:
        Exception: If ``searchType`` is neither ``"grant"`` nor ``"doi"``,
            or if the number of parsed records differs from the count
            reported by the service.
    """
    directory = "search by grant or doi xml/"
    # Check for and create a directory
    os.makedirs(directory, exist_ok=True)

    with open(csv_file) as h:  # Open a CSV file
        # gets rid of '\ufeff' at beginning of csv
        column1List = [row[0].replace(u'\ufeff', '')
                       for row in csv.reader(h) if row]

    counter = 0
    # presumably counter_check supplies a real session id on first use --
    # confirm against its definition
    SID = ""

    # === Handle second argument, searchType ====
    searchType = searchType.lower()
    acceptableSearchTypes = ["grant", "doi"]  # later can add author, etc
    if searchType not in acceptableSearchTypes:
        raise Exception("Second argument of searchByGrantOrDOI must be 'grant' or 'doi'")

    # CREATE QUERY
    queryList = []
    if searchType == "grant":
        # === Create grant query ===
        for fullNumber in column1List:
            if fullNumber[0:2] == "DE":
                # Search the two-character prefix and the number both
                # joined and separated by a space.
                prefix = fullNumber[3:5]
                grantNumber = fullNumber[5:]
                query = ("FT = " + prefix + grantNumber
                         + " OR FT = " + prefix + " " + grantNumber)
            else:
                query = "FT = " + str(fullNumber)
            queryList.append(query)
    elif searchType == "doi":
        # === Create DOI query ===
        for result in column1List:
            print(result)
            # remove non-printing characters
            ID = result.strip(' \t\n\r').replace(" ", "").replace(u'\u200b', '')
            print("ID is " + ID)
            if ID[0:3] == "WOS":
                # Identifiers starting with "WOS" are searched as UT
                query = "UT = " + ID
            else:
                query = 'DO = "' + ID + '"'
            print("query = " + str(query))
            queryList.append(query)

    file_list = []
    for q in queryList:
        # create filename without slashes or quotes
        filename = directory + q.replace("/", " ").replace('"', "") + ".txt"
        # Add each file to file list
        file_list.append(filename)

        # Search on WOS only when there is no cached result yet
        if os.path.exists(filename):
            continue

        [counter, SID] = counter_check(counter, SID)
        results = wok_soap.search(q, SID)
        queryId = results[0]
        results_count = results[1]
        results_unicode = results[3]

        # Handling throttle problems - can't get more than 100 at once
        extra_pages = (results_count - 1) // 100 if results_count > 100 else 0
        for page in range(extra_pages):
            start_count = (100 * page) + 101
            [counter, SID] = counter_check(counter, SID)
            more_results = wok_soap.retrieve(
                queryId, SID, start_count, "FullRecord")
            # Keep every page as text. Encoding to bytes and wrapping in
            # str() put a "b'...'" repr into the XML on Python 3.
            # Splice the pages: drop the last 10 characters of what we have
            # (presumably the closing "</records>" tag) and the first 86
            # characters of the new page (presumably its XML header and
            # opening tag).
            results_unicode = results_unicode[:-10] + more_results[0][86:]

        # ET = element tree. results_unicode holds all the search results;
        # the root element must hold one child per record.
        root = ET.fromstring(results_unicode)
        length = len(root)
        if length != results_count:
            raise Exception("length does not equal results_count")

        # Write raw search results to txt file. The encoding is explicit so
        # non-ASCII records do not depend on the platform default.
        with open(filename, "w", encoding="utf-8") as f:
            f.write(results_unicode)

    print(file_list)
    # subscription allows only 2500 records/session.
    return [column1List, file_list, counter]