def counter_check(counter, SID):
    """Advance the processed-record counter by one.

    When the counter reaches the per-session limit, a fresh session ID is
    obtained and the count restarts at zero.

    Returns a two-element list: [updated counter, possibly refreshed SID].
    """
    counter = counter + 1
    if counter < 2499:
        # Still within the session limit: keep the current session.
        return [counter, SID]
    # Limit reached: authenticate again and restart the count.
    return [0, wok_soap.auth()]
def counter_check(counter, SID):
    """Bump the processed-record counter and renew the session if needed.

    A new session is started either when no SID is held yet (empty string)
    or when the counter hits the limit, because can only get 2500 records
    in a given session.

    Returns a two-element list: [updated counter, possibly refreshed SID].
    """
    new_count = counter + 1
    needs_new_session = new_count >= 2499 or SID == ""
    if needs_new_session:
        return [0, wok_soap.auth()]
    return [new_count, SID]
def construct_data(csv_file):
    """Build per-grant publication data from the grant numbers in *csv_file*.

    Returns a list of dicts, one per grant, each holding "Award Number",
    "Number of Publications", and a private "__paper list" of processed
    paper records.
    """
    # start session
    SID = wok_soap.auth()
    counter = 0

    # run search to output list of grant numbers and list of search results file names
    grants_and_files = submit_search.search_by_grant(csv_file, SID)
    grant_list = grants_and_files[0]
    file_list = grants_and_files[1]

    # start new session after the per-session record limit has been reached
    counter += grants_and_files[2]
    if counter >= 2499:
        SID = wok_soap.auth()
        counter = 0

    # construct dictionary to hold grant data
    data = [{"Award Number": grant, "Number of Publications": 0}
            for grant in grant_list]

    for i, filename in enumerate(file_list):
        # open search results file and parse as XML
        with open(filename, "r+") as h:
            tree = ET.parse(h)
            root = tree.getroot()
            # FIX: was a Python 2 `print` statement; sibling functions in this
            # file use the print() function, which also works on Python 2 for
            # a single argument.
            print("parsing " + filename)

            data[i]["Number of Publications"] = len(root)

            paper_list = []
            # iterate the child record elements directly (same order as
            # index-based access)
            for record in root:
                paper = process_article(record)
                # NOTE(review): citation_analysis rebinds SID/counter locally;
                # renewed values never propagate back here — confirm intended.
                paper = citation_analysis(paper, SID, counter)
                paper_list.append(paper)

            data[i]["__paper list"] = paper_list

    return data
def construct_data(csv_file):
    """Collect publication data for every grant listed in *csv_file*.

    Returns a list of dicts, one per grant, each with the award number,
    the number of publications found, and the processed paper records
    under the private "__paper list" key.
    """
    # open a session with the search service
    SID = wok_soap.auth()
    counter = 0

    # search by grant: yields grant numbers, one results file per grant,
    # and how many records the search consumed
    search_result = submit_search.search_by_grant(csv_file, SID)
    grant_list = search_result[0]
    file_list = search_result[1]
    counter += search_result[2]

    # start new session before 2500 records have been processed
    if counter >= 2499:
        SID = wok_soap.auth()
        counter = 0

    # one entry per grant; publication counts are filled in below
    data = [{"Award Number": number, "Number of Publications": 0}
            for number in grant_list]

    for index, results_file in enumerate(file_list):
        # parse the saved search results as XML
        with open(results_file, "rb") as handle:
            root = ET.parse(handle).getroot()
            print("parsing " + results_file)

            data[index]["Number of Publications"] = len(root)

            papers = []
            for position in range(len(root)):
                record = root[position]
                processed = process_article(record)
                papers.append(citation_analysis(processed, SID, counter))

            data[index]["__paper list"] = papers

    return data
def print_pub_table_from_DOIs(csv_file):
    """Look up each DOI listed in *csv_file* and write a CSV table of the
    processed publication records.

    The output file name is derived from the input file name plus the
    current date. Writes nothing if no papers were found.
    """
    # start session
    SID = wok_soap.auth()
    counter = 0

    # run search to output list of search results file names
    search_output = submit_search.search_by_DOI(csv_file, SID)
    file_list = search_output[0]
    counter = search_output[1]
    print("found " + str(len(file_list)) + " files")

    paper_list = []

    # loop through each entry in the list of files found
    for i, filename in enumerate(file_list):
        # open search results file and parse as XML
        with open(filename) as h:
            tree = ET.parse(h)
            root = tree.getroot()

            # if the file is not empty, process the record
            # FIX: test child count explicitly — bare Element truthiness is
            # ambiguous/deprecated in ElementTree
            if len(root):
                record = root[0]
                paper = process_article(record)
                paper = citation_analysis(paper, SID, counter)
                print("parsed " + filename)
                paper_list.append(paper)
            else:
                print("no paper found")

    # FIX: guard against an empty result set — the header row below is
    # derived from paper_list[0] and would raise IndexError
    if not paper_list:
        print("no papers found - nothing to write")
        return

    print("printing publication table")
    out_name = ("WOS_scraping - " + csv_file[:-4] + " - "
                + time.strftime("%d %b %Y") + ".csv")
    # FIX: newline="" stops the csv module emitting blank rows on Windows
    with open(out_name, "w", newline="") as g:
        writer = csv.writer(g, delimiter=',')

        # header from the first paper's keys; the trailing two sorted keys
        # (the private "__..." list fields, which sort last) are dropped
        example_paper = paper_list[0]
        heading_tuples = sorted(example_paper.items(), key=lambda k: k[0])[:-2]
        heading = [field[0] for field in heading_tuples]
        writer.writerow(heading)

        # Fill in values for paper data
        for paper in paper_list:
            print("writing row for " + paper["DOI"])
            dictionary_tuples = sorted(paper.items(), key=lambda k: k[0])[:-2]
            row = [field[1] for field in dictionary_tuples]
            writer.writerow(row)
def citation_analysis(paper, SID, counter):
    # Enrich *paper* (a dict produced by process_article) with citation
    # metrics: cited-reference statistics and citing-article counts bucketed
    # several ways. Returns the mutated paper dict.
    # NOTE(review): SID and counter are rebound locally after a session
    # renewal; the refreshed values never reach the caller — confirm intended.
    UID = paper["UID"]
    cited_refs_output = submit_search.search_for_cited_refs(UID, SID)
    counter += cited_refs_output[1]
    if counter >= 2499:
        SID = wok_soap.auth()
        counter = 0
    paper["__cited references"] = process_cited_refs(cited_refs_output)
    if paper["__cited references"]:
        # average reference age = publication year minus the mean cited year
        # (references with a blank year are skipped)
        year_list = [int(ref["Year"]) for ref in paper["__cited references"] if ref["Year"] != ""]
        average_year = sum(year_list) / float(len(year_list))
        paper["Average Age of Reference"] = int(paper["Publication Year"]) - average_year
        # diversity index = fraction of references published in a journal
        # other than this paper's journal
        journal_list = [ref["Cited Work"] for ref in paper["__cited references"]]
        same_journal_list = [y for y in journal_list if y == paper["Journal Title"]]
        paper["Diversity Index"] = 1 - (len(same_journal_list) / float(len(journal_list)))
    citing_articles_output = submit_search.search_for_citing_articles(UID, SID)
    counter += citing_articles_output[1]
    # if counter >= 2499:
    # SID = wok_soap.auth()
    # counter = 0
    paper["__citing articles"] = process_citing_articles(citing_articles_output)
    paper["Times Cited through 12-31-2015"] = len(paper["__citing articles"])
    # citations per calendar year relative to the publication year (-1..3)
    for year in range(-1, 4):
        key = "Citations in Year " + str(year)
        paper[key] = 0
        citations = [article["Publication Year"] for article in paper["__citing articles"] if int(article["Publication Year"]) - int(paper["Publication Year"]) == year]
        if citations:
            paper["Citations in Year " + str(year)] = len(citations)
    # citations per 12-month window measured from the exact publication date;
    # also annotates each citing article with its "Cite Time" in years
    for year in range(4):
        date_format = "%Y-%m-%d"
        key = "Citations in month %s to %s" % (str((year-1)*12), str(year*12))
        paper[key] = 0
        citations_2 = []
        for article in paper["__citing articles"]:
            delta = d.strptime(article["Publication Date"], date_format) - d.strptime(paper["Publication Date"], date_format)
            article["Cite Time"] = delta.days / float(365)
            if year-1 < article["Cite Time"] <= year:
                citations_2.append(article["Publication Date"])
        if citations_2:
            paper[key] = len(citations_2)
    # citations per absolute calendar year, 2009-2015 inclusive
    for year in range(2009, 2016):
        key = "Citations in %s" % (str(year))
        paper[key] = 0
        citations_3 = []
        for article in paper["__citing articles"]:
            if int(article["Publication Year"]) == year:
                citations_3.append(article["Publication Year"])
        if citations_3:
            paper[key] = len(citations_3)
    return paper
def citation_analysis(paper, SID, counter):
    """Augment *paper* with citation metrics from the search service.

    Adds cited-reference statistics (average reference age, diversity
    index), the list of citing articles, and citation counts bucketed by
    relative year, by 12-month window, and by calendar year 2003-2017.
    Returns the mutated paper dict.

    NOTE(review): SID and counter are rebound locally after a session
    renewal; the refreshed values never reach the caller — confirm intended.
    """
    UID = paper["UID"]

    # search for references cited in the paper
    cited_refs_output = submit_search.search_for_cited_refs(UID, SID)
    counter += cited_refs_output[1]
    if counter >= 2499:
        SID = wok_soap.auth()
        counter = 0
    paper["__cited references"] = process_cited_refs(cited_refs_output)

    if paper["__cited references"]:
        # average reference age = publication year minus the mean cited year
        # (references with a blank year are skipped)
        year_list = [int(ref["Year"]) for ref in paper["__cited references"]
                     if ref["Year"] != ""]
        average_year = sum(year_list) / float(len(year_list))
        paper["Average Age of Reference"] = int(paper["Publication Year"]) - average_year

        # diversity index = fraction of references from other journals
        journal_list = [ref["Cited Work"] for ref in paper["__cited references"]]
        same_journal_list = [y for y in journal_list if y == paper["Journal Title"]]
        paper["Diversity Index"] = 1 - (len(same_journal_list) / float(len(journal_list)))

    # search for articles that cite the paper
    citing_articles_output = submit_search.search_for_citing_articles(UID, SID)
    counter += citing_articles_output[1]
    paper["__citing articles"] = process_citing_articles(citing_articles_output)
    paper["Times Cited through Search Period"] = len(paper["__citing articles"])

    # count citations in calendar years relative to the year of publication,
    # up to year 13 inclusive
    # FIX: removed a dead `citations = []` initializer that was immediately
    # rebound by the comprehension; len() of the empty list is 0, so the
    # separate zero-default assignment is folded in as well
    for year in range(-1, 14):
        key = "Citations in Year " + str(year)
        citations = [article["Publication Year"]
                     for article in paper["__citing articles"]
                     if int(article["Publication Year"]) - int(paper["Publication Year"]) == year]
        paper[key] = len(citations)

    # count citations in 12 month periods from the exact publication date;
    # also annotates each citing article with its "Cite Time" in years
    date_format = "%Y-%m-%d"  # FIX: hoisted loop-invariant format string
    for year in range(4):
        key = "Citations in month %s to %s" % (str((year-1)*12), str(year*12))
        citations_2 = []
        for article in paper["__citing articles"]:
            delta = (d.strptime(article["Publication Date"], date_format)
                     - d.strptime(paper["Publication Date"], date_format))
            article["Cite Time"] = delta.days / float(365)
            if year - 1 < article["Cite Time"] <= year:
                citations_2.append(article["Publication Date"])
        paper[key] = len(citations_2)

    # count citations in calendar years 2003-2017 inclusive
    for year in range(2003, 2018):
        key = "Citations in %s" % (str(year))
        citations_3 = [article["Publication Year"]
                       for article in paper["__citing articles"]
                       if int(article["Publication Year"]) == year]
        paper[key] = len(citations_3)

    return paper
    # --- tail of a search function whose `def` begins before this chunk ---
    root = ET.fromstring(results_unicode)
    length = len(root)
    # sanity check: parsed record count must match the count the search reported
    if length != results_count:
        raise Exception

    # Write raw search results to txt file
    with open(filename, "w") as f:
        f.write(results_unicode)

    return [filename, counter]


def counter_check(counter, SID):
    # Advance the per-session record counter; renew the session once the
    # limit is reached (presumably sessions are capped near 2500 records —
    # confirm against the service docs).
    counter += 1
    if counter >= 2499:
        SID = wok_soap.auth()
        counter = 0
    return [counter, SID]


# module-level defaults used by the __main__ smoke test below
csv_file = "DOE grant short list.csv"
UID = "WOS:000346178800058"

if __name__ == '__main__':
    # smoke test: authenticate and run a citing-articles search for one UID
    SID = wok_soap.auth()
    search_for_citing_articles(UID, SID)
if length != results_count: raise Exception # Write raw search results to txt file with open(filename, "w") as f: f.write(results_unicode) return [filename, counter] def counter_check(counter, SID): counter += 1 if counter >= 2499: SID = wok_soap.auth() counter = 0 return [counter, SID] if __name__ == '__main__': SID = wok_soap.auth() csv_file = "example DOIs.csv" file_list = search_by_DOI(csv_file, SID) print(file_list[0]) # UID = "WOS:000283490400005" # # search_for_citing_articles(UID, SID)