Пример #1
0
def counter_check(counter, SID):
    """Track records retrieved in the current WOS session, renewing it as needed.

    Increments *counter* and, when the session nears the ~2500-record
    retrieval limit — or when no session is open yet (empty SID) — opens
    a fresh session via wok_soap.auth() and resets the count.

    Args:
        counter: number of records retrieved so far in this session.
        SID: current session identifier ("" when no session is open).

    Returns:
        [counter, SID] — the updated count and (possibly new) session ID.
    """
    counter += 1
    # WOS only allows ~2500 record retrievals per session, so re-authenticate
    # shortly before the cap; also authenticate when no session exists yet
    # (matches the SID == "" handling used elsewhere in this project).
    if counter >= 2499 or SID == "":
        SID = wok_soap.auth()
        counter = 0

    return [counter, SID]
Пример #2
0
def counter_check(counter, SID):
    """Track records retrieved in the current WOS session, renewing it as needed.

    Increments *counter* and, when the session nears the ~2500-record
    retrieval limit — or when no session is open yet (empty SID) — opens
    a fresh session via wok_soap.auth() and resets the count.

    Args:
        counter: number of records retrieved so far in this session.
        SID: current session identifier ("" when no session is open).

    Returns:
        [counter, SID] — the updated count and (possibly new) session ID.
    """
    counter += 1
    # WOS only allows ~2500 record retrievals per session, so re-authenticate
    # shortly before the cap; also authenticate when no session exists yet
    # (matches the SID == "" handling used elsewhere in this project).
    if counter >= 2499 or SID == "":
        SID = wok_soap.auth()
        counter = 0

    return [counter, SID]
Пример #3
0
def counter_check(counter, SID):
    """Bump the per-session record counter, renewing the WOS session as needed.

    A Web of Science session only permits about 2500 record retrievals, so
    once the count nears that cap — or when no session ID is held at all —
    a new session is opened and the count is reset.

    Returns [counter, SID].
    """
    counter += 1
    needs_new_session = counter >= 2499 or SID == ""
    if needs_new_session:
        SID = wok_soap.auth()
        counter = 0
    return [counter, SID]
Пример #4
0
def construct_data(csv_file):
    """Build a list of per-grant dicts with publication data from WOS searches.

    Args:
        csv_file: CSV file of grant award numbers, handed to
            submit_search.search_by_grant.

    Returns:
        A list with one dict per grant containing "Award Number",
        "Number of Publications", and "__paper list" (the processed
        paper records for that grant).
    """
    # start session
    SID = wok_soap.auth()
    counter = 0

    # run search to output list of grant numbers and list of search results file names
    grants_and_files = submit_search.search_by_grant(csv_file, SID)
    grant_list = grants_and_files[0]
    file_list = grants_and_files[1]

    # WOS sessions only allow ~2500 record retrievals; start a new session
    # once the search has consumed most of the allowance
    counter += grants_and_files[2]
    if counter >= 2499:
        SID = wok_soap.auth()
        counter = 0

    # construct dictionary to hold grant data
    data = [{"Award Number": award,
             "Number of Publications": 0}
            for award in grant_list]

    for i, filename in enumerate(file_list):

        # open search results file and parse as XML; "r" suffices since the
        # file is only read (the original opened it "r+" needlessly)
        with open(filename, "r") as h:
            tree = ET.parse(h)
            root = tree.getroot()
            print("parsing " + filename)  # function-call form: works on Py2 and Py3

        data[i]["Number of Publications"] = len(root)
        paper_list = []

        for record in root:
            paper = process_article(record)
            # NOTE(review): citation_analysis updates counter/SID internally
            # but returns only the paper, so session bookkeeping here never
            # advances — confirm whether that is intended.
            paper = citation_analysis(paper, SID, counter)
            paper_list.append(paper)

        data[i]["__paper list"] = paper_list

    return data
Пример #5
0
def construct_data(csv_file):
    """Assemble per-grant publication data from Web of Science searches.

    Runs a grant-number search for every entry in *csv_file*, parses the
    saved result files, and returns one dict per grant with the award
    number, its publication count, and the processed paper records.
    """
    # open a Web of Science session
    SID = wok_soap.auth()
    counter = 0

    # grant search: yields grant numbers, result file names, and the
    # number of records already retrieved in this session
    search_results = submit_search.search_by_grant(csv_file, SID)
    grant_list = search_results[0]
    file_list = search_results[1]

    # renew the session before the ~2500-record allowance is exhausted
    counter += search_results[2]
    if counter >= 2499:
        SID = wok_soap.auth()
        counter = 0

    # one data entry per grant, publication count filled in below
    data = []
    for award_number in grant_list:
        data.append({"Award Number": award_number,
                     "Number of Publications": 0})

    for idx, results_file in enumerate(file_list):

        # parse the saved search-results XML for this grant
        with open(results_file, "rb") as handle:
            root = ET.parse(handle).getroot()
            print("parsing " + results_file)

        entry = data[idx]
        entry["Number of Publications"] = len(root)

        papers = []
        for record in root:
            processed = process_article(record)
            papers.append(citation_analysis(processed, SID, counter))

        entry["__paper list"] = papers

    return data
Пример #6
0
def print_pub_table_from_DOIs(csv_file):
    """Search WOS for each DOI in *csv_file* and write a CSV table of results.

    Args:
        csv_file: CSV file of DOIs, handed to submit_search.search_by_DOI.
            Its name (minus the ".csv" extension) is reused in the output
            file name.

    Side effects:
        Writes "WOS_scraping - <name> - <date>.csv" in the working directory.
    """
    # start session
    SID = wok_soap.auth()
    counter = 0

    # run search to output list of search results file names
    search_output = submit_search.search_by_DOI(csv_file, SID)
    file_list = search_output[0]
    counter = search_output[1]
    print("found " + str(len(file_list)) + " files")

    paper_list = []

    # loop through each entry in the list of files found
    for filename in file_list:

        # open search results file and parse as XML
        with open(filename) as h:
            tree = ET.parse(h)
            root = tree.getroot()

        # if the file is not empty, process the record
        # (test len() explicitly: truth-testing an Element is deprecated)
        if len(root):
            record = root[0]
            paper = process_article(record)
            paper = citation_analysis(paper, SID, counter)
            print("parsed " + filename)
            paper_list.append(paper)
        else:
            print("no paper found")

    # guard: with no parsed papers there is no header row to derive —
    # the original crashed here with IndexError on paper_list[0]
    if not paper_list:
        print("no papers found; nothing to write")
        return

    print("printing publication table")
    # newline="" is required by the csv module to avoid blank rows on Windows
    with open("WOS_scraping - " + csv_file[:-4] + " - " +
              time.strftime("%d %b %Y") + ".csv", "w", newline="") as g:

        writer = csv.writer(g, delimiter=',')

        # headings: all fields except the two "__..." list fields (sort last)
        example_paper = paper_list[0]
        heading_tuples = sorted(example_paper.items(), key=lambda k: k[0])[:-2]
        heading = [field[0] for field in heading_tuples]
        writer.writerow(heading)

        # Fill in values for paper data
        for paper in paper_list:
            print("writing row for " + paper["DOI"])
            dictionary_tuples = sorted(paper.items(), key=lambda k: k[0])[:-2]
            row = [field[1] for field in dictionary_tuples]
            writer.writerow(row)
Пример #7
0
def citation_analysis(paper, SID, counter):
    """Augment *paper* with cited-reference and citing-article statistics.

    Searches WOS for the paper's cited references and citing articles, then
    derives average reference age, journal diversity, and citation counts
    bucketed by relative year, 12-month window, and calendar year 2009-2015.

    Args:
        paper: dict of paper data; must contain "UID", "Publication Year",
            "Publication Date" (YYYY-MM-DD), and "Journal Title".
        SID: WOS session identifier.
        counter: records retrieved so far in this session.

    Returns:
        The same dict, updated in place.

    NOTE(review): the updates made to SID/counter here are not returned,
    so the caller's session bookkeeping never sees them — confirm intended.
    """
    UID = paper["UID"]

    # search for references cited by this paper
    cited_refs_output = submit_search.search_for_cited_refs(UID, SID)
    counter += cited_refs_output[1]
    # renew the session before the ~2500-record allowance is exhausted
    if counter >= 2499:
        SID = wok_soap.auth()
        counter = 0

    paper["__cited references"] = process_cited_refs(cited_refs_output)

    if paper["__cited references"]:
        # average age of reference = publication year minus mean cited year
        year_list = [int(ref["Year"]) for ref in paper["__cited references"] if ref["Year"] != ""]
        average_year = sum(year_list) / float(len(year_list))
        paper["Average Age of Reference"] = int(paper["Publication Year"]) - average_year

        # diversity index = share of cited works NOT from this paper's journal
        journal_list = [ref["Cited Work"] for ref in paper["__cited references"]]
        same_journal_list = [y for y in journal_list if y == paper["Journal Title"]]
        paper["Diversity Index"] = 1 - (len(same_journal_list) / float(len(journal_list)))

    # search for articles that cite this paper
    citing_articles_output = submit_search.search_for_citing_articles(UID, SID)
    counter += citing_articles_output[1]

    paper["__citing articles"] = process_citing_articles(citing_articles_output)

    paper["Times Cited through 12-31-2015"] = len(paper["__citing articles"])

    # citations bucketed by calendar year relative to the publication year
    for year in range(-1, 4):
        key = "Citations in Year " + str(year)
        citations = [article["Publication Year"] for article in paper["__citing articles"]
                     if int(article["Publication Year"]) - int(paper["Publication Year"]) == year]
        paper[key] = len(citations)

    # citations bucketed by 12-month windows after the publication date
    date_format = "%Y-%m-%d"
    for year in range(4):
        key = "Citations in month %s to %s" % (str((year-1)*12), str(year*12))

        citations_2 = []
        for article in paper["__citing articles"]:
            delta = d.strptime(article["Publication Date"], date_format) - d.strptime(paper["Publication Date"], date_format)
            article["Cite Time"] = delta.days / float(365)
            if year-1 < article["Cite Time"] <= year:
                citations_2.append(article["Publication Date"])

        paper[key] = len(citations_2)

    # citations bucketed by absolute calendar year, 2009-2015 inclusive
    for year in range(2009, 2016):
        key = "Citations in %s" % (str(year))
        citations_3 = [article["Publication Year"] for article in paper["__citing articles"]
                       if int(article["Publication Year"]) == year]
        paper[key] = len(citations_3)

    return paper
Пример #8
0
def citation_analysis(paper, SID, counter):
    """Augment *paper* with cited-reference and citing-article statistics.

    Searches WOS for the paper's cited references and citing articles, then
    derives average reference age, journal diversity, and citation counts
    bucketed by relative year (-1..13), 12-month window, and calendar year
    2003-2017.

    Args:
        paper: dict of paper data; must contain "UID", "Publication Year",
            "Publication Date" (YYYY-MM-DD), and "Journal Title".
        SID: WOS session identifier.
        counter: records retrieved so far in this session.

    Returns:
        The same dict, updated in place.

    NOTE(review): the updates made to SID/counter here are not returned,
    so the caller's session bookkeeping never sees them — confirm intended.
    """
    UID = paper["UID"]

    # search for references cited in the paper
    cited_refs_output = submit_search.search_for_cited_refs(UID, SID)
    counter += cited_refs_output[1]
    # renew the session before the ~2500-record allowance is exhausted
    if counter >= 2499:
        SID = wok_soap.auth()
        counter = 0

    paper["__cited references"] = process_cited_refs(cited_refs_output)

    if paper["__cited references"]:
        # average age of reference = publication year minus mean cited year
        year_list = [int(ref["Year"]) for ref in paper["__cited references"] if ref["Year"] != ""]
        average_year = sum(year_list) / float(len(year_list))
        paper["Average Age of Reference"] = int(paper["Publication Year"]) - average_year

        # diversity index = share of cited works NOT from this paper's journal
        journal_list = [ref["Cited Work"] for ref in paper["__cited references"]]
        same_journal_list = [y for y in journal_list if y == paper["Journal Title"]]
        paper["Diversity Index"] = 1 - (len(same_journal_list) / float(len(journal_list)))

    # search for articles that cite the paper
    citing_articles_output = submit_search.search_for_citing_articles(UID, SID)
    counter += citing_articles_output[1]

    paper["__citing articles"] = process_citing_articles(citing_articles_output)

    paper["Times Cited through Search Period"] = len(paper["__citing articles"])

    # count citations in calendar years relative to the year of publication,
    # up to year 13 inclusive
    for year in range(-1, 14):
        key = "Citations in Year " + str(year)
        citations = [article["Publication Year"] for article in paper["__citing articles"]
                     if int(article["Publication Year"]) - int(paper["Publication Year"]) == year]
        paper[key] = len(citations)

    # count citations in 12 month periods after the publication date
    date_format = "%Y-%m-%d"
    for year in range(4):
        key = "Citations in month %s to %s" % (str((year-1)*12), str(year*12))

        citations_2 = []
        for article in paper["__citing articles"]:
            delta = d.strptime(article["Publication Date"], date_format) - d.strptime(paper["Publication Date"], date_format)
            article["Cite Time"] = delta.days / float(365)
            if year-1 < article["Cite Time"] <= year:
                citations_2.append(article["Publication Date"])

        paper[key] = len(citations_2)

    # count citations in calendar years 2003-2017 inclusive
    for year in range(2003, 2018):
        key = "Citations in %s" % (str(year))
        citations_3 = [article["Publication Year"] for article in paper["__citing articles"]
                       if int(article["Publication Year"]) == year]
        paper[key] = len(citations_3)

    return paper
Пример #9
0
        root = ET.fromstring(results_unicode)
        length = len(root)
        if length != results_count:
            raise Exception

        # Write raw search results to txt file
        with open(filename, "w") as f:
            f.write(results_unicode)

    return [filename, counter]


def counter_check(counter, SID):
    """Track records retrieved in the current WOS session, renewing it as needed.

    Increments *counter* and, when the session nears the ~2500-record
    retrieval limit — or when no session is open yet (empty SID) — opens
    a fresh session via wok_soap.auth() and resets the count.

    Args:
        counter: number of records retrieved so far in this session.
        SID: current session identifier ("" when no session is open).

    Returns:
        [counter, SID] — the updated count and (possibly new) session ID.
    """
    counter += 1
    # WOS only allows ~2500 record retrievals per session, so re-authenticate
    # shortly before the cap; also authenticate when no session exists yet
    # (matches the SID == "" handling used elsewhere in this project).
    if counter >= 2499 or SID == "":
        SID = wok_soap.auth()
        counter = 0

    return [counter, SID]


# Default inputs used when this module is run directly.
csv_file = "DOE grant short list.csv"  # grant list consumed by the search functions
UID = "WOS:000346178800058"  # example WOS record identifier

if __name__ == '__main__':
    # Open a WOS session and run a citing-articles search for the example UID.
    SID = wok_soap.auth()
    search_for_citing_articles(UID, SID)


Пример #10
0
        if length != results_count:
            raise Exception

        # Write raw search results to txt file
        with open(filename, "w") as f:
            f.write(results_unicode)

    return [filename, counter]


def counter_check(counter, SID):
    """Track records retrieved in the current WOS session, renewing it as needed.

    Increments *counter* and, when the session nears the ~2500-record
    retrieval limit — or when no session is open yet (empty SID) — opens
    a fresh session via wok_soap.auth() and resets the count.

    Args:
        counter: number of records retrieved so far in this session.
        SID: current session identifier ("" when no session is open).

    Returns:
        [counter, SID] — the updated count and (possibly new) session ID.
    """
    counter += 1
    # WOS only allows ~2500 record retrievals per session, so re-authenticate
    # shortly before the cap; also authenticate when no session exists yet
    # (matches the SID == "" handling used elsewhere in this project).
    if counter >= 2499 or SID == "":
        SID = wok_soap.auth()
        counter = 0

    return [counter, SID]


if __name__ == '__main__':

    # Open a Web of Science session and run a DOI search over the example file,
    # then show the first result file name.
    SID = wok_soap.auth()

    csv_file = "example DOIs.csv"
    file_list = search_by_DOI(csv_file, SID)
    print(file_list[0])

# Commented-out example: citing-articles search for a single WOS record.
#    UID = "WOS:000283490400005"
#
#    search_for_citing_articles(UID, SID)