示例#1
0
def search_for_citing_articles(UID, SID):
    """Fetch and cache the raw XML describing the articles that cite ``UID``.

    The XML is stored in ``citing articles search results xml/<UID[4:]>.txt``.
    When that file already exists nothing is requested and the cached path is
    returned immediately.

    Args:
        UID: Record identifier. Its first four characters are dropped when
            building the cache file name (presumably a ``WOS:`` prefix --
            confirm against callers).
        SID: Session identifier handed to ``wok_soap``. ``counter_check`` may
            return a replacement, which is then used for subsequent requests.

    Returns:
        ``[filename, counter]``: the cache file path and the counter value
        last returned by ``counter_check`` (``0`` when the cache was hit).

    Raises:
        Exception: If the number of records parsed from the combined XML does
            not match the record count reported by the search.
    """
    directory = "citing articles search results xml"
    # exist_ok avoids the race between a separate existence check and creation.
    os.makedirs(directory, exist_ok=True)

    filename = directory + "/" + UID[4:] + ".txt"
    counter = 0

    # Cached from an earlier run: nothing to fetch.
    if os.path.exists(filename):
        return [filename, counter]

    print(UID + " citing articles")

    # Search on WOS
    results = wok_soap.citingArticles(UID, SID)
    counter, SID = counter_check(counter, SID)

    queryId = results[0]
    results_count = results[1]
    # Raw search results are the 5th item of the response object.
    results_unicode = results[4]

    # Records beyond the first 100 are fetched in pages starting at
    # 101, 201, ... up to results_count.
    for start_count in range(101, results_count + 1, 100):
        more_results = wok_soap.retrieve(queryId, SID, start_count, "Fields")
        counter, SID = counter_check(counter, SID)
        # Splice the pages into one document: drop the last 10 characters of
        # what we have and the first 82 of the new page (presumably the
        # closing root tag and the XML header + opening root tag -- confirm
        # against real payloads).
        results_unicode = results_unicode[:-10] + more_results[0][82:]

    root = ET.fromstring(results_unicode)
    length = len(root)
    if length != results_count:
        raise Exception(
            "Citing-article search for %s reported %s records but %s were "
            "parsed" % (UID, results_count, length))

    # Write raw search results to txt file. An explicit encoding keeps
    # non-ASCII metadata from failing on non-UTF-8 locales.
    with open(filename, "w", encoding="utf-8") as f:
        f.write(results_unicode)

    return [filename, counter]
示例#2
0
def search_for_citing_articles(UID, SID):
    """Fetch and cache the raw XML describing the articles that cite ``UID``.

    The XML is stored as UTF-8 bytes in
    ``citing articles search results xml/<UID[4:]>.txt``. When that file
    already exists nothing is requested and the cached path is returned.

    The body is written so that it runs on both Python 2 and Python 3.

    Args:
        UID: Record identifier. Its first four characters are dropped when
            building the cache file name (presumably a ``WOS:`` prefix --
            confirm against callers).
        SID: Session identifier handed to ``wok_soap``. ``counter_check`` may
            return a replacement, which is then used for subsequent requests.

    Returns:
        ``[filename, counter]``: the cache file path and the counter value
        last returned by ``counter_check`` (``0`` when the cache was hit).

    Raises:
        Exception: If the number of records parsed from the combined XML does
            not match the record count reported by the search.
    """
    directory = "citing articles search results xml"
    if not os.path.exists(directory):
        os.makedirs(directory)

    filename = directory + "/" + UID[4:] + ".txt"
    counter = 0

    # Cached from an earlier run: nothing to fetch.
    if os.path.exists(filename):
        return [filename, counter]

    # Single-argument call form prints identically on Python 2 and 3.
    print(UID + " citing articles")

    # Search on WOS
    results = wok_soap.citingArticles(UID, SID)
    counter, SID = counter_check(counter, SID)

    queryId = results[0]
    results_count = results[1]
    print(results_count)

    # Raw search results are the 5th item of the response object; they are
    # handled as UTF-8 bytes from here on.
    results_unicode = results[4].encode('utf-8')

    # Records beyond the first 100 are fetched in pages starting at
    # 101, 201, ... up to results_count.
    for start_count in range(101, results_count + 1, 100):
        more_results = wok_soap.retrieve(queryId, SID, start_count, "Fields")
        counter, SID = counter_check(counter, SID)
        more_results_unicode = more_results[0].encode('utf-8')
        # Splice the pages into one document: drop the last 10 bytes of what
        # we have and the first 82 of the new page (presumably the closing
        # root tag and the XML header + opening root tag -- confirm against
        # real payloads).
        results_unicode = results_unicode[:-10] + more_results_unicode[82:]

    root = ET.fromstring(results_unicode)
    length = len(root)
    if length != results_count:
        raise Exception(
            "Citing-article search for %s reported %s records but %s were "
            "parsed" % (UID, results_count, length))

    # Write raw search results to txt file. The payload is already encoded,
    # so it is written in binary mode (text mode rejects bytes on Python 3).
    with open(filename, "wb") as f:
        f.write(results_unicode)

    return [filename, counter]
示例#3
0
def search_by_grant(csv_file, SID):
    """Run a funding-text search for every grant number in a CSV file.

    The first column of ``csv_file`` is read as the list of grant numbers
    (blank rows are skipped). For each grant a query is built and, unless a
    cache file for that query already exists under
    ``grant search results xml/``, the search is executed through
    ``wok_soap`` and the raw XML is saved as UTF-8 bytes.

    The body is written so that it runs on both Python 2 and Python 3.

    Args:
        csv_file: Path of the CSV file whose first column holds grant numbers.
        SID: Session identifier handed to ``wok_soap``. ``counter_check`` may
            return a replacement, which is then used for subsequent requests.

    Returns:
        ``[grant_list, file_list, counter]``: the grant numbers read, the
        cache file path for each of them (same order), and the counter value
        last returned by ``counter_check`` (``0`` if nothing was fetched).

    Raises:
        RuntimeError: If the number of records parsed from the combined XML
            does not match the record count reported by the search.
    """
    import sys  # local import: only needed to choose the csv open mode

    directory = "grant search results xml"
    if not os.path.exists(directory):
        os.makedirs(directory)

    # The csv module wants a binary file on Python 2 and a text file opened
    # with newline="" on Python 3.
    if sys.version_info[0] < 3:
        h = open(csv_file, "rb")
    else:
        h = open(csv_file, newline="")
    with h:
        # Blank lines yield empty rows; skip them instead of failing on row[0].
        grant_list = [row[0] for row in csv.reader(h) if row]

    file_list = []
    counter = 0

    for grant_number_full in grant_list:
        # Define query
        if grant_number_full[0:2] == "DE":
            # Search the number both with and without a space after the
            # two-character prefix found at positions 3-4.
            prefix = grant_number_full[3:5]
            grant_number = grant_number_full[5:]
            query = ("FT = " + prefix + grant_number +
                     " OR FT = " + prefix + " " + grant_number)
        else:
            query = "FT = " + grant_number_full

        # Slashes would be read as path separators, so drop them from the
        # file name in both branches.
        filename = directory + "/" + query.replace("/", "") + ".txt"
        file_list.append(filename)

        # Cached from an earlier run: nothing to fetch.
        if os.path.exists(filename):
            continue

        # Single-argument call form prints identically on Python 2 and 3.
        print(query)

        # Search on WOS
        results = wok_soap.search(query, SID)
        counter, SID = counter_check(counter, SID)

        queryId = results[0]
        results_count = results[1]

        # Raw search results are the 4th item of the response object; they
        # are handled as UTF-8 bytes from here on.
        results_unicode = results[3].encode('utf-8')

        # Records beyond the first 100 are fetched in pages starting at
        # 101, 201, ... up to results_count.
        for start_count in range(101, results_count + 1, 100):
            more_results = wok_soap.retrieve(queryId, SID, start_count,
                                             "FullRecord")
            counter, SID = counter_check(counter, SID)
            more_results_unicode = more_results[0].encode('utf-8')
            # Splice the pages into one document: drop the last 10 bytes of
            # what we have and the first 86 of the new page (presumably the
            # closing root tag and the XML header + opening root tag --
            # confirm against real payloads).
            results_unicode = results_unicode[:-10] + more_results_unicode[86:]

        root = ET.fromstring(results_unicode)
        length = len(root)
        if length != results_count:
            # A bare `raise` here has no active exception to re-raise and
            # would surface as an unrelated error with no explanation.
            raise RuntimeError(
                "Grant search '%s' reported %s records but %s were parsed"
                % (query, results_count, length))

        # Write raw search results to txt file. The payload is already
        # encoded, so it is written in binary mode.
        with open(filename, "wb") as f:
            f.write(results_unicode)

    return [grant_list, file_list, counter]
示例#4
0
def search_by_grant(csv_file, SID):
    """Run a funding-text search for every grant number in a CSV file.

    The first column of ``csv_file`` is read as the list of grant numbers
    (blank rows are skipped). For each grant a query is built and, unless a
    cache file for that query already exists under
    ``grant search results xml/``, the search is executed through
    ``wok_soap`` and the raw XML is saved as UTF-8 text.

    Args:
        csv_file: Path of the CSV file whose first column holds grant numbers.
        SID: Session identifier handed to ``wok_soap``. ``counter_check`` may
            return a replacement, which is then used for subsequent requests.

    Returns:
        ``[grant_list, file_list, counter]``: the grant numbers read, the
        cache file path for each of them (same order), and the counter value
        last returned by ``counter_check`` (``0`` if nothing was fetched).

    Raises:
        RuntimeError: If the number of records parsed from the combined XML
            does not match the record count reported by the search.
    """
    directory = "grant search results xml"
    # exist_ok avoids the race between a separate existence check and creation.
    os.makedirs(directory, exist_ok=True)

    # newline="" is the open mode the csv module documents for its readers.
    with open(csv_file, newline="") as h:
        # Blank lines yield empty rows; skip them instead of failing on row[0].
        grant_list = [row[0] for row in csv.reader(h) if row]

    file_list = []
    counter = 0

    for grant_number_full in grant_list:
        # Define query
        if grant_number_full[0:2] == "DE":
            # Search the number both with and without a space after the
            # two-character prefix found at positions 3-4.
            prefix = grant_number_full[3:5]
            grant_number = grant_number_full[5:]
            query = ("FT = " + prefix + grant_number +
                     " OR FT = " + prefix + " " + grant_number)
        else:
            query = "FT = " + grant_number_full

        # Slashes would be read as path separators, so drop them from the
        # file name in both branches.
        filename = directory + "/" + query.replace("/", "") + ".txt"
        file_list.append(filename)

        # Cached from an earlier run: nothing to fetch.
        if os.path.exists(filename):
            continue

        print(query)

        # Search on WOS
        results = wok_soap.search(query, SID)
        counter, SID = counter_check(counter, SID)

        queryId = results[0]
        results_count = results[1]

        # Raw search results are the 4th item of the response object.
        results_unicode = results[3]

        # Records beyond the first 100 are fetched in pages starting at
        # 101, 201, ... up to results_count.
        for start_count in range(101, results_count + 1, 100):
            more_results = wok_soap.retrieve(queryId, SID, start_count,
                                             "FullRecord")
            counter, SID = counter_check(counter, SID)
            # Keep every page as text: encoding the extra pages to bytes
            # made the concatenation below fail with a str/bytes TypeError.
            # Splice by dropping the last 10 characters of what we have and
            # the first 86 of the new page (presumably the closing root tag
            # and the XML header + opening root tag -- confirm against real
            # payloads).
            results_unicode = results_unicode[:-10] + more_results[0][86:]

        root = ET.fromstring(results_unicode)
        length = len(root)
        if length != results_count:
            # A bare `raise` here has no active exception to re-raise; it
            # surfaced as a RuntimeError without any explanation.
            raise RuntimeError(
                "Grant search '%s' reported %s records but %s were parsed"
                % (query, results_count, length))

        # Write raw search results to txt file. An explicit encoding keeps
        # non-ASCII metadata from failing on non-UTF-8 locales.
        with open(filename, "w", encoding="utf-8") as f:
            f.write(results_unicode)

    return [grant_list, file_list, counter]
示例#5
0
def searchByGrantOrDOI(csv_file, searchType):
    """Search by grant number or by DOI / WOS id for each row of a CSV file.

    The first column of ``csv_file`` is read (blank rows are skipped and any
    ``\\ufeff`` byte-order mark is removed). One query per value is built
    according to ``searchType``; unless a cache file for the query already
    exists under ``search by grant or doi xml/``, the search is executed
    through ``wok_soap`` and the raw XML is saved as UTF-8 text.

    Args:
        csv_file: Path of the CSV file whose first column holds the values.
        searchType: ``"grant"`` or ``"doi"`` (case-insensitive). In ``"doi"``
            mode a value starting with ``WOS`` is searched as ``UT``, any
            other value as a quoted ``DO`` term.

    Returns:
        ``[column1List, file_list, counter]``: the values read, the cache
        file path for each query (same order), and the counter value last
        returned by ``counter_check`` (``0`` if nothing was fetched).

    Raises:
        Exception: If ``searchType`` is not supported, or if a paginated
            result set does not contain the reported number of records.
    """
    directory = "search by grant or doi xml/"
    # exist_ok avoids the race between a separate existence check and creation.
    os.makedirs(directory, exist_ok=True)

    # newline="" is the open mode the csv module documents for its readers.
    with open(csv_file, newline="") as h:
        # Skip blank rows (row[0] would fail) and strip a leading BOM.
        column1List = [row[0].replace(u'\ufeff', '')
                       for row in csv.reader(h) if row]

    counter = 0
    # Starts empty; counter_check is called before each request and returns
    # the session id to use.
    SID = ""

    queryList = []
    file_list = []

    # === Handle second argument, searchType ===
    searchType = searchType.lower()
    acceptableSearchTypes = ["grant", "doi"]  # later can add author, etc
    if searchType not in acceptableSearchTypes:
        raise Exception("Second argument of searchByGrantOrDOI must be 'grant' or 'doi'")

    # === Create the queries ===
    if searchType == "grant":
        for fullNumber in column1List:
            if fullNumber[0:2] == "DE":
                # Search the number both with and without a space after the
                # two-character prefix found at positions 3-4.
                prefix = fullNumber[3:5]
                grantNumber = fullNumber[5:]
                query = ("FT = " + prefix + grantNumber +
                         " OR FT = " + prefix + " " + grantNumber)
            else:
                query = "FT = " + str(fullNumber)
            queryList.append(query)

    elif searchType == "doi":
        for result in column1List:
            print(result)
            # Remove whitespace and zero-width spaces.
            ID = result.strip(' \t\n\r').replace(" ", "").replace(u'\u200b', '')
            print("ID is " + ID)
            if ID[0:3] == "WOS":
                query = "UT = " + ID
            else:
                query = 'DO = "' + ID + '"'
            print("query = " + str(query))
            queryList.append(query)

    for q in queryList:
        # Create filename without slashes or quotes.
        filename = directory + q.replace("/", " ").replace('"', "") + ".txt"
        file_list.append(filename)

        # Cached from an earlier run: nothing to fetch.
        if os.path.exists(filename):
            continue

        # Search on WOS
        counter, SID = counter_check(counter, SID)
        results = wok_soap.search(q, SID)

        queryId = results[0]
        results_count = results[1]
        results_unicode = results[3]

        # Handling throttle problems - can't get more than 100 at once.
        if results_count > 100:
            # Remaining pages start at 101, 201, ... up to results_count.
            for start_count in range(101, results_count + 1, 100):
                counter, SID = counter_check(counter, SID)
                more_results = wok_soap.retrieve(queryId, SID, start_count,
                                                 "FullRecord")
                # Keep every page as text: encoding the extra pages and then
                # calling str() on the bytes embedded a "b'...'" repr in the
                # document. Splice by dropping the last 10 characters of what
                # we have and the first 86 of the new page (presumably the
                # closing root tag and the XML header + opening root tag --
                # confirm against real payloads).
                results_unicode = results_unicode[:-10] + more_results[0][86:]

            # NOTE(review): the record count is only verified for paginated
            # result sets, as in the original code -- confirm that
            # single-page results are meant to go unchecked.
            root = ET.fromstring(results_unicode)
            length = len(root)
            if length != results_count:
                raise Exception("length does not equal results_count")

        # Write raw search results to txt file. An explicit encoding keeps
        # non-ASCII metadata from failing on non-UTF-8 locales.
        with open(filename, "w", encoding="utf-8") as f:
            f.write(results_unicode)

    print(file_list)

    return [column1List, file_list, counter]  # subscription allows only 2500 records/session.