示例#1
0
def getUniversitiesID():
    """Collect the Scopus affiliation ID and display name of each university.

    Iterates over the module-level ``university_names``. For every name the
    affiliation search is run with the text that precedes the first " (" in
    the name, the first result link is opened, and the values read from the
    page are stored in the module-level dicts ``universities_id_scopus`` and
    ``universities_names_scopus`` under the original name.

    If anything in the lookup of one university fails, both dict entries for
    that university are set to ``None`` and the loop continues.

    Returns:
        The module-level ``universities_id_scopus`` dict.
    """
    bf.getElementByXpath(AFFILIATIONS_TAB_XPATH).click()

    for name in university_names:
        key = str(name)

        bf.getElementByXpath(AFFILIATION_INPUT_XPATH).click()
        bf.driver.find_element_by_xpath(AFFILIATION_INPUT_XPATH).clear()
        # Only the part of the name before the first " (" is typed in.
        name_search = name.split(" (")[0]
        bf.driver.find_element_by_xpath(AFFILIATION_INPUT_XPATH).send_keys(name_search)
        time.sleep(2)
        bf.getElementByXpath(SEARCH_AFFILIATION_BUTTON_XPATH).click()

        try:
            # Click on the university link of the result table.
            bf.getElementByXpath("/html/body/div[1]/div/div[1]/div[1]/div/div[3]/form/div[4]/div[2]/div/div/div[3]/table/tbody/tr/td[1]/div/div[1]/span/a").click()

            university_id = WebDriverWait(bf.driver, 5).until(
                EC.element_to_be_clickable((By.CLASS_NAME, "affId"))
            ).text
            print(university_id)
            university_id = university_id.replace("Affiliation ID: ", "")
            universities_id_scopus[key] = str(university_id)

            university_name = WebDriverWait(bf.driver, 5).until(
                EC.element_to_be_clickable((By.CLASS_NAME, "wordBreakWord"))
            ).text
            universities_names_scopus[key] = str(university_name)

        except Exception:
            # `Exception` (not a bare except) so Ctrl-C / SystemExit still
            # stop the scraper instead of being recorded as "not found".
            print("University name not found: " + name)
            universities_id_scopus[key] = None
            universities_names_scopus[key] = None

        finally:
            # Always click the header link again before the next search.
            bf.getElementByXpath("/html/body/div[1]/div/div[1]/header/div[2]/a/span").click()
    return universities_id_scopus
示例#2
0
def getDataBetweenTwoYears(startYear, endYear):
    """Scrape and save the data of every year from startYear to endYear.

    Opens the browser, loads ``qs_arab_url``, clicks the element located by
    ``ACCEPT_BUTTON_XPATH`` and then, for each year in the inclusive range,
    passes ``qs_arab_url + str(year)`` to ``scrapeData`` and writes the result
    with ``base_functions.copyToFile`` to
    ``base_functions.results_folder_name + str(year)``.

    Args:
        startYear: First year to scrape (inclusive).
        endYear: Last year to scrape (inclusive).
    """
    base_functions.openBrowser()
    try:
        base_functions.changeURL(qs_arab_url)
        time.sleep(5)
        base_functions.getElementByXpath(ACCEPT_BUTTON_XPATH).click()
        for year in range(startYear, endYear + 1):
            data = scrapeData(qs_arab_url + str(year))
            base_functions.copyToFile(data, base_functions.results_folder_name + str(year))
    finally:
        # Close the browser even when a step above raises, so a failed run
        # does not leave a browser process behind.
        base_functions.closeBrowser()
示例#3
0
def getCitaions(uni, year, first_citation_year=2014, last_citation_year=2021):
    """Read the per-year citation counts shown for the current result list.

    Selects all documents, opens the citations view, sets the start year to
    ``first_citation_year``, updates the overview and then reads one value
    for each year from ``first_citation_year`` to ``last_citation_year``.

    Args:
        uni: Value stored under the "uni" key of the result.
        year: Value stored under the "publications_year" key of the result.
        first_citation_year: First citation year to read (inclusive). It must
            be one of the entries returned by ``getStartYearPaths``,
            otherwise a ``KeyError`` is raised.
        last_citation_year: Last citation year to read (inclusive).

    Returns:
        A dict with the keys "uni" and "publications_year" plus one key per
        citation year (the year as a string) mapped to the text read from the
        page for that year.
    """
    bf.getElementByXpath(SELECT_ALL_ARROW_XPATH).click()
    bf.getElementByXpath(SELECT_ALL_LABEL_XPATH).click()
    bf.getElementByXpath(VIEW_CITATIONS_BUTTON_XPATH).click()

    print("\nLoad:")
    waitPageLoading()
    time.sleep(2)
    waitPageLoading()

    bf.getElementById(START_YEAR_ID).click()
    year_path_map = getStartYearPaths()
    # TODO: the defaults should follow the current year (current year - 5 or
    # - 4) instead of being fixed; callers can already override them.
    bf.getElementByXpath(year_path_map[str(first_citation_year)]).click()
    bf.getElementById(UPDATE_OVERVIEW_BUTTON_ID).click()
    waitPageLoading()

    citations = {}
    citations["uni"] = uni
    citations["publications_year"] = year
    for citation_year in range(first_citation_year, last_citation_year + 1):
        cell_path = "//*[@id='year_" + str(citation_year) + "']"
        try:
            # Preferred location: the <strong> nested inside the year cell.
            nb_citation = bf.driver.find_element_by_xpath(
                cell_path + "/a/span/strong"
            ).get_attribute("innerHTML")
        except Exception:
            # Fall back to the text of the year cell itself. `Exception`
            # rather than a bare except keeps Ctrl-C / SystemExit working.
            nb_citation = bf.getElementByXpath(cell_path).text
        citations[str(citation_year)] = nb_citation

    return citations
示例#4
0
def getDocumentsBySubject(uni, year):
    """Read the subject-area table of the "analyse search results" view.

    Args:
        uni: Value stored under the "uni" key of the result.
        year: Value stored under the "year" key of the result.

    Returns:
        A dict with the keys "uni" and "year" plus one key per table row: the
        text of the row's first cell mapped to the text read from its second
        cell.
    """
    bf.getElementById(ANALYSE_SEARCH_RESULTS_ID).click()
    bf.getElementById(ANALYSE_SUBJECT_MINIGRAPH_ID).click()

    documents = {"uni": uni, "year": year}
    row_count = len(bf.driver.find_elements_by_xpath(SUBJECT_AREA_TABLE_XPATH))

    # Rows are addressed with 1-based XPath indices.
    row_prefix = "/html/body/div[1]/div/div[1]/div[2]/div/div[3]/form/div[2]/section[2]/div/div[7]/div[1]/div/table/tbody/tr["
    for row_number in range(1, row_count + 1):
        row_path = f"{row_prefix}{row_number}]"
        subject = bf.getElementByXpath(row_path + "/td[1]").text
        nb_documents = bf.getElementByXpath(row_path + "/td[2]/a/span").text
        documents[subject] = nb_documents
    return documents
示例#5
0
def getStartYearPaths():
    """Map the text of the first ten start-year list entries to their XPath.

    Returns:
        A dict whose keys are the texts read from
        ``/html/body/div[2]/ul/li[i]/div`` for i in 1..10 and whose values
        are the corresponding XPath strings.
    """
    # Named `year_paths` rather than `dict` so the builtin is not shadowed.
    year_paths = {}
    for position in range(1, 11):
        path = "/html/body/div[2]/ul/li[" + str(position) + "]/div"
        label = bf.getElementByXpath(path).text
        year_paths[label] = path
    return year_paths
示例#6
0
def executeQuery(query_filename, scopusNames_filename, scopusId_filename):
    """Run the query template for every university and year and collect results.

    The template loaded from ``query_filename`` has its ``*UNIVERSITY_NAME*``,
    ``*UNIVERSITY_ID*`` and ``*YEAR*`` placeholders replaced for each
    university (keys of the names file whose value is not ``None``) and each
    year in 2014..2021, and the resulting query is submitted.

    Args:
        query_filename: File containing the query template.
        scopusNames_filename: JSON file mapping a university key to its name.
        scopusId_filename: JSON file mapping a university key to its ID.

    Returns:
        ``[publications_list, citations_list, pub_per_subject_list,
        error_list]``; ``error_list`` holds ``"<key>-<year>"`` strings for
        every combination that was not collected.
    """
    publications_list = []
    citations_list = []
    pub_per_subject_list = []
    error_list = []

    def _record_failure(key, year):
        # `year` is an int: it must be converted before concatenation,
        # otherwise the error path itself raises TypeError and aborts the run.
        print("Error in" + key + " " + str(year))
        error_list.append(key + "-" + str(year))
        bf.changeURL(scopus_url)

    query = bf.loadFile(query_filename)
    names = bf.loadJsonFile(scopusNames_filename)
    ids = bf.loadJsonFile(scopusId_filename)
    for key in names:
        if names[key] is None:
            # No name was found for this university: nothing to query.
            continue
        for year in range(2014, 2022):  # to be changed to only current year
            try:
                uni_query = (
                    query.replace("*UNIVERSITY_NAME*", names[key])
                    .replace("*UNIVERSITY_ID*", ids[key])
                    .replace("*YEAR*", str(year))
                )
                enterQuery(uni_query)
                nb_publications = getNbOfDocuments(key, year)
                # NOTE(review): as written, every non-zero document count is
                # treated as an error and only a count of 0 proceeds to the
                # citation/subject scraping. This looks inverted or is missing
                # a threshold comparison -- confirm the intended condition.
                # Behaviour is kept as in the original.
                if int(nb_publications.replace(',', '')) != 0:
                    _record_failure(key, year)
                else:
                    citations = getCitaions(key, year)
                    goBack()
                    documents_per_subject = getDocumentsBySubject(key, year)
                    bf.getElementByXpath("/html/body/div[1]/div[1]/div[1]/header/div[2]/a/span").click()

                    publications_list.append(nb_publications)
                    citations_list.append(citations)
                    pub_per_subject_list.append(documents_per_subject)
            except Exception:
                # `Exception` rather than a bare except so Ctrl-C / SystemExit
                # still interrupt a long scraping run.
                _record_failure(key, year)

    return [publications_list, citations_list, pub_per_subject_list, error_list]
示例#7
0
def goBackToResults():
    """Click the element located by ``BACK_TORESULTS_BUTTON_XPATH``."""
    back_to_results_button = bf.getElementByXpath(BACK_TORESULTS_BUTTON_XPATH)
    back_to_results_button.click()
示例#8
0
def goBack():
    """Click the element located by ``BACK_BUTTON_XPATH``."""
    back_button = bf.getElementByXpath(BACK_BUTTON_XPATH)
    back_button.click()
示例#9
0
def enterQuery(query):
    """Replace the content of the query input with ``query`` and submit it.

    Args:
        query: Text typed into the element located by
            ``QUERY_STRING_INPUT_XPATH``.
    """
    # The input is looked up again for each action, as in the original flow.
    input_to_clear = bf.driver.find_element_by_xpath(QUERY_STRING_INPUT_XPATH)
    input_to_clear.clear()

    input_to_fill = bf.driver.find_element_by_xpath(QUERY_STRING_INPUT_XPATH)
    input_to_fill.send_keys(query)

    search_button = bf.getElementByXpath(SEARCH_BUTTON_XPATH)
    search_button.click()
示例#10
0
def gotoAdvancedSearch():
    """Click the element located by ``ADVANCED_SEARCH_BUTTON_XPATH``."""
    advanced_search_button = bf.getElementByXpath(ADVANCED_SEARCH_BUTTON_XPATH)
    advanced_search_button.click()
示例#11
0
def login():
    """Sign in using ``scopus_email_address`` and ``scopus_password``.

    Clicks the sign-in button, types the e-mail address, clicks the continue
    button, types the password and finally clicks the submit button.
    """
    sign_in_button = bf.getElementByXpath(SIGNIN_BUTTON_XPATH)
    sign_in_button.click()

    # Step 1: e-mail address, then continue.
    email_field = bf.driver.find_element_by_xpath(EMAIL_INPUT_XPATH)
    email_field.send_keys(scopus_email_address)
    continue_button = bf.getElementByXpath(CONTINUE_BUTTON_XPATH)
    continue_button.click()

    # Step 2: password, then submit.
    password_field = bf.driver.find_element_by_xpath(PASSWORD_INPUT_XPATH)
    password_field.send_keys(scopus_password)
    submit_button = bf.getElementByXpath(SUBMIT_SIGN_IN_BUTTON_XPATH)
    submit_button.click()