# getLeads: build one lead per <h3> heading that is not inside
# div#scholarship_intro_859 and return self.teacherDotOrgLeadArrays.
#
# For the first heading (i == 0) the description, source link and requirements
# are read from div.intro and the element that follows it. For every other
# heading, j = i + 1 is used as the XPath position of the heading, and the
# description / requirements / source link are read from the first and second
# <p> siblings that follow it, each guarded by self.checkIfElementExists.
# The source page is fetched with RipPage.getPageSource, all text fields go
# through CleanText.cleanALLtheText, and the lead is appended as
# [title, description, requirements, sourceWebsite, sourceText].
# The driver is closed before the accumulated list is returned.
#
# NOTE(review): the original indentation is lost; presumably the fetch / clean /
# append steps run only for titles other than 'Quick Links' and 'About Us' --
# confirm against the upstream file.
# NOTE(review): RipPage.getPageSource is called even when sourceWebsite is still
# '' -- confirm it tolerates an empty URL.
# NOTE(review): in XPath, "//h3[...][n]" selects the n-th matching <h3> among the
# children of each parent, not the n-th match in document order -- verify this
# is what the page layout requires.
def getLeads(self): titleDivs = self.driver.find_elements_by_xpath("//h3[not(ancestor::div[@id='scholarship_intro_859'])]") for i in range(len(titleDivs)): title = titleDivs[i].get_attribute('textContent') requirements = '' sourceWebsite = '' description = '' if title != 'Quick Links' and title != 'About Us': if i == 0: description = self.driver.find_element_by_xpath("//div[@class='intro']/p").get_attribute( 'textContent') sourceWebsite = self.driver.find_element_by_xpath("//div[@class='intro']/p/a").get_attribute('href') requirements = self.driver.find_element_by_xpath( "//div[@class='intro']/following-sibling::*[1][self::ul]").get_attribute('textContent') else: j = i + 1 if self.checkIfElementExists( "//h3[not(ancestor::div[@id='scholarship_intro_859'])][%s]/following-sibling::p[1]" % j): description = self.driver.find_element_by_xpath( "//h3[not(ancestor::div[@id='scholarship_intro_859'])][%s]/following-sibling::p[1]" % j).get_attribute( 'textContent') if self.checkIfElementExists( "//h3[not(ancestor::div[@id='scholarship_intro_859'])][%s]/following-sibling::p[2][(preceding-sibling::*[1][self::p])]" % j): requirements = self.driver.find_element_by_xpath( "//h3[not(ancestor::div[@id='scholarship_intro_859'])][%s]/following-sibling::p[2][(preceding-sibling::*[1][self::p])]" % j).get_attribute( 'textContent') if self.checkIfElementExists( "//h3[not(ancestor::div[@id='scholarship_intro_859'])][%s]/following-sibling::p[1]/a" % j): sourceWebsite = self.driver.find_element_by_xpath( "//h3[not(ancestor::div[@id='scholarship_intro_859'])][%s]/following-sibling::p[1]/a" % j).get_attribute( 'href') elif self.checkIfElementExists( "//h3[not(ancestor::div[@id='scholarship_intro_859'])][%s]/following-sibling::p[2][(preceding-sibling::*[1][self::p])]" % j): if self.checkIfElementExists( "//h3[not(ancestor::div[@id='scholarship_intro_859'])][%s]/following-sibling::p[2][(preceding-sibling::*[1][self::p])]/a" % j): sourceWebsite = self.driver.find_element_by_xpath( 
"//h3[not(ancestor::div[@id='scholarship_intro_859'])][%s]/following-sibling::p[2][(preceding-sibling::*[1][self::p])]/a" % j).get_attribute( 'href') sourceText = RipPage.getPageSource(sourceWebsite) title = CleanText.cleanALLtheText(title) description = CleanText.cleanALLtheText(description) requirements = CleanText.cleanALLtheText(requirements) sourceText = CleanText.cleanALLtheText(sourceText) leadArray = [title, description, requirements, sourceWebsite, sourceText] self.teacherDotOrgLeadArrays.append(leadArray) self.driver.close() return self.teacherDotOrgLeadArrays
def getTitleAbstractList(self):
    """Pair each cleaned title with its cleaned abstract.

    Returns:
        A list of ``[title, abstract]`` lists, one per entry of
        ``self.getAbstracts()``; titles are matched to abstracts by position.
    """
    allTitles = self.getTitles()
    allAbstracts = self.getAbstracts()
    pairedItems = []
    for position, rawAbstract in enumerate(allAbstracts):
        # The abstract is cleaned before the title, mirroring the lookup order.
        cleanedAbstract = CleanText.cleanALLtheText(rawAbstract)
        cleanedTitle = CleanText.cleanALLtheText(allTitles[position])
        pairedItems.append([cleanedTitle, cleanedAbstract])
    return pairedItems
def getResultPageInfo(self):
    """Scrape the labelled fields of the currently loaded result page.

    Each field is located unconditionally, so a missing field propagates
    whatever ``find_element_by_xpath`` raises.

    Returns:
        ``[sponsor, awardAmount, recipients, requirements, additionalInfo,
        contact, address, sourceWebsite, sourceText]``. The last two entries
        are empty strings when no ``button secondary`` link is present.
    """

    def cleanedTextAt(xpath):
        # Read the element's textContent and pass it through CleanText.
        element = self.driver.find_element_by_xpath(xpath)
        return CleanText.cleanALLtheText(element.get_attribute('textContent'))

    # Listed in the order of the returned array.
    fieldXpaths = (
        "//div/p/strong[text() = 'Awarded By']/../../following-sibling::div/p",
        "//div/p/strong[text() = 'Award Amount']/../../following-sibling::div/p",
        "//div/p/strong[text() = 'Recipients']/../../following-sibling::div/p",
        "//div/p/strong[text() = 'Requirements']/../../following-sibling::div",
        "//div/p/strong[text() = 'Additional Information']/../../following-sibling::div/p",
        "//div/p/strong[text() = 'Contact']/../../following-sibling::div/p",
        "//div/p/strong[text() = 'Address']/../../following-sibling::div",
    )
    resultPageArray = [cleanedTextAt(fieldXpath) for fieldXpath in fieldXpaths]

    sourceButtonXpath = "//a[@class='button secondary']"
    if not self.checkIfElementExists(sourceButtonXpath):
        sourceWebsite = ''
        sourceText = ''
    else:
        sourceWebsite = self.driver.find_element_by_xpath(sourceButtonXpath).get_attribute('href')
        sourceText = CleanText.cleanALLtheText(RipPage.getPageSource(sourceWebsite))

    resultPageArray.extend([sourceWebsite, sourceText])
    return resultPageArray
def getResultPageInfo(self):
    """Scrape the labelled fields of the currently loaded result page.

    Every field is optional: when its element is not found (according to
    ``self.checkIfElementExists``) the field is left as an empty string.

    Returns:
        ``[url, sponsor, awardAmount, recipients, requirements,
        additionalInfo, contact, address, deadlineInformation,
        sourceWebsite, sourceText]``.
    """

    def cleanedTextIfPresent(xpath):
        # '' when the element is absent, otherwise its cleaned textContent.
        if not self.checkIfElementExists(xpath):
            return ''
        rawText = self.driver.find_element_by_xpath(xpath).get_attribute('textContent')
        return CleanText.cleanALLtheText(rawText)

    url = self.driver.current_url

    sponsor = cleanedTextIfPresent(
        "//div/p/strong[text() = 'Awarded By']/../../following-sibling::div/p")
    # Drop the "» More Info" link text that trails the sponsor name.
    sponsor = re.sub('» More Info', '', sponsor)
    awardAmount = cleanedTextIfPresent(
        "//div/p/strong[text() = 'Award Amount']/../../following-sibling::div/p")
    recipients = cleanedTextIfPresent(
        "//div/p/strong[text() = 'Recipients']/../../following-sibling::div/p")
    requirements = cleanedTextIfPresent(
        "//div/p/strong[text() = 'Requirements']/../../following-sibling::div")
    additionalInfo = cleanedTextIfPresent(
        "//div/p/strong[text() = 'Additional Information']/../../following-sibling::div/p")
    contact = cleanedTextIfPresent(
        "//div/p/strong[text() = 'Contact']/../../following-sibling::div/p")
    address = cleanedTextIfPresent(
        "//div/p/strong[text() = 'Address']/../../following-sibling::div")
    deadlineInformation = cleanedTextIfPresent(
        "//strong[text() ='Deadline Information']/following-sibling::span[@class='smalltext']")

    sourceButtonXpath = "//a[@class='button cta']"
    if not self.checkIfElementExists(sourceButtonXpath):
        sourceWebsite = ''
        sourceText = ''
    else:
        sourceWebsite = self.driver.find_element_by_xpath(sourceButtonXpath).get_attribute('href')
        sourceText = CleanText.cleanALLtheText(RipPage.getPageSource(sourceWebsite))

    return [url, sponsor, awardAmount, recipients, requirements, additionalInfo,
            contact, address, deadlineInformation, sourceWebsite, sourceText]
def test_getListConcatenatedDescriptionEligibility(self):
    """First concatenated description/eligibility string matches the first DB row."""
    # set up
    db = SUDBConnect()
    keyword = 'East Asian Studies'
    databaseInfo = GrantForwardItemsGetDatabaseInfo(keyword=keyword)
    concatenatedTexts = databaseInfo.getListStringConcatenatedDescriptionEligibility()
    firstCombo = concatenatedTexts[0]

    # test
    query = "select * from dbo.GrantForwardItems where Keyword='" + keyword + "'"
    firstRow = db.getRowsDB(query)[0]
    expectedDescription = CleanText.cleanALLtheText(firstRow.Description)
    expectedEligibility = CleanText.cleanALLtheText(firstRow.Eligibility)
    expectedCombo = '%s %s' % (expectedDescription, expectedEligibility)
    self.assertEqual(expectedCombo, firstCombo)
def getListofListofItems():
    """Combine cleaned titles, abstracts and eligibilities position by position.

    Returns:
        A list of ``[title, abstract, eligibility]`` lists, one per abstract
        returned by ``GetPivotTagsTitleAbstractEligibility.getAbstracts()``.
    """
    allTitles = GetPivotTagsTitleAbstractEligibility.getTitles()
    allAbstracts = GetPivotTagsTitleAbstractEligibility.getAbstracts()
    allEligibilities = GetPivotTagsTitleAbstractEligibility.getEligibilities()

    combinedItems = []
    for position, rawAbstract in enumerate(allAbstracts):
        cleanedAbstract = CleanText.cleanALLtheText(rawAbstract)
        cleanedEligibility = CleanText.cleanALLtheText(allEligibilities[position])
        cleanedTitle = CleanText.cleanALLtheText(allTitles[position])
        combinedItems.append([cleanedTitle, cleanedAbstract, cleanedEligibility])
    return combinedItems
def getLeads(self):
    """Collect one lead per scholarship listed on the results page.

    For every title found on the expanded results list, the results page is
    reloaded, the matching detail link is clicked and the detail page is
    scraped with ``self.getResultPageInfo()``.

    Returns:
        ``self.unigoLeadsArray`` after appending, per scholarship,
        ``[title, amount, deadline, sponsor, awardAmount, recipients,
        requirements, additionalInfo, contact, address, sourceWebsite,
        sourceText]``. The driver is quit before returning.
    """
    titleXpath = "//h4[@data-bind='text: $parent.resultLayout ? shortTitle : Title']"
    detailLinkXpath = (
        "//a[@data-bind='click: function(scholarship, event) "
        "{ $parent.showScholarshipDetail(scholarship, event) }']")

    self.expandSeeMore()
    arrayOfAmountObjects = self.driver.find_elements_by_xpath(
        "//div[@class='amount']/span[@data-bind='text: Aequitas.toCurrency(DollarAmount)']")
    arrayOfTitleObjects = self.driver.find_elements_by_xpath(titleXpath)
    # NOTE(review): deadlines are located with the same XPath as titles;
    # this looks like a copy/paste slip -- confirm the intended selector.
    arrayOfDeadlineObjects = self.driver.find_elements_by_xpath(titleXpath)

    titlesList = self.getTitlesList(arrayOfTitleObjects)
    amountsList = self.getAmountsList(arrayOfAmountObjects)
    deadlinesList = self.getDeadlinesList(arrayOfDeadlineObjects)

    for i, rawTitle in enumerate(titlesList):
        title = CleanText.cleanALLtheText(rawTitle)
        amount = CleanText.cleanALLtheText(amountsList[i])
        deadline = CleanText.cleanALLtheText(deadlinesList[i])

        # Clicking a result navigates away, so the list is reloaded and
        # re-expanded before each click.
        self.driver.get(self.base_url + 'match/scholarshipresult')
        self.driver.implicitly_wait(2)
        self.expandSeeMore()
        arrayOfClickResultObjects = self.driver.find_elements_by_xpath(detailLinkXpath)

        # Indexing past the end used to raise IndexError; skip the lead
        # when the reloaded page exposes fewer detail links.
        if i >= len(arrayOfClickResultObjects):
            continue

        arrayOfClickResultObjects[i].click()
        self.driver.implicitly_wait(2)

        resultPageArray = self.getResultPageInfo()
        # Positions 0-8: sponsor, awardAmount, recipients, requirements,
        # additionalInfo, contact, address, sourceWebsite, sourceText.
        detailFields = [resultPageArray[position] for position in range(9)]
        self.unigoLeadsArray.append([title, amount, deadline] + detailFields)

    self.driver.quit()
    return self.unigoLeadsArray
def __init__(self, iefaLeadArray, fundingClassification, badScholarshipClassification):
    """Store the lead fields, classifications and database handles.

    Args:
        iefaLeadArray: Indexable lead record; positions 0-14 are read.
        fundingClassification: Stored as ``self.fundingClassification``.
        badScholarshipClassification: Stored under the (misspelled)
            attribute ``self.badScholarshipClassificaion``.
    """
    self.iefaLeadArray = iefaLeadArray
    self.fundingClassification = fundingClassification
    # The attribute spelling is kept as-is; other code may read it.
    self.badScholarshipClassificaion = badScholarshipClassification

    self.db = SUDBConnect()
    self.fileSystemDB = SUDBConnect(destination='filesystem')

    lead = self.iefaLeadArray
    self.name = lead[0]
    self.url = CleanText.replaceSingleQuotesWithTwoSingleQuotes(lead[1])
    self.sponsor = lead[2]
    self.submissionDeadline = lead[3]
    self.majors = lead[4]
    self.amount = lead[5]
    self.description = lead[6]
    self.otherCriteria = lead[7]
    self.numberAwards = lead[8]
    self.hostInstitution = lead[9]
    self.includes = lead[10]
    self.nationalityRequired = lead[11]
    self.hostCountries = lead[12]
    self.sourceWebsite = lead[13]
    self.sourceText = lead[14]

    # Date stamp formatted as YYYYMMDD.
    self.date = time.strftime('%Y%m%d')
# doSingleArraysForUnevenNumberElements: build one [title, link, description]
# entry per matched result <div> and append it to self.arrayOfGoogleLeads.
#
# Each element's text is split on newlines into at most three parts:
#   3 parts -> link, title, description (in that order)
#   2 parts -> link, description (title stays '')
#   otherwise all three stay ''.
# A link that does not start with http:// or https:// gets 'http://' prepended.
# The description is passed through
# CleanText.replaceSingleQuotesWithTwoSingleQuotes before being stored.
#
# NOTE(review): contains(concat(' ', @class, ' '), 'r') matches any class
# attribute containing the letter "r"; if the intent was the class token "r",
# the needle would be ' r ' -- confirm.
# NOTE(review): the original indentation is lost; presumably the 'http://'
# prefix check runs for every element rather than only in the two-part
# branch -- confirm against the upstream file.
def doSingleArraysForUnevenNumberElements(self): stringOfMatchedDivParts = self.driver.find_elements_by_xpath( "//h3[contains(concat(' ', @class, ' '), 'r')]/following-sibling::div/div" ) for element in stringOfMatchedDivParts: elementParts = element.text.split('\n', 2) elementTitle = '' elementLink = '' elementDescription = '' if len(elementParts) == 3: elementLink = elementParts[0] elementTitle = elementParts[1] elementDescription = elementParts[2] elif len(elementParts) == 2: elementLink = elementParts[0] elementDescription = elementParts[1] if not re.search('^https?://', elementLink): elementLink = 'http://' + elementLink singleResultArray = [ elementTitle, elementLink, CleanText.replaceSingleQuotesWithTwoSingleQuotes( elementDescription) ] self.arrayOfGoogleLeads.append(singleResultArray)
def getTitlesList(self):
    """Return the cleaned text of every ``h2.col-xs-12`` heading on the page."""
    headingElements = self.driver.find_elements_by_xpath("//h2[@class='col-xs-12']")

    # All raw texts are read first; cleaning happens in a second pass.
    rawTitles = []
    for headingElement in headingElements:
        rawTitles.append(headingElement.get_attribute('textContent'))

    cleanedTitles = []
    for rawTitle in rawTitles:
        cleanedTitles.append(CleanText.cleanALLtheText(rawTitle))
    return cleanedTitles
def goToResultPageAndPullInformation(self, resultPageLink):
    """Load a result page and scrape its optional fields.

    Args:
        resultPageLink: URL passed to ``self.driver.get``.

    Returns:
        ``[description, sponsor, amount, eligibility, submissionInfo,
        categories, sourceWebsite, sourceText, deadline]``; any field whose
        element is not found is an empty string.
    """

    def cleanedTextIfPresent(xpath):
        # '' when the element is absent, otherwise its cleaned textContent.
        if not self.checkIfElementExists(xpath):
            return ''
        rawText = self.driver.find_element_by_xpath(xpath).get_attribute('textContent')
        return CleanText.cleanALLtheText(rawText)

    self.driver.get(resultPageLink)
    self.driver.implicitly_wait(2)

    description = cleanedTextIfPresent(
        "//div[@id = 'field-description']/div[@class = 'content-collapsed']")
    sponsor = cleanedTextIfPresent("//div[@class = 'sponsor-content']/div/a")
    amount = cleanedTextIfPresent(
        "//div[@id = 'field-amount_info']/div[@class = 'content-collapsed']")
    eligibility = cleanedTextIfPresent(
        "//div[@id = 'field-eligibility']/div[@class = 'content-collapsed']")
    submissionInfo = cleanedTextIfPresent(
        "//div[@id = 'field-submission_info']/div[@class = 'content-collapsed']")
    categories = cleanedTextIfPresent("//div[@id = 'field-subjects']/ul")

    sourceWebsite = ''
    sourceText = ''
    sourceLinkXpath = "//a[@class = 'source-link btn btn-warning']"
    if self.checkIfElementExists(sourceLinkXpath):
        sourceWebsite = self.driver.find_element_by_xpath(sourceLinkXpath).get_attribute('href')
        sourceText = CleanText.cleanALLtheText(RipPage.getPageSource(sourceWebsite))

    deadline = cleanedTextIfPresent(
        "//div[@class='table-responsive deadline-tables']/table/tbody")

    return [description, sponsor, amount, eligibility, submissionInfo,
            categories, sourceWebsite, sourceText, deadline]
def test_ListOfItemsList(self):
    """First title/abstract pair matches the first PivotLeads row for the keyword."""
    # set up
    db = SUDBConnect()
    keyword = 'Accounting'
    titleAbstractPairs = PivotLeadsGetDatabaseInfo(keyword).getTitleAbstractList()
    firstPair = titleAbstractPairs[0]
    testTitle = firstPair[0]
    testAbstract = firstPair[1]

    # test
    query = "select * from dbo.PivotLeads where Keyword='" + keyword + "'"
    firstRow = db.getRowsDB(query)[0]
    expectedTitle = CleanText.cleanALLtheText(firstRow.Name)
    expectedAbstract = CleanText.cleanALLtheText(firstRow.Abstract)
    self.assertEqual(expectedTitle, testTitle)
    self.assertEqual(expectedAbstract, testAbstract)
def getAmountsList(self):
    """Return the cleaned text of every ``div.amount`` with 'Amount' removed."""
    amountElements = self.driver.find_elements_by_xpath("//div[@class='amount']")
    return [
        CleanText.cleanALLtheText(
            re.sub('Amount', '', amountElement.get_attribute('textContent')))
        for amountElement in amountElements
    ]
def getTitlesList(self):
    """Return the cleaned link text of every title under ``div.main-details``."""
    titleLinks = self.driver.find_elements_by_xpath(
        "//div[@class='main-details clearfix']/h2/a")
    return [
        CleanText.cleanALLtheText(titleLink.get_attribute('textContent'))
        for titleLink in titleLinks
    ]
def test_getListConcatenatedDescriptionEligibility(self):
    """First concatenated description/eligibility string matches the first DB row."""
    # set up
    db = SUDBConnect()
    keyword = 'East Asian Studies'
    infoGetter = GrantForwardItemsGetDatabaseInfo(keyword=keyword)
    combinedStrings = infoGetter.getListStringConcatenatedDescriptionEligibility()
    firstCombo = combinedStrings[0]

    # test
    selectStatement = "select * from dbo.GrantForwardItems where Keyword='" + keyword + "'"
    topRow = db.getRowsDB(selectStatement)[0]
    cleanedDescription = CleanText.cleanALLtheText(topRow.Description)
    cleanedEligibility = CleanText.cleanALLtheText(topRow.Eligibility)
    testCombo = '%s %s' % (cleanedDescription, cleanedEligibility)
    self.assertEqual(testCombo, firstCombo)
def test_eligibilitiesList(self):
    """Eligibilities for 'Senior' load and the first one survives cleaning."""
    seniorInfo = GetDatabaseInfoScholarshipsWithClassStatuses('Senior')
    self.assertIsNotNone(seniorInfo)

    eligibilities = seniorInfo.getEligibilitiesList()
    self.assertIsNotNone(eligibilities)

    cleanedFirst = CleanText.cleanALLtheText(eligibilities[0])
    self.assertIsNotNone(cleanedFirst)
def test_ListOfItemsList(self):
    """First title/description pair matches the first GrantForwardItems row."""
    # set up
    db = SUDBConnect()
    keyword = 'Accounting'
    titleDescriptionPairs = GrantForwardItemsGetDatabaseInfo(keyword).getTitleDescriptionList()
    firstPair = titleDescriptionPairs[0]
    testTitle = firstPair[0]
    testDescription = firstPair[1]

    # test
    query = "select * from dbo.GrantForwardItems where Keyword='" + keyword + "'"
    firstRow = db.getRowsDB(query)[0]
    expectedTitle = CleanText.cleanALLtheText(firstRow.Name)
    expectedDescription = CleanText.cleanALLtheText(firstRow.Description)
    self.assertEqual(expectedTitle, testTitle)
    self.assertEqual(expectedDescription, testDescription)
def test_scholarshipsDescriptionsList(self):
    """Descriptions for 'Junior' load and the first one survives cleaning."""
    juniorInfo = GetDatabaseInfoScholarshipsWithClassStatuses('Junior')
    self.assertIsNotNone(juniorInfo)

    descriptions = juniorInfo.getScholarshipDescriptionsList()
    self.assertIsNotNone(descriptions)

    cleanedFirst = CleanText.cleanALLtheText(descriptions[0])
    self.assertIsNotNone(cleanedFirst)
def getEligibilitiesList(self):
    """Return the cleaned eligibility text of every listed scholarship.

    The text is read from the second ``<p>`` of the first inner ``<div>`` of
    each ``div.col-md-10.col-md-offset-1`` block; the leading
    ``"Who can apply? "`` label is removed before cleaning.

    Returns:
        A list of strings produced by ``CleanText.cleanALLtheText``.
    """
    # Raw string: '\?' in a plain literal is an invalid escape sequence and
    # triggers a warning on current Python versions.
    labelPattern = re.compile(r'Who can apply\? ')

    eligibilityDivs = self.driver.find_elements_by_xpath(
        "//div[@class='col-md-10 col-md-offset-1']/div[1]/p[2]")
    rawTexts = [eligibilityDiv.get_attribute('textContent') for eligibilityDiv in eligibilityDivs]
    withoutLabel = [labelPattern.sub('', rawText) for rawText in rawTexts]
    return [CleanText.cleanALLtheText(eligibility) for eligibility in withoutLabel]
def getSourceWebsitesAndSourceTexts(self):
    """Collect each source link and the cleaned page source behind it.

    Returns:
        A ``(sourceWebsitesList, sourceTextsList)`` tuple; both lists follow
        the order of the ``div.col-xs-8.col-xs-offset-2 > a`` links.
    """
    linkElements = self.driver.find_elements_by_xpath(
        "//div[@class='col-xs-8 col-xs-offset-2']/a")

    sourceWebsitesList = []
    for linkElement in linkElements:
        sourceWebsitesList.append(linkElement.get_attribute('href'))

    # Every page is fetched before any of them is cleaned.
    rawPageSources = []
    for sourceWebsite in sourceWebsitesList:
        rawPageSources.append(RipPage.getPageSource(sourceWebsite))

    sourceTextsList = []
    for rawPageSource in rawPageSources:
        sourceTextsList.append(CleanText.cleanALLtheText(rawPageSource))

    return sourceWebsitesList, sourceTextsList
def getAwardsList(self):
    """Return each scholarship's cleaned award text with 'Awards: ' removed."""
    awardElements = self.driver.find_elements_by_xpath(
        "//div[@class='col-md-10 col-md-offset-1']/div[2]/p[1]")

    # Three separate passes: read, strip the label, clean.
    rawAwards = []
    for awardElement in awardElements:
        rawAwards.append(awardElement.get_attribute('textContent'))

    unlabelledAwards = []
    for rawAward in rawAwards:
        unlabelledAwards.append(re.sub('Awards: ', '', rawAward))

    cleanedAwards = []
    for unlabelledAward in unlabelledAwards:
        cleanedAwards.append(CleanText.cleanALLtheText(unlabelledAward))
    return cleanedAwards
def getDescriptionsList(self):
    """Return each scholarship's cleaned description with 'Details: ' removed."""
    descriptionElements = self.driver.find_elements_by_xpath(
        "//div[@class='col-md-10 col-md-offset-1']/div[1]/p[1]")

    # Three separate passes: read, strip the label, clean.
    rawDescriptions = []
    for descriptionElement in descriptionElements:
        rawDescriptions.append(descriptionElement.get_attribute('textContent'))

    unlabelledDescriptions = []
    for rawDescription in rawDescriptions:
        unlabelledDescriptions.append(re.sub('Details: ', '', rawDescription))

    cleanedDescriptions = []
    for unlabelledDescription in unlabelledDescriptions:
        cleanedDescriptions.append(CleanText.cleanALLtheText(unlabelledDescription))
    return cleanedDescriptions
def getNumAwardsList(self):
    """Return the cleaned text of every ``div.award-count`` with '# Awards' removed."""
    awardCountElements = self.driver.find_elements_by_xpath("//div[@class='award-count']")
    return [
        CleanText.cleanALLtheText(
            re.sub('# Awards', '', awardCountElement.get_attribute('textContent')))
        for awardCountElement in awardCountElements
    ]
def getAmountsList(self):
    """Return the cleaned text of every non-label ``<p>`` inside ``div.award``."""
    amountElements = self.driver.find_elements_by_xpath(
        "//div[@class='award']/p[not (@class='label')]")
    # Read every raw text first, then clean them in a second pass.
    rawAmounts = [amountElement.get_attribute('textContent') for amountElement in amountElements]
    return [CleanText.cleanALLtheText(rawAmount) for rawAmount in rawAmounts]
def getDeadlinesList(self):
    """Return the cleaned text of every non-label ``<p>`` inside ``div.deadline``."""
    deadlineElements = self.driver.find_elements_by_xpath(
        "//div[@class='deadline']/p[not (@class='label')]")
    # Read every raw text first, then clean them in a second pass.
    rawDeadlines = [deadlineElement.get_attribute('textContent')
                    for deadlineElement in deadlineElements]
    return [CleanText.cleanALLtheText(rawDeadline) for rawDeadline in rawDeadlines]
def getTitlesList(self):
    """Return the cleaned text of every link nested directly in an ``<h3>``."""
    titleLinks = self.driver.find_elements_by_xpath("//h3/a")
    # Read every raw text first, then clean them in a second pass.
    rawTitles = [titleLink.get_attribute('textContent') for titleLink in titleLinks]
    return [CleanText.cleanALLtheText(rawTitle) for rawTitle in rawTitles]
def getSponsorsList(self):
    """Return the cleaned text of every non-label ``<p>`` inside ``div.provided_by``."""
    sponsorElements = self.driver.find_elements_by_xpath(
        "//div[@class='provided_by']/p[not (@class='label')]")
    # Read every raw text first, then clean them in a second pass.
    rawSponsors = [sponsorElement.get_attribute('textContent')
                   for sponsorElement in sponsorElements]
    return [CleanText.cleanALLtheText(rawSponsor) for rawSponsor in rawSponsors]
def doSingleArraysForSameNumberElements(self):
    """Append one ``[title, link, description]`` entry per title.

    Titles, links and descriptions are matched by position. The description
    is passed through ``CleanText.replaceSingleQuotesWithTwoSingleQuotes``
    before the entry is appended to ``self.arrayOfGoogleLeads``.
    """
    for position, titleElement in enumerate(self.arrayOfTitles):
        titleText = titleElement.text
        link = self.arrayOfLinks[position]
        descriptionText = self.arrayOfDescriptions[position].text
        escapedDescription = CleanText.replaceSingleQuotesWithTwoSingleQuotes(descriptionText)
        self.arrayOfGoogleLeads.append([titleText, link, escapedDescription])
def test_GetPivotTagsTitleAbstractEligibilityListItems(self):
    """First combined item matches the first row of dbo.PivotTags."""
    # set up
    db = SUDBConnect()
    combinedItems = GetPivotTagsTitleAbstractEligibility.getListofListofItems()
    firstItem = combinedItems[0]
    testtitle = firstItem[0]
    testabstract = firstItem[1]
    testeligibility = firstItem[2]

    # test
    firstRow = db.getRowsDB("select * from dbo.PivotTags")[0]
    expectedTitle = CleanText.cleanALLtheText(firstRow.Name)
    expectedAbstract = CleanText.cleanALLtheText(firstRow.Abstract)
    expectedEligibility = CleanText.cleanALLtheText(firstRow.Eligibility)
    self.assertEqual(expectedTitle, testtitle)
    self.assertEqual(expectedAbstract, testabstract)
    self.assertEqual(expectedEligibility, testeligibility)
def getInfoFromScholarshipPage(self, url):
    """Load a scholarship page and scrape its optional sections.

    Args:
        url: Address passed to ``self.driver.get``.

    Returns:
        ``[description, eligibility, amountInfo, deadlineInfo,
        sourceWebsite, sourceText]``; any section whose element is not found
        is an empty string.
    """

    def cleanedTextIfPresent(xpath):
        # '' when the element is absent, otherwise its cleaned textContent.
        if not self.checkIfElementExists(xpath):
            return ''
        rawText = self.driver.find_element_by_xpath(xpath).get_attribute('textContent')
        return CleanText.cleanALLtheText(rawText)

    self.driver.get(url)
    self.driver.implicitly_wait(2)

    description = cleanedTextIfPresent("//div[@class='entry-content']/p[1]")
    eligibility = cleanedTextIfPresent(
        "//div[@class='entry-content']/p/strong[text() = 'Who is eligible to apply?']/../following-sibling::ul[1]")
    amountInfo = cleanedTextIfPresent(
        "//div[@class='entry-content']/p/strong[text() = 'How much is each scholarship worth?']/../following-sibling::p[1]")
    deadlineInfo = cleanedTextIfPresent(
        "//div[@class='entry-content']/p/strong[text() = 'When is the deadline to apply?']/../following-sibling::ul[1]")

    sourceWebsite = ''
    sourceText = ''
    applyLinkXpath = "//span[@class='apply']/a"
    if self.checkIfElementExists(applyLinkXpath):
        sourceWebsite = self.driver.find_element_by_xpath(applyLinkXpath).get_attribute('href')
        sourceText = CleanText.cleanALLtheText(RipPage.getPageSource(sourceWebsite))

    return [description, eligibility, amountInfo, deadlineInfo, sourceWebsite, sourceText]
def getScholarshipDescriptionsList(self):
    """Return the cleaned ``ScholarshipDescription`` of every row that has one.

    Rows whose ``ScholarshipDescription`` is not a string (for example
    ``None``) are skipped.

    Returns:
        A list of strings produced by ``CleanText.cleanALLtheText``.
    """
    rawDescriptions = [row.ScholarshipDescription for row in self.rows]
    # isinstance also accepts str subclasses, unlike an exact type comparison.
    return [
        CleanText.cleanALLtheText(description)
        for description in rawDescriptions
        if isinstance(description, str)
    ]
def getLeads(self):
    """Append one scholarship entry per listed title to ``self.fatomeiLeadsArray``.

    Titles and links come from the title anchors; due dates and descriptions
    come from the cells of rows containing a ``td.f``. Each entry is
    ``[title, description, dueDate, link, sourceText]``, where ``sourceText``
    is the cleaned page source fetched from ``link``.
    """
    titleLinkElements = self.driver.find_elements_by_xpath(
        "//td[@class='f']/../preceding-sibling::tr[1]/td[@class='a']/a")
    dateDescriptionElements = self.driver.find_elements_by_xpath("//tr/td[@class='f']/../td")

    titlesList = self.getTitlesList(titleLinkElements)
    linksList = self.getLinksList(titleLinkElements)
    dueDatesList = self.getDueDates(dateDescriptionElements)
    descriptionsList = self.getDescriptionsList(dateDescriptionElements)

    for position, rawTitle in enumerate(titlesList):
        title = CleanText.cleanALLtheText(rawTitle)
        link = linksList[position]
        dueDate = dueDatesList[position]
        description = CleanText.cleanALLtheText(descriptionsList[position])
        sourceText = CleanText.cleanALLtheText(RipPage.getPageSource(link))
        self.fatomeiLeadsArray.append([title, description, dueDate, link, sourceText])
def getAmountsList(self):
    """Return the cleaned text of every ``div.scholarship__amount`` element."""
    amountElements = self.driver.find_elements_by_xpath("//div[@class='scholarship__amount']")
    # Read every raw text first, then clean them in a second pass.
    rawAmounts = [amountElement.get_attribute('textContent') for amountElement in amountElements]
    return [CleanText.cleanALLtheText(rawAmount) for rawAmount in rawAmounts]
def getTitlesList(self):
    """Return the cleaned link text of every title under ``div.main-details``."""
    titleAnchors = self.driver.find_elements_by_xpath(
        "//div[@class='main-details clearfix']/h2/a")
    return [
        CleanText.cleanALLtheText(titleAnchor.get_attribute('textContent'))
        for titleAnchor in titleAnchors
    ]
def getTitlesList(self):
    """Return the cleaned text of every ``h2.col-xs-12`` heading on the page."""
    headingElements = self.driver.find_elements_by_xpath("//h2[@class='col-xs-12']")

    # All raw texts are read first; cleaning happens in a second pass.
    rawTitles = []
    for headingElement in headingElements:
        rawTitles.append(headingElement.get_attribute('textContent'))

    cleanedTitles = []
    for rawTitle in rawTitles:
        cleanedTitles.append(CleanText.cleanALLtheText(rawTitle))
    return cleanedTitles
def test_ListOfItemsList(self):
    """First title/description pair matches the first GrantForwardItems row."""
    # set up
    db = SUDBConnect()
    keyword = 'Accounting'
    infoGetter = GrantForwardItemsGetDatabaseInfo(keyword)
    titleDescriptionPairs = infoGetter.getTitleDescriptionList()
    firstPair = titleDescriptionPairs[0]
    testTitle = firstPair[0]
    testDescription = firstPair[1]

    # test
    selectStatement = "select * from dbo.GrantForwardItems where Keyword='" + keyword + "'"
    topRow = db.getRowsDB(selectStatement)[0]
    cleanedTitle = CleanText.cleanALLtheText(topRow.Name)
    cleanedDescription = CleanText.cleanALLtheText(topRow.Description)
    self.assertEqual(cleanedTitle, testTitle)
    self.assertEqual(cleanedDescription, testDescription)
def getDeadlinesList(self):
    """Return the cleaned text of every ``span.due`` with 'Due:' removed."""
    dueElements = self.driver.find_elements_by_xpath("//span[@class='due']")

    # Three separate passes: read, strip the label, clean.
    rawDeadlines = [dueElement.get_attribute('textContent') for dueElement in dueElements]

    unlabelledDeadlines = []
    for rawDeadline in rawDeadlines:
        unlabelledDeadlines.append(re.sub('Due:', '', rawDeadline))

    cleanedDeadlines = []
    for unlabelledDeadline in unlabelledDeadlines:
        cleanedDeadlines.append(CleanText.cleanALLtheText(unlabelledDeadline))
    return cleanedDeadlines
def getDeadlinesList(self):
    """Return the cleaned text of every ``div.scholarship__deadline`` element."""
    deadlineElements = self.driver.find_elements_by_xpath(
        "//div[@class='scholarship__deadline']")
    # Read every raw text first, then clean them in a second pass.
    rawDeadlines = [deadlineElement.get_attribute('textContent')
                    for deadlineElement in deadlineElements]
    return [CleanText.cleanALLtheText(rawDeadline) for rawDeadline in rawDeadlines]
def getEligibilitiesList(self):
    """Return the cleaned ``Eligibility`` of every row that has one.

    Rows whose ``Eligibility`` is not a string (for example ``None``) are
    skipped.

    Returns:
        A list of strings produced by ``CleanText.cleanALLtheText``.
    """
    rawEligibilities = [row.Eligibility for row in self.rows]
    # isinstance also accepts str subclasses, unlike an exact type comparison.
    return [
        CleanText.cleanALLtheText(eligibility)
        for eligibility in rawEligibilities
        if isinstance(eligibility, str)
    ]
def test_GetPivotTagsTitleAbstractEligibilityListItems(self):
    """First combined item matches the first row of dbo.PivotTags."""
    # set up
    db = SUDBConnect()
    listOfItemLists = GetPivotTagsTitleAbstractEligibility.getListofListofItems()
    firstItem = listOfItemLists[0]
    testtitle = firstItem[0]
    testabstract = firstItem[1]
    testeligibility = firstItem[2]

    # test
    topRow = db.getRowsDB("select * from dbo.PivotTags")[0]
    cleanedTitle = CleanText.cleanALLtheText(topRow.Name)
    cleanedAbstract = CleanText.cleanALLtheText(topRow.Abstract)
    cleanedEligibility = CleanText.cleanALLtheText(topRow.Eligibility)
    self.assertEqual(cleanedTitle, testtitle)
    self.assertEqual(cleanedAbstract, testabstract)
    self.assertEqual(cleanedEligibility, testeligibility)
def getAmountsList(self):
    """Return the cleaned text of every ``span.amount`` with 'Amount:' removed."""
    amountElements = self.driver.find_elements_by_xpath("//span[@class='amount']")

    # Three separate passes: read, strip the label, clean.
    rawAmounts = [amountElement.get_attribute('textContent') for amountElement in amountElements]

    unlabelledAmounts = []
    for rawAmount in rawAmounts:
        unlabelledAmounts.append(re.sub('Amount:', '', rawAmount))

    cleanedAmounts = []
    for unlabelledAmount in unlabelledAmounts:
        cleanedAmounts.append(CleanText.cleanALLtheText(unlabelledAmount))
    return cleanedAmounts
def getLeads(self):
    """Append one scholarship entry per listed title to ``self.fatomeiLeadsArray``.

    Titles and links come from the title anchors; due dates and descriptions
    come from the cells of rows containing a ``td.f``. Each entry is
    ``[title, description, dueDate, link, sourceText]``, where ``sourceText``
    is the cleaned page source fetched from ``link``.
    """
    titleAnchors = self.driver.find_elements_by_xpath(
        "//td[@class='f']/../preceding-sibling::tr[1]/td[@class='a']/a")
    dateDescriptionCells = self.driver.find_elements_by_xpath("//tr/td[@class='f']/../td")

    titlesList = self.getTitlesList(titleAnchors)
    linksList = self.getLinksList(titleAnchors)
    dueDatesList = self.getDueDates(dateDescriptionCells)
    descriptionsList = self.getDescriptionsList(dateDescriptionCells)

    for index, rawTitle in enumerate(titlesList):
        title = CleanText.cleanALLtheText(rawTitle)
        link = linksList[index]
        dueDate = dueDatesList[index]
        description = CleanText.cleanALLtheText(descriptionsList[index])
        sourceText = CleanText.cleanALLtheText(RipPage.getPageSource(link))
        self.fatomeiLeadsArray.append([title, description, dueDate, link, sourceText])
def getAmountsList(self):
    """Return the cleaned text of every ``div.amount`` with 'Amount' removed."""
    amountDivs = self.driver.find_elements_by_xpath("//div[@class='amount']")
    return [
        CleanText.cleanALLtheText(
            re.sub('Amount', '', amountDiv.get_attribute('textContent')))
        for amountDiv in amountDivs
    ]
def getAwardsList(self):
    """Return each scholarship's cleaned award text with 'Awards: ' removed."""
    awardParagraphs = self.driver.find_elements_by_xpath(
        "//div[@class='col-md-10 col-md-offset-1']/div[2]/p[1]")

    # Three separate passes: read, strip the label, clean.
    rawAwards = []
    for awardParagraph in awardParagraphs:
        rawAwards.append(awardParagraph.get_attribute('textContent'))

    unlabelledAwards = []
    for rawAward in rawAwards:
        unlabelledAwards.append(re.sub('Awards: ', '', rawAward))

    cleanedAwards = []
    for unlabelledAward in unlabelledAwards:
        cleanedAwards.append(CleanText.cleanALLtheText(unlabelledAward))
    return cleanedAwards
def doSingleArraysForSameNumberElements(self):
    """Append one ``[title, link, description]`` entry per title.

    Titles, links and descriptions are matched by position. The description
    is passed through ``CleanText.replaceSingleQuotesWithTwoSingleQuotes``
    before the entry is appended to ``self.arrayOfGoogleLeads``.
    """
    for index, titleElement in enumerate(self.arrayOfTitles):
        titleText = titleElement.text
        linkValue = self.arrayOfLinks[index]
        descriptionText = self.arrayOfDescriptions[index].text
        escapedDescription = CleanText.replaceSingleQuotesWithTwoSingleQuotes(descriptionText)
        self.arrayOfGoogleLeads.append([titleText, linkValue, escapedDescription])
def getTitlesListEditors(self):
    """Return the cleaned ``<h3>`` titles of the active results in the ``tab h-results`` section."""
    titleElements = self.driver.find_elements_by_xpath(
        "//section[@class='scholarships']/div[@class='tab h-results']/div[@class='h-result js-has-toggle active']/div[@class='h-result-header js-toggle active']/h3"
    )
    # Read every raw text first, then clean them in a second pass.
    rawTitles = [titleElement.get_attribute('textContent') for titleElement in titleElements]
    return [CleanText.cleanALLtheText(rawTitle) for rawTitle in rawTitles]
def getTitlesListScholarships(self):
    """Return the cleaned ``<h3>`` titles found under the plain ``tab`` section."""
    titleElements = self.driver.find_elements_by_xpath(
        "//section[@class='scholarships']/div[@class='tab']/section/div/div/h3"
    )
    # Read every raw text first, then clean them in a second pass.
    rawTitles = [titleElement.get_attribute('textContent') for titleElement in titleElements]
    return [CleanText.cleanALLtheText(rawTitle) for rawTitle in rawTitles]