Example #1
from time import time

from selenium.webdriver.common.keys import Keys


def load_more(url):
    crawler = Crawler()
    crawler.get(url)
    assert "Influencer Love | Fashion ID" in crawler.title, "TITLE INCORRECT"
    try:
        times_clicked = 0
        start = int(time())
        # click until the button disappears and the lookup raises,
        # which is what breaks the loop
        while True:
            # close the tab if a click accidentally opened Twitter
            if "Twitter" in crawler.getTitle():
                crawler.driver.find_element_by_tag_name('body').send_keys(
                    Keys.COMMAND + 'w')
                any_button = crawler.findElementsByXPath("//a")[0]
                any_button.send_keys(Keys.COMMAND + 'w')
                crawler.closeExtraTabs()

            # find and click the load-more button
            load_more_button = crawler.findElementByXPath("//a[@id='ctf-more']")
            crawler.highlight("//a[@id='ctf-more']")
            crawler.click(load_more_button)
            times_clicked += 1
            print('%s CLICKS' % times_clicked)
            crawler.closeExtraTabs()

    except Exception as e:
        print('EXCEPTION', e)
    crawler.close()
    end = int(time())
    print(start)
    print(end)
    print('TOTAL TIME ELAPSED: %s' % (end - start))
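All five examples lean on a Crawler wrapper that the listing never shows. Below is a minimal sketch of the interface they appear to assume; every method name and the Selenium backing are inferred from the call sites, not taken from the original source, and helpers such as findPresentElementByXPath, navigate, and findNext are left out.

# A guessed reconstruction of the Crawler wrapper; names and behavior are
# inferred from how the examples call it, not from the original source.
from selenium import webdriver
from selenium.webdriver.common.by import By


class Crawler:
    def __init__(self):
        self.driver = webdriver.Chrome()

    def get(self, url):
        self.driver.get(url)

    @property
    def title(self):
        return self.driver.title

    def getTitle(self):
        return self.driver.title

    def findElementByXPath(self, xpath):
        return self.driver.find_element(By.XPATH, xpath)

    def findElementsByXPath(self, xpath):
        return self.driver.find_elements(By.XPATH, xpath)

    def highlight(self, xpath):
        # outline the matched element so clicks are visible during a run
        element = self.driver.find_element(By.XPATH, xpath)
        self.driver.execute_script(
            "arguments[0].style.outline = '2px solid red';", element)

    def click(self, element):
        element.click()

    def scrollTo(self, element):
        self.driver.execute_script("arguments[0].scrollIntoView();", element)

    def switchFrameByXPath(self, xpath):
        self.driver.switch_to.frame(self.driver.find_element(By.XPATH, xpath))

    def switchToParentFrame(self):
        self.driver.switch_to.parent_frame()

    def closeExtraTabs(self):
        # keep the first tab, close the rest
        first, *rest = self.driver.window_handles
        for handle in rest:
            self.driver.switch_to.window(handle)
            self.driver.close()
        self.driver.switch_to.window(first)

    def close(self):
        self.driver.quit()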
Example #2
from json import dumps


def random_page(url):
    crawler = Crawler()
    crawler.get(url)
    assert "Urban Dictionary" in crawler.title, "TITLE INCORRECT"
    try:
        # navigate to a random entry
        random_button = crawler.findElementByXPath(
            "//a[@class='circle-button' and @href='/random.php']")
        crawler.highlight(
            "//a[@class='circle-button' and @href='/random.php']")
        crawler.click(random_button)

        # extract the word and its definition
        content = {}
        content["word"] = crawler.findElementByXPath(
            "(//a[@class='word'])[1]").text
        crawler.highlight("(//a[@class='word'])[1]")
        content["meaning"] = crawler.findElementByXPath(
            "(//div[@class='meaning'])[1]").text
        crawler.highlight("(//div[@class='meaning'])[1]")
        return dumps(content)
    except Exception as e:
        print('MISSING', e)
    finally:
        # close the browser whether or not extraction succeeded
        crawler.close()
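A possible call site, with a placeholder URL; random_page returns a JSON string on success and implicitly returns None when extraction fails, so the result is checked before parsing:

from json import loads

result = random_page("https://www.urbandictionary.com/")  # URL illustrative
if result:
    content = loads(result)
    print(content["word"], "-", content["meaning"])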
Example #3
import unittest


class CrawlerTest(unittest.TestCase):
    def setUp(self):
        self.crawler = Crawler()

    def testNavigateAndRetrieveLinks(self):
        crawler = self.crawler
        crawler.navigate("http://www.google.ca")
        ret = crawler.findNext(".*")
        for r in ret:
            print(r)

    def tearDown(self):
        self.crawler.close()
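Run as a standalone script, the test case above would typically be driven by unittest's built-in runner:

if __name__ == "__main__":
    unittest.main()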
Example #4
import urllib.request


def get_reviews(url):
    posts = []
    crawler = Crawler()
    crawler.get(url)
    assert "Sunglasses" in crawler.title, "TITLE INCORRECT"
    try:
        # dismiss the dialog that overlays the page on load
        close_banner = crawler.findElementByXPath(
            "//a[@class='next-dialog-close']")
        crawler.click(close_banner)

        # scroll down to the product-detail section
        product_details = crawler.findElementByXPath(
            "//div[@id='product-detail']")
        crawler.scrollTo(product_details)

        # open the feedback (reviews) tab
        reviews_tab = crawler.findElementByXPath(
            "//div[@id='product-detail']//ul/li[@ae_object_type='feedback']")
        crawler.scrollTo(reviews_tab)
        crawler.highlight(
            "//div[@id='product-detail']//ul/li[@ae_object_type='feedback']")
        crawler.click(reviews_tab)

        # reviews are rendered inside their own iframe
        crawler.switchFrameByXPath("//iframe[@id='product-evaluation']")

        # keep only reviews that include photos
        photo_filter = crawler.findElementByXPath(
            "//label[text()[contains(.,'Photo')]]")
        crawler.click(photo_filter)
        crawler.highlight("//label[text()[contains(.,'Photo')]]")

        # collect the text and photo of every review on the page
        reviews = crawler.findElementsByXPath(
            "//div[@class='feedback-item clearfix']")
        for count, review in enumerate(reviews):
            post = {}
            post["text"] = review.find_element_by_xpath(".//dt/span").text
            print(post["text"])
            review_pic = review.find_element_by_xpath(".//img")
            post["src"] = review_pic.get_attribute("src")
            print(post["src"])
            # download the review photo so it can be re-uploaded later
            post["file"] = f"{count}_review.png"
            urllib.request.urlretrieve(post["src"], post["file"])
            posts.append(post)
    except Exception as e:
        print('ERROR', e)
    crawler.close()
    return posts
Example #5
from pathlib import Path
from random import random

# get_first_name/get_last_name match the interface of the `names` package;
# the original import is not shown, so this source is an assumption
from names import get_first_name, get_last_name


def write_reviews(url, reviews):
    crawler = Crawler()
    crawler.get(url)
    assert "Fall Animal Costumes" in crawler.title, "TITLE INCORRECT"
    for review in reviews:
        try:
            crawler.switchFrameByXPath("//iframe[@id='looxReviewsFrame']")

            write_review_button = crawler.findElementByXPath(
                "//button[@id='write']")
            crawler.scrollTo(write_review_button)
            crawler.highlight("//button[@id='write']")
            crawler.click(write_review_button)

            crawler.switchToParentFrame()

            crawler.switchFrameByXPath("//iframe[@id='looxDialog']")

            # rating review
            love_it_button = crawler.findElementByXPath(
                "//div/span[contains(text(), 'Love')]/..")
            crawler.highlight("//div/span[contains(text(), 'Love')]/..")
            crawler.click(love_it_button)

            # photo review
            upload_button = crawler.findPresentElementByXPath(
                "//input[@id='imageupload']")
            crawler.highlight("//label[contains(text(), 'Choose')]")
            crawler.click(upload_button)

            cwd = Path.cwd()
            file_name = f"{cwd}/{review['file']}"
            print(file_name)
            upload_button.send_keys(file_name)

            # generate user
            first_name = get_first_name(
                gender='male' if random() > .5 else 'female')
            last_name = get_last_name()
            email = f"{first_name}.{last_name}@gmail.com"

            # written review
            text_field = crawler.findElementByXPath("//textarea")
            crawler.highlight("//textarea")
            crawler.click(text_field)
            text_field.send_keys(review["text"])

            next_button = crawler.findElementByXPath(
                "//div[contains(text(),'Next')]")
            crawler.highlight("//div[contains(text(),'Next')]")
            crawler.click(next_button)

            first_name_field = crawler.findElementByXPath(
                "//input[@id='first_name']")
            crawler.highlight("//input[@id='first_name']")
            crawler.click(first_name_field)
            first_name_field.send_keys(first_name)
            print("FIRST NAME: ", first_name)

            last_name_field = crawler.findElementByXPath(
                "//input[@id='last_name']")
            crawler.highlight("//input[@id='last_name']")
            crawler.click(last_name_field)
            last_name_field.send_keys(last_name)
            print("LAST NAME: ", last_name)

            email_field = crawler.findElementByXPath("//input[@id='email']")
            crawler.highlight("//input[@id='email']")
            crawler.click(email_field)
            email_field.send_keys(email)

            # exit review
            done_button = crawler.findElementByXPath(
                "//button[contains(text(),'Done')]")
            crawler.highlight("//button[contains(text(),'Done')]")
            crawler.click(done_button)

            exit_button = crawler.findElementByXPath("//a[@id='close-elem']")
            crawler.highlight("//a[@id='close-elem']")
            crawler.click(exit_button)

            crawler.switchToParentFrame()
        except Exception as e:
            print('MISSING', e)
    crawler.close()
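Examples #4 and #5 are designed to chain: the posts returned by get_reviews carry exactly the "text" and "file" keys that write_reviews reads. A hypothetical end-to-end run, with both URLs as placeholders:

reviews = get_reviews("https://example.com/sunglasses-product")
write_reviews("https://example.com/fall-animal-costumes", reviews)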