Example #1
    # Requires: import time; from scrapy.http import TextResponse;
    # from scrapy.exceptions import CloseSpider;
    # from selenium.common.exceptions import TimeoutException
    def process_request(self, request, spider):
        if spider.use_selenium:
            try:
                self.d = init_driver(self.exec_path)
            except TimeoutException:
                # CloseSpider must be raised, not just instantiated
                raise CloseSpider('PhantomJS Timeout Error!!!')

            print("Received url request from scrapy: %s" % request.url)
            try:
                self.d.get(request.url)
            except TimeoutException:
                print("Timeout Error")

            start_time = time.time()

            # Poll for up to 15 seconds until the JavaScript has rendered
            # at least one non-empty price other than "$ 0".
            while time.time() < start_time + 15:
                # find_elements_by_xpath() returns an empty list (it does
                # not raise) when nothing matches, so test for emptiness
                # instead of wrapping the call in a bare except.
                prices = self.d.find_elements_by_xpath(
                    '//*[@class="grid_block"]/div')
                if not prices:
                    print("DIV not found yet")
                    time.sleep(0.5)
                    continue

                values = self.d.find_elements_by_xpath(
                    '//*[@class="grid_block"]/div/ul/li//*[@class="price ng-binding"]'
                )
                if not values:
                    print("VALUE not found yet")
                    time.sleep(0.5)
                    continue

                print("Waiting for the page to load...")
                print(values[0].text)
                found = False
                for value in values:
                    # String equality, not identity: "is" was a bug here
                    if value.text and value.text != "$ 0":
                        found = True
                        break
                if found:
                    break

                time.sleep(0.5)

            # An explicit wait would be an alternative to the polling loop:
            # wait = WebDriverWait(self.d, 30)
            # wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, ".category-breadcrumbs")))

            # Wrap the rendered page in a TextResponse; returning a response
            # from process_request() short-circuits Scrapy's own download.
            resp = TextResponse(url=self.d.current_url,
                                body=self.d.page_source,
                                encoding='utf-8')
            resp.request = request.copy()
            self.d.quit()
            return resp
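Assuming the method above lives in a downloader middleware class, it would be enabled through DOWNLOADER_MIDDLEWARES in the project settings; because process_request() returns a Response, Scrapy then skips its own download for these requests. A minimal sketch, where "myproject" and "SeleniumMiddleware" are assumed names, not taken from the example:

# settings.py -- a minimal sketch; the module path and class name
# are assumptions about where process_request() above is defined.
DOWNLOADER_MIDDLEWARES = {
    'myproject.middlewares.SeleniumMiddleware': 543,
}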
Example #2
def fetch(url, meta=None, *args, **kwargs):
    """Fetch a url with requests and wrap it in a Scrapy TextResponse."""
    # setdefault avoids a duplicate-keyword TypeError when the caller
    # passes a timeout of their own in kwargs.
    kwargs.setdefault('timeout', 30)
    resp = requests.get(url, *args, **kwargs)
    resp.encoding = 'UTF-8'
    rv = TextResponse(resp.url,
                      status=resp.status_code,
                      body=resp.text,
                      encoding='UTF-8')
    # Attach a Request so response.meta and response.follow() work
    rv.request = rv.follow(url, meta=meta)
    _set_response(rv)
    return rv
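A response built this way supports Scrapy's selector API outside of a crawl, e.g. from a REPL. A minimal usage sketch (the URL is illustrative; _set_response is a helper defined elsewhere in that project):

resp = fetch('https://example.com/products', meta={'page': 1})
print(resp.status)                       # HTTP status code
print(resp.css('title::text').get())     # first <title> text
print(resp.xpath('//a/@href').getall())  # all link hrefs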
Example #3
    def process_request(self, request, spider):
        if request.meta.get('use_selenium'):
            try:
                self.d.get(request.url)
                self.d.maximize_window()
            except TimeoutException:
                print("Timeout Exception.")

            if spider.name == "gmail_sender":
                # Log in to Google and get the compose element
                compose_elem = self.login_google("*****@*****.**", "bb")
                if compose_elem is None:
                    print("Login failed.")
                else:
                    print("Login succeeded!")

                    email_list = [
                        "*****@*****.**", "*****@*****.**", "*****@*****.**",
                        "*****@*****.**"
                    ]

                    # Send one test email to each address in the list
                    for email in email_list:
                        if self.open_compose(compose_elem):
                            self.write_receiver_addresses([email])
                            self.write_subject("Hello.")
                            self.write_content("This is a test email.")
                            send_button_elem = self.d.find_element_by_xpath(
                                '//*[@aria-label="Send ‪(Ctrl-Enter)‬"]')
                            send_button_elem.click()
                            print("Sending email...")
                            time.sleep(5)
                        else:
                            break

            # Return the rendered page as a TextResponse; the driver is
            # reused across requests here, so it is not quit.
            resp = TextResponse(url=self.d.current_url,
                                body=self.d.page_source,
                                encoding='utf-8')
            resp.request = request.copy()
            return resp
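All three examples wait with fixed sleeps or a hand-rolled polling loop; Selenium's explicit waits, hinted at by the commented-out WebDriverWait in Example #1, express the same intent more reliably. A minimal sketch, reusing the price XPath from Example #1 inside the same middleware:

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Block for up to 15 seconds until at least one price element is present,
# then hand self.d.page_source to TextResponse as before.
wait = WebDriverWait(self.d, 15)
wait.until(EC.presence_of_element_located(
    (By.XPATH, '//*[@class="grid_block"]/div/ul/li//*[@class="price ng-binding"]')))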