def process_request(self, request, spider):
    """Load ``request.url`` in a fresh Selenium driver and return the rendered page.

    Only acts when ``spider.use_selenium`` is truthy; otherwise returns
    ``None`` so the request is left untouched by this method.

    After navigating, the page is polled for up to 15 seconds (every 0.5 s)
    until at least one ``price ng-binding`` element inside a ``grid_block``
    carries a non-empty text other than the ``"$ 0"`` placeholder.  Whether
    or not that happens before the deadline, the driver's current page source
    is wrapped in a ``TextResponse`` and returned.

    Args:
        request: The request being processed; ``request.url`` is loaded and
            a copy of the request is attached to the response.
        spider: The running spider; its ``use_selenium`` attribute gates
            this method.

    Returns:
        A ``TextResponse`` built from the driver's current URL and page
        source, or ``None`` when the spider does not use Selenium.

    Raises:
        CloseSpider: If the driver cannot be started because of a timeout.
    """
    if not spider.use_selenium:
        return None

    try:
        self.d = init_driver(self.exec_path)
    except TimeoutException:
        # The original built this exception without raising it, then carried
        # on with a missing (or stale) driver.
        # NOTE(review): Scrapy documents CloseSpider for spider callbacks;
        # confirm it has the desired effect when raised from a middleware.
        raise CloseSpider('PhantomJS Timeout Error!!!')

    # From here on the driver exists, so make sure it is always shut down,
    # even if polling or response construction fails.
    try:
        print("############################ Received url request from scrapy #####")
        print(request.url)

        try:
            self.d.get(request.url)
        except TimeoutException:
            # Best effort: keep going with whatever has loaded so far.
            print("Timeout Error")

        deadline = time.time() + 15
        while time.time() < deadline:
            # Probe for the price grid first; a failure here means the
            # driver could not evaluate the query yet.
            try:
                self.d.find_elements_by_xpath('//*[@class="grid_block"]/div')
            except Exception:
                print("Not found DIV ++++++++++++++++++++++++++++++++")
                time.sleep(0.5)
                continue

            try:
                values = self.d.find_elements_by_xpath(
                    '//*[@class="grid_block"]/div/ul/li//*[@class="price ng-binding"]'
                )
                print("Waiting to load page..")
                # Indexing an empty result raises IndexError, which is
                # reported below as "not found" and retried.
                print(values[0].text)

                prices_loaded = False
                for value in values:
                    text = value.text
                    # Compare by value: ``is`` against a literal tests
                    # identity and never recognised the "$ 0" placeholder.
                    if text and text != "$ 0":
                        prices_loaded = True
                        break

                if prices_loaded:
                    print(">>>>>>>>>>>>>>>>>>>>>>>>>>>")
                    break
            except Exception:
                print("Not found VALUE --------------------------------")

            time.sleep(0.5)

        resp = TextResponse(url=self.d.current_url,
                            body=self.d.page_source,
                            encoding='utf-8')
        resp.request = request.copy()
        return resp
    finally:
        self.d.quit()
def fetch(url, meta=None, *args, **kwargs):
    """Fetch ``url`` with ``requests`` and wrap the result in a ``TextResponse``.

    The HTTP response is decoded as UTF-8 regardless of what the server
    declares.  The returned response has its ``request`` attribute set to
    the request produced by ``follow(url, meta=meta)`` and is also passed
    to ``_set_response`` before being returned.

    Args:
        url: The URL to download.
        meta: Optional value forwarded as ``meta`` to the attached request.
        *args: Extra positional arguments forwarded to ``requests.get``.
        **kwargs: Extra keyword arguments forwarded to ``requests.get``.
            ``timeout`` defaults to 30 seconds when not supplied.

    Returns:
        The ``TextResponse`` built from the downloaded page.
    """
    # Only apply the default when the caller did not choose a timeout;
    # passing it as a second explicit keyword raised TypeError on the call.
    kwargs.setdefault('timeout', 30)
    resp = requests.get(url, *args, **kwargs)
    resp.encoding = 'UTF-8'

    rv = TextResponse(resp.url,
                      status=resp.status_code,
                      body=resp.text,
                      encoding='UTF-8')
    rv.request = rv.follow(url, meta=meta)
    # NOTE(review): _set_response is defined elsewhere; presumably it
    # records rv as the current response - confirm against its definition.
    _set_response(rv)
    return rv
def process_request(self, request, spider):
    """Load ``request.url`` in the shared Selenium driver and return the page.

    Only acts when ``request.meta`` carries a truthy ``use_selenium`` flag;
    otherwise returns ``None`` so the request is left untouched by this
    method.  For the spider named ``"gmail_sender"`` it additionally logs in
    and sends one test email to each address in a fixed list, stopping at
    the first address for which the compose window cannot be opened.

    Args:
        request: The request being processed; ``request.url`` is loaded and
            a copy of the request is attached to the response.
        spider: The running spider; its ``name`` selects the email flow.

    Returns:
        A ``TextResponse`` built from the driver's current URL and page
        source, or ``None`` when Selenium is not requested.  The driver is
        left open.
    """
    # ``.get`` avoids a KeyError on requests that never set the flag.
    if not request.meta.get('use_selenium'):
        return None

    try:
        self.d.get(request.url)
        self.d.maximize_window()
    except TimeoutException:
        # Best effort: keep going with whatever has loaded so far.
        print("Timeout Exception.")

    if spider.name == "gmail_sender":
        # Open the login window.
        # NOTE(review): credentials and recipients are hard-coded here;
        # consider moving them to settings.
        compose_elem = self.login_google("*****@*****.**", "bb")
        if compose_elem is None:
            print("Login Failed.")
        else:
            print("Login Successed!!")
            email_list = [
                "*****@*****.**",
                "*****@*****.**",
                "*****@*****.**",
                "*****@*****.**",
            ]
            for email in email_list:
                if not self.open_compose(compose_elem):
                    # Without a compose window nothing further can be sent.
                    break
                self.write_receiver_addresses([email])
                self.write_subject("Hello.")
                self.write_content("This is test email.")
                send_button_elem = self.d.find_element_by_xpath(
                    '//*[@aria-label="Send (Ctrl-Enter)"]')
                send_button_elem.click()
                print("Send emails....")
                # Pause between messages before composing the next one.
                time.sleep(5)

    resp = TextResponse(url=self.d.current_url,
                        body=self.d.page_source,
                        encoding='utf-8')
    resp.request = request.copy()
    return resp