def handle_captcha(self, response, solver):
    """Solve the iframe-embedded CAPTCHA found in ``response`` and submit it.

    This is a generator: each ``yield`` hands a pending download (or the
    solver call) to whatever drives the generator and resumes with its
    result.  NOTE(review): presumably wrapped by Twisted's
    ``inlineCallbacks``; the decorator is not visible here -- confirm.

    Steps performed:
      1. download the iframe page located by ``self.CAPTCHA_XPATH``;
      2. download the first ``<img>`` of that page and pass its body to
         ``solver.solve``;
      3. post the solved text through the iframe page's form and read the
         challenge token from the ``<textarea>`` of the reply;
      4. post that token through the form of the original page that
         contains the CAPTCHA.

    :param response: response of the page that embeds the CAPTCHA iframe.
    :param solver: object whose ``solve(image_body)`` yields the text.
    :raises IndexError: when ``self.CAPTCHA_XPATH`` matches nothing.
    :raises DecaptchaError: when the iframe page has no image, or the
        reply to the solved text carries no challenge token.
    """
    page = scrapy.Selector(response)
    frame_src = page.xpath(self.CAPTCHA_XPATH).extract()[0]
    frame_url = urljoin(response.url, frame_src)
    frame_response = yield download(self.crawler, scrapy.Request(frame_url))

    # Only the first image of the iframe page is considered.
    image_sources = scrapy.Selector(frame_response).xpath(
        '//img/@src').extract()
    if not image_sources:
        raise DecaptchaError('No //img/@src found on CAPTCHA page')
    image_url = urljoin(frame_response.url, image_sources[0])
    image_response = yield download(self.crawler, scrapy.Request(image_url))

    scrapy.log.msg('CAPTCHA image downloaded, solving')
    captcha_text = yield solver.solve(image_response.body)
    scrapy.log.msg('CAPTCHA solved: %s' % captcha_text)

    # Send the solved text through the form of the iframe page.
    token_request = scrapy.FormRequest.from_response(
        frame_response,
        formxpath='//form',
        formdata={'recaptcha_response_field': captcha_text})
    token_response = yield download(self.crawler, token_request)

    textarea_texts = scrapy.Selector(token_response).xpath(
        '//textarea/text()').extract()
    challenge = textarea_texts[0] if textarea_texts else None
    if not challenge:
        raise DecaptchaError('Bad challenge from reCAPTCHA API:\n%s' %
                             token_response.body)

    scrapy.log.msg('CAPTCHA solved, submitting challenge')
    # The form is selected on the ORIGINAL page: the one that contains the
    # CAPTCHA node.
    final_request = scrapy.FormRequest.from_response(
        response,
        formxpath='//form[.%s]' % self.CAPTCHA_XPATH,
        formdata={'recaptcha_challenge_field': challenge})
    yield download(self.crawler, final_request)
def solve(self, captcha_image):
    """Upload a CAPTCHA image to Deathbycaptcha and poll for the answer.

    This is a generator that yields pending downloads and finishes through
    ``returnValue``.  NOTE(review): presumably wrapped by Twisted's
    ``inlineCallbacks``; the decorator is not visible here -- confirm.

    :param captcha_image: raw image data; sent base64-encoded.
    :returns: (via ``returnValue``) the non-empty ``text`` field of the
        first poll reply that has one.
    :raises CaptchaIncorrectlySolved: when a poll reply is not JSON, or
        its ``is_correct`` field is exactly ``False``.
    :raises CaptchaSolveTimeout: when ``self.poll_times`` polls pass
        without a non-empty ``text``.
    """
    payload = {
        "username": self.username,
        "password": self.password,
        "captchafile": "base64:" + b64encode(captcha_image),
    }
    upload_request = scrapy.FormRequest(self.api_url, formdata=payload)
    upload_response = yield download(self.crawler, upload_request)

    # Redirecting must be enabled: the status URL that gets polled is the
    # final URL of the upload response.
    status_url = upload_response.url

    for _attempt in xrange(self.poll_times):
        status_request = scrapy.Request(
            status_url,
            dont_filter=True,
            headers={"Accept": "application/json"},
        )
        status_response = yield download(self.crawler, status_request)

        try:
            status = json.loads(status_response.body)
        except ValueError:
            message = "Deathbycaptcha returned non-JSON response ({}): {}"
            raise CaptchaIncorrectlySolved(
                message.format(status_response.status, status_response.body))

        # Identity check on purpose: only a literal false counts as a
        # rejection, other falsy values do not.
        if status["is_correct"] is False:
            raise CaptchaIncorrectlySolved(
                "Deathbycaptcha returned is_correct=false")

        solved_text = status["text"]
        if solved_text:
            returnValue(solved_text)

    raise CaptchaSolveTimeout("Deathbycaptcha did not solve CAPTCHA in time")
def handle_captcha(self, response, solver):
    """Work through an iframe-based CAPTCHA and post the resulting challenge.

    Generator-style coroutine; every ``yield`` waits for a download or for
    the solver.  NOTE(review): presumably driven by Twisted's
    ``inlineCallbacks`` -- the decorator is not visible here, confirm.

    Flow: CAPTCHA iframe -> first image of the iframe page ->
    ``solver.solve(image body)`` -> iframe form submission -> challenge
    token read from ``<textarea>`` -> submission of the original page's
    form with that token.

    :param response: response of the page showing the CAPTCHA.
    :param solver: provides ``solve(image_body)`` yielding the solved text.
    :raises IndexError: if ``self.CAPTCHA_XPATH`` selects nothing.
    :raises DecaptchaError: if no image is found on the iframe page or no
        challenge token comes back.
    """
    captcha_src = scrapy.Selector(response).xpath(
        self.CAPTCHA_XPATH).extract()[0]
    captcha_page = yield download(
        self.crawler,
        scrapy.Request(urljoin(response.url, captcha_src)))

    # First <img> on the iframe page, or None when there is none.
    picture_src = next(
        iter(scrapy.Selector(captcha_page).xpath('//img/@src').extract()),
        None)
    if picture_src is None:
        raise DecaptchaError('No //img/@src found on CAPTCHA page')

    picture = yield download(
        self.crawler,
        scrapy.Request(urljoin(captcha_page.url, picture_src)))
    scrapy.log.msg('CAPTCHA image downloaded, solving')

    captcha_text = yield solver.solve(picture.body)
    scrapy.log.msg('CAPTCHA solved: %s' % captcha_text)

    # Post the solved text using the form of the iframe page.
    answer_form = scrapy.FormRequest.from_response(
        captcha_page,
        formxpath='//form',
        formdata={'recaptcha_response_field': captcha_text},
    )
    answer_page = yield download(self.crawler, answer_form)

    # The challenge token is the text of the first <textarea>; a missing
    # or empty value is treated as a failure.
    challenge = next(
        iter(scrapy.Selector(answer_page).xpath(
            '//textarea/text()').extract()),
        None)
    if not challenge:
        raise DecaptchaError(
            'Bad challenge from reCAPTCHA API:\n%s' % answer_page.body)

    scrapy.log.msg('CAPTCHA solved, submitting challenge')
    # Back on the original page: pick the form that contains the CAPTCHA.
    enclosing_form_xpath = '//form[.%s]' % self.CAPTCHA_XPATH
    yield download(
        self.crawler,
        scrapy.FormRequest.from_response(
            response,
            formxpath=enclosing_form_xpath,
            formdata={'recaptcha_challenge_field': challenge},
        ))
def solve(self, captcha_image):
    """Submit ``captcha_image`` to Deathbycaptcha and wait for its text.

    Generator-style coroutine that yields downloads and delivers its
    result with ``returnValue``.  NOTE(review): presumably driven by
    Twisted's ``inlineCallbacks`` -- the decorator is not visible here.

    :param captcha_image: raw image data, uploaded as ``base64:<data>``.
    :returns: (via ``returnValue``) the solved text once a poll reply
        contains a non-empty ``text``.
    :raises CaptchaIncorrectlySolved: on a non-JSON poll reply or when the
        reply's ``is_correct`` is ``False``.
    :raises CaptchaSolveTimeout: after ``self.poll_times`` polls without
        an answer.
    """
    encoded_image = 'base64:' + b64encode(captcha_image)
    submit_request = scrapy.FormRequest(
        self.api_url,
        formdata={
            'username': self.username,
            'password': self.password,
            'captchafile': encoded_image,
        },
    )
    submit_response = yield download(self.crawler, submit_request)

    # Redirecting must be enabled -- polling targets the URL the upload
    # ended on.
    poll_url = submit_response.url

    for _ in xrange(self.poll_times):
        reply = yield download(
            self.crawler,
            scrapy.Request(
                poll_url,
                dont_filter=True,
                headers={'Accept': 'application/json'},
            ),
        )

        try:
            reply_data = json.loads(reply.body)
        except ValueError:
            raise CaptchaIncorrectlySolved(
                'Deathbycaptcha returned non-JSON response ({}): {}'.format(
                    reply.status, reply.body))

        # Only an explicit false is a rejection; anything else keeps polling.
        rejected = reply_data['is_correct'] is False
        if rejected:
            raise CaptchaIncorrectlySolved(
                'Deathbycaptcha returned is_correct=false')

        answer = reply_data['text']
        if not answer:
            continue
        returnValue(answer)

    raise CaptchaSolveTimeout('Deathbycaptcha did not solve CAPTCHA in time')
def solve(self, site_key, page_url, data_s=None):
    """Ask 2captcha to solve a reCAPTCHA and poll until the token is ready.

    Generator-style coroutine that yields downloads and delivers its
    result with ``returnValue``.  NOTE(review): presumably driven by
    Twisted's ``inlineCallbacks`` -- the decorator is not visible here.

    The task is created by posting to ``<api_url>in.php``; the reply is
    expected to look like ``<status>|<captcha id>``.  The result is then
    fetched from ``<api_url>res.php`` up to ``self.poll_times`` times; a
    reply containing ``CAPCHA_NOT_READY`` means "poll again", anything
    else is expected to look like ``<status>|<token>``.

    :param site_key: reCAPTCHA site key, sent as ``googlekey``.
    :param page_url: URL of the page showing the CAPTCHA, sent as
        ``pageurl``.
    :param data_s: optional value sent as ``data-s`` when truthy.
    :returns: (via ``returnValue``) the part of the poll reply after the
        first ``|``.
    :raises CaptchaIncorrectlySolved: when either reply has no ``|``
        separated second field.
    :raises CaptchaSolveTimeout: when no answer arrives within
        ``self.poll_times`` polls.
    """
    formdata = {
        'key': self.apikey,
        'method': 'userrecaptcha',
        'googlekey': site_key,
        'pageurl': page_url,
    }
    if data_s:
        formdata['data-s'] = data_s

    request = scrapy.FormRequest(self.api_url + 'in.php', formdata=formdata)
    response = yield download(self.crawler, request)

    # A reply without a second '|' field (e.g. a bare error code) cannot be
    # used to poll, so it is reported as a failure.
    try:
        captcha_id = response.body.split('|')[1]
    except IndexError:
        raise CaptchaIncorrectlySolved(
            '2captcha returned non-parsable captcha request response ({}): {}'
            .format(response.status, response.body))

    poll_url = self.api_url + 'res.php?key={}&action=get&id={}'.format(
        self.apikey, captcha_id)

    for _ in xrange(self.poll_times):
        poll_request = scrapy.Request(poll_url, dont_filter=True)
        poll_response = yield download(self.crawler, poll_request)

        if 'CAPCHA_NOT_READY' in poll_response.body:
            continue

        try:
            result = poll_response.body.split('|')[1]
        except IndexError:
            # e.g. ERROR_CAPTCHA_UNSOLVABLE
            raise CaptchaIncorrectlySolved(
                '2captcha returned non-parsable captcha poll response ({}): {}'
                .format(poll_response.status, poll_response.body))

        # returnValue() works by raising, so it is kept outside the
        # try/except above to make sure it can never be intercepted.
        returnValue(result)

    raise CaptchaSolveTimeout('2captcha did not solve CAPTCHA in time')
def handle_captcha(self, response, solver, v2_solver):
    """Solve the CAPTCHA shown on ``response`` (image or reCAPTCHA v2).

    Generator-style coroutine; every ``yield`` waits for a download or for
    a solver.  NOTE(review): presumably driven by Twisted's
    ``inlineCallbacks`` -- the decorator is not visible here, confirm.

    The CAPTCHA is looked for in two places:

    * a form matched by ``self.CAPTCHA_FORM_XPATH`` on the page itself
      (answer field ``captcha``);
    * otherwise the iframe matched by ``self.CAPTCHA_XPATH``, which is
      downloaded first (answer field ``recaptcha_response_field``).

    If an ``<img>`` is found, its body is given to ``solver`` and the
    text is posted through the form.  If no image is found, the page is
    treated as reCAPTCHA v2: the site key (and optional ``data-s``) are
    given to ``v2_solver`` and the original request is repeated with the
    token added as the ``g-recaptcha-response`` query parameter.

    :param response: response of the page showing the CAPTCHA.
    :param solver: image solver; ``solve(image_body)`` yields the text.
    :param v2_solver: reCAPTCHA v2 solver;
        ``solve(site_key, page_url, data_s)`` yields the token.  May be
        falsy, in which case v2 pages cannot be handled.
    :raises IndexError: if neither the form nor the iframe is present.
    :raises DecaptchaError: if there is no image and no site key, if a v2
        page is met without ``v2_solver``, or if submitting the answer
        fails.
    """
    sel = scrapy.Selector(response)
    form = sel.xpath(self.CAPTCHA_FORM_XPATH)
    if form:
        container = form[0]
        form_response = response
        captcha_field = 'captcha'
    else:
        iframe_src = sel.xpath(self.CAPTCHA_XPATH).extract()[0]
        iframe_url = urljoin(response.url, iframe_src)
        iframe_response = yield download(
            self.crawler, scrapy.Request(iframe_url))
        container = scrapy.Selector(iframe_response)
        form_response = iframe_response
        captcha_field = 'recaptcha_response_field'

    # NOTE(review): an XPath starting with '//' is evaluated from the
    # document root, so when ``container`` is the form node this still
    # matches any <img> of the page -- confirm that is intended.
    img_src, = container.xpath('//img/@src').extract()[:1] or [None]

    if img_src is None:
        # No image: fall back to reCAPTCHA v2.
        site_key = sel.xpath(self.CAPTCHA_SITEKEY_XPATH).extract()
        if not site_key:
            raise DecaptchaError(
                'No //img/@src found on CAPTCHA page and no sitekey found')
        site_key = site_key[0]
        data_s = sel.xpath(self.CAPTCHA_DATA_S_XPATH).extract()
        data_s = data_s[0] if data_s else None
        logger.info("RECAPTCHA v2 found: sitekey=%s data-s=%s",
                    site_key, data_s)
        if not v2_solver:
            raise DecaptchaError(
                'No //img/@src found on CAPTCHA page and no recaptcha v2 '
                'solver found')
        challenge = yield v2_solver.solve(site_key, response.url, data_s)

        # Repeat the original request with the token as a query parameter.
        # Use '?' when the URL has no query string yet; a bare '&' would
        # otherwise be glued onto the path.
        separator = '&' if '?' in response.url else '?'
        new_url = (response.url + separator +
                   'g-recaptcha-response=' + challenge)
        yield download(self.crawler, response.request.replace(url=new_url))
    else:
        img_url = urljoin(form_response.url, img_src)
        img_response = yield download(self.crawler, scrapy.Request(img_url))
        logger.info('CAPTCHA image downloaded, solving')
        captcha_text = yield solver.solve(img_response.body)
        logger.info('CAPTCHA solved: %s', captcha_text)

        challenge_request = scrapy.FormRequest.from_response(
            form_response,
            formxpath='//form',
            formdata={captcha_field: captcha_text},
        )
        challenge_response = yield download(self.crawler, challenge_request)

        if form:
            # On-page form: only the HTTP status of the submission is
            # checked.
            if challenge_response.status != 200:
                raise DecaptchaError(
                    'Bad challenge from reCAPTCHA API:\n%s'
                    % challenge_response.body)
        else:
            # Iframe flow: the reply carries a challenge token that must
            # be posted through the form of the original page.
            challenge_sel = scrapy.Selector(challenge_response)
            challenge, = challenge_sel.xpath(
                '//textarea/text()').extract()[:1] or [None]
            if not challenge:
                raise DecaptchaError(
                    'Bad challenge from reCAPTCHA API:\n%s'
                    % challenge_response.body)
            logger.info('CAPTCHA solved, submitting challenge')
            submit_request = scrapy.FormRequest.from_response(
                response,
                formxpath='//form[.%s]' % self.CAPTCHA_XPATH,
                formdata={'recaptcha_challenge_field': challenge},
            )
            yield download(self.crawler, submit_request)
            # NOTE(review): the original request is fetched again after
            # the submission.  The nesting of this statement could not be
            # recovered from the source as received; it is kept next to
            # the submit request -- confirm it should not also run for
            # the on-page form branch.
            yield download(self.crawler, response.request)