Пример #1
0
 def handle_captcha(self, response, solver):
     """Walk the iframe-based reCAPTCHA flow for ``response``.

     Steps, in order: download the CAPTCHA iframe located by
     ``self.CAPTCHA_XPATH``, download the first image found in it, let
     ``solver`` read that image, post the answer through the iframe's
     form to obtain a challenge token, and finally submit the token
     through the form on the original page that contains the CAPTCHA.

     Written as a generator: each ``yield`` hands out the result of a
     ``download()`` / ``solver.solve()`` call and receives its outcome
     back (presumably driven by Twisted's ``inlineCallbacks``; the
     decorator is not visible here).

     Raises:
         DecaptchaError: the iframe page has no ``//img/@src``, or the
             challenge page has no non-empty ``//textarea`` text.
     """
     page = scrapy.Selector(response)
     # An IndexError escapes here when CAPTCHA_XPATH matches nothing.
     frame_src = page.xpath(self.CAPTCHA_XPATH).extract()[0]
     frame_request = scrapy.Request(urljoin(response.url, frame_src))
     frame_response = yield download(self.crawler, frame_request)

     image_srcs = scrapy.Selector(frame_response).xpath(
         '//img/@src').extract()
     if not image_srcs:
         raise DecaptchaError('No //img/@src found on CAPTCHA page')
     image_request = scrapy.Request(
         urljoin(frame_response.url, image_srcs[0]))
     image_response = yield download(self.crawler, image_request)

     scrapy.log.msg('CAPTCHA image downloaded, solving')
     answer = yield solver.solve(image_response.body)
     scrapy.log.msg('CAPTCHA solved: %s' % answer)

     # Posting the answer to the iframe's form yields a page whose
     # <textarea> carries the challenge token.
     answer_request = scrapy.FormRequest.from_response(
         frame_response,
         formxpath='//form',
         formdata={'recaptcha_response_field': answer})
     token_response = yield download(self.crawler, answer_request)
     tokens = scrapy.Selector(token_response).xpath(
         '//textarea/text()').extract()
     token = tokens[0] if tokens else None
     if not token:
         raise DecaptchaError(
             'Bad challenge from reCAPTCHA API:\n%s' % token_response.body)

     scrapy.log.msg('CAPTCHA solved, submitting challenge')
     final_request = scrapy.FormRequest.from_response(
         response,
         formxpath='//form[.%s]' % self.CAPTCHA_XPATH,
         formdata={'recaptcha_challenge_field': token})
     yield download(self.crawler, final_request)
Пример #2
0
 def solve(self, captcha_image):
     """Upload ``captcha_image`` to Deathbycaptcha and poll for its text.

     The image is posted base64-encoded, together with ``self.username``
     and ``self.password``, to ``self.api_url``. The URL of the response
     to that upload is then requested up to ``self.poll_times`` times,
     asking for JSON, until a reply carries a non-empty ``text``; that
     text is delivered with ``returnValue``.

     Raises:
         CaptchaIncorrectlySolved: a poll reply is not valid JSON, or its
             ``is_correct`` field is ``False``.
         CaptchaSolveTimeout: no poll produced a text.
     """
     payload = {
         'username': self.username,
         'password': self.password,
         'captchafile': 'base64:' + b64encode(captcha_image),
     }
     upload_request = scrapy.FormRequest(self.api_url, formdata=payload)
     upload_response = yield download(self.crawler, upload_request)
     # Redirecting must be enabled
     status_url = upload_response.url

     for _ in xrange(self.poll_times):
         status_request = scrapy.Request(
             status_url,
             dont_filter=True,
             headers={'Accept': 'application/json'},
         )
         status_response = yield download(self.crawler, status_request)

         try:
             status = json.loads(status_response.body)
         except ValueError:
             template = 'Deathbycaptcha returned non-JSON response ({}): {}'
             raise CaptchaIncorrectlySolved(template.format(
                 status_response.status, status_response.body))

         if status['is_correct'] is False:
             raise CaptchaIncorrectlySolved(
                 'Deathbycaptcha returned is_correct=false')

         text = status['text']
         if text:
             returnValue(text)

     raise CaptchaSolveTimeout(
         'Deathbycaptcha did not solve CAPTCHA in time')
Пример #3
0
 def handle_captcha(self, response, solver):
     """Solve the iframe-hosted reCAPTCHA found on ``response``.

     The flow is strictly sequential:

     1. download the iframe whose source is selected by
        ``self.CAPTCHA_XPATH``;
     2. download the first ``<img>`` of that iframe;
     3. pass the image bytes to ``solver.solve``;
     4. post the answer via the iframe's form and read the challenge
        token out of the reply's ``<textarea>``;
     5. submit the token through the form on the original page.

     This is a generator; every ``yield`` passes out the value returned
     by ``download()`` / ``solver.solve()`` and gets its result sent back
     in (presumably under Twisted's ``inlineCallbacks`` - the decorator
     is not part of this view).

     Raises:
         DecaptchaError: no image in the iframe, or an empty / missing
             challenge token.
     """
     def first_or_none(values):
         # First extracted match, or None when nothing matched.
         return values[0] if values else None

     # Step 1: the iframe. IndexError propagates if the XPath is unmatched.
     src = scrapy.Selector(response).xpath(self.CAPTCHA_XPATH).extract()[0]
     captcha_page = yield download(
         self.crawler, scrapy.Request(urljoin(response.url, src)))

     # Step 2: the CAPTCHA image inside the iframe.
     picture_src = first_or_none(
         scrapy.Selector(captcha_page).xpath('//img/@src').extract())
     if picture_src is None:
         raise DecaptchaError('No //img/@src found on CAPTCHA page')
     picture = yield download(
         self.crawler,
         scrapy.Request(urljoin(captcha_page.url, picture_src)))

     # Step 3: solve it.
     scrapy.log.msg('CAPTCHA image downloaded, solving')
     solved_text = yield solver.solve(picture.body)
     scrapy.log.msg('CAPTCHA solved: %s' % solved_text)

     # Step 4: trade the answer for a challenge token.
     exchange = scrapy.FormRequest.from_response(
         captcha_page, formxpath='//form',
         formdata={'recaptcha_response_field': solved_text},
     )
     exchange_reply = yield download(self.crawler, exchange)
     challenge_token = first_or_none(
         scrapy.Selector(exchange_reply).xpath('//textarea/text()').extract())
     if not challenge_token:
         raise DecaptchaError('Bad challenge from reCAPTCHA API:\n%s' %
                              exchange_reply.body)

     # Step 5: submit the token on the page that showed the CAPTCHA.
     scrapy.log.msg('CAPTCHA solved, submitting challenge')
     submission = scrapy.FormRequest.from_response(
         response, formxpath='//form[.%s]' % self.CAPTCHA_XPATH,
         formdata={'recaptcha_challenge_field': challenge_token},
     )
     yield download(self.crawler, submission)
Пример #4
0
 def solve(self, captcha_image):
     """Have Deathbycaptcha read ``captcha_image`` and deliver its text.

     Posts the account credentials and the base64-encoded image to
     ``self.api_url``, then requests the URL of that upload's response
     (with ``Accept: application/json``) at most ``self.poll_times``
     times. The first reply with a non-empty ``text`` ends the loop via
     ``returnValue``.

     Raises:
         CaptchaIncorrectlySolved: a reply cannot be decoded as JSON, or
             its ``is_correct`` is ``False``.
         CaptchaSolveTimeout: all polls finished without a text.
     """
     fields = {}
     fields["username"] = self.username
     fields["password"] = self.password
     fields["captchafile"] = "base64:" + b64encode(captcha_image)
     submitted = yield download(
         self.crawler, scrapy.FormRequest(self.api_url, formdata=fields)
     )
     # Redirecting must be enabled
     result_url = submitted.url

     for _ in xrange(self.poll_times):
         reply = yield download(
             self.crawler,
             scrapy.Request(
                 result_url,
                 dont_filter=True,
                 headers={"Accept": "application/json"},
             ),
         )
         try:
             decoded = json.loads(reply.body)
         except ValueError:
             raise CaptchaIncorrectlySolved(
                 "Deathbycaptcha returned non-JSON response ({}): {}".format(
                     reply.status, reply.body
                 )
             )
         if decoded["is_correct"] is False:
             raise CaptchaIncorrectlySolved(
                 "Deathbycaptcha returned is_correct=false"
             )
         if decoded["text"]:
             returnValue(decoded["text"])
     else:
         # Reached only when every poll came back without a text.
         raise CaptchaSolveTimeout(
             "Deathbycaptcha did not solve CAPTCHA in time"
         )
Пример #5
0
 def solve(self, site_key, page_url, data_s=None):
     """Ask 2captcha to solve a reCAPTCHA v2 and deliver the token.

     Submits a ``userrecaptcha`` job to ``self.api_url + 'in.php'``, takes
     the job id from the second ``|``-separated field of the reply, and
     then requests ``self.api_url + 'res.php'`` up to ``self.poll_times``
     times until a reply no longer contains ``CAPCHA_NOT_READY``.

     Generator-based coroutine: results arrive through ``yield`` and the
     token is delivered with ``returnValue`` (presumably run under
     Twisted's ``inlineCallbacks``; the decorator is outside this view).

     Args:
         site_key: sent to the API as ``googlekey``.
         page_url: sent to the API as ``pageurl``.
         data_s: optional; sent as ``data-s`` when truthy.

     Returns (via ``returnValue``):
         The second ``|``-separated field of the first ready poll reply.

     Raises:
         CaptchaIncorrectlySolved: the submit reply or a ready poll reply
             could not be parsed.
         CaptchaSolveTimeout: every poll reported ``CAPCHA_NOT_READY``.
     """
     formdata = {
         'key': self.apikey,
         'method': 'userrecaptcha',
         'googlekey': site_key,
         'pageurl': page_url
     }
     if data_s:
         formdata['data-s'] = data_s
     request = scrapy.FormRequest(self.api_url + 'in.php',
                                  formdata=formdata)
     response = yield download(self.crawler, request)
     try:
         captcha_id = response.body.split('|')[1]
     except Exception:
         # Any parse failure (e.g. a reply without '|') is reported as a
         # solving error, with the raw reply included for diagnosis.
         raise CaptchaIncorrectlySolved(
             '2captcha returned non-parsable captcha request response ({}): {}'
             .format(response.status, response.body))
     poll_url = self.api_url + 'res.php?key={}&action=get&id={}'.format(
         self.apikey, captcha_id)
     # NOTE(review): no delay between polls is visible here; pacing
     # presumably comes from download()/crawler settings - confirm.
     for _ in xrange(self.poll_times):
         poll_request = scrapy.Request(poll_url, dont_filter=True)
         poll_response = yield download(self.crawler, poll_request)
         if 'CAPCHA_NOT_READY' in poll_response.body:
             continue
         try:
             result = poll_response.body.split('|')[1]
         except Exception:
             # ERROR_CAPTCHA_UNSOLVABLE
             raise CaptchaIncorrectlySolved(
                 '2captcha returned non-parsable captcha poll response ({}): {}'
                 .format(poll_response.status, poll_response.body))
         # Kept outside the try block: returnValue() signals completion by
         # raising, and must never be at risk of being caught by the parse
         # error handler above.
         returnValue(result)
     raise CaptchaSolveTimeout('2captcha did not solve CAPTCHA in time')
Пример #6
0
 def handle_captcha(self, response, solver, v2_solver):
     """Solve whichever CAPTCHA variant ``response`` contains.

     Three variants are handled:

     * **Inline form** (``self.CAPTCHA_FORM_XPATH`` matches): the image is
       taken from the page itself and the answer is posted in the
       ``captcha`` field of the first form on ``response``.
     * **Iframe reCAPTCHA** (no inline form): the iframe selected by
       ``self.CAPTCHA_XPATH`` is downloaded, the answer is posted in its
       ``recaptcha_response_field``, the resulting challenge token is
       submitted through the original page's form, and the original
       request is then issued again.
     * **reCAPTCHA v2** (no image found): the site key, and optional
       ``data-s`` value, are read from the original page and passed to
       ``v2_solver``; the original request is re-issued with the token
       added as a ``g-recaptcha-response`` query parameter.

     Written as a generator: each ``yield`` hands out the result of a
     ``download()`` / ``solve()`` call and receives its outcome back
     (presumably driven by Twisted's ``inlineCallbacks``; the decorator is
     not visible here).

     Args:
         response: the page that presented the CAPTCHA.
         solver: image solver; ``solver.solve(image_bytes)`` is yielded.
         v2_solver: reCAPTCHA v2 solver, or a falsy value if unavailable;
             ``v2_solver.solve(site_key, url, data_s)`` is yielded.

     Raises:
         DecaptchaError: no image and no site key were found; a v2
             CAPTCHA was found but ``v2_solver`` is falsy; the inline
             form submission did not return status 200; or the iframe
             challenge reply carried no token.
     """
     sel = scrapy.Selector(response)
     form = sel.xpath(self.CAPTCHA_FORM_XPATH)
     if form:
         container = form[0]
         form_response = response
         captcha_field = 'captcha'
     else:
         # An IndexError escapes here when CAPTCHA_XPATH matches nothing.
         iframe_src = sel.xpath(self.CAPTCHA_XPATH).extract()[0]
         iframe_url = urljoin(response.url, iframe_src)
         iframe_request = scrapy.Request(iframe_url)
         iframe_response = yield download(self.crawler, iframe_request)
         container = scrapy.Selector(iframe_response)
         form_response = iframe_response
         captcha_field = 'recaptcha_response_field'
     # NOTE(review): '//img' is document-absolute, so in the inline-form
     # case this searches the whole page rather than only inside
     # form[0] - confirm this is intended.
     img_src, = container.xpath('//img/@src').extract()[:1] or [None]
     if img_src is None:
         site_key = sel.xpath(self.CAPTCHA_SITEKEY_XPATH).extract()
         if not site_key:
             raise DecaptchaError('No //img/@src found on CAPTCHA page and no sitekey found')
         site_key = site_key[0]
         data_s = sel.xpath(self.CAPTCHA_DATA_S_XPATH).extract()
         data_s = data_s[0] if data_s else None
         logger.info("RECAPTCHA v2 found: sitekey=%s data-s=%s", site_key, data_s)
         # v2_solver needed
         if not v2_solver:
             raise DecaptchaError('No //img/@src found on CAPTCHA page and no recaptcha v2 solver found')
         challenge = yield v2_solver.solve(site_key, response.url, data_s)
         # The token is delivered by re-issuing the original request with
         # it appended to the query string, not by submitting the form.
         # Start a query string when the URL does not have one yet.
         separator = '&' if '?' in response.url else '?'
         new_url = (response.url + separator +
                    'g-recaptcha-response=' + challenge)
         yield download(self.crawler, response.request.replace(url=new_url))
     else:
         img_url = urljoin(form_response.url, img_src)
         img_request = scrapy.Request(img_url)
         img_response = yield download(self.crawler, img_request)
         logger.info('CAPTCHA image downloaded, solving')
         captcha_text = yield solver.solve(img_response.body)
         logger.info('CAPTCHA solved: %s', captcha_text)
         challenge_request = scrapy.FormRequest.from_response(
             form_response, formxpath='//form',
             formdata={captcha_field: captcha_text}
         )
         challenge_response = yield download(self.crawler, challenge_request)
         if form:
             # Inline form: the submission itself is the final step, so
             # only its status is checked.
             if challenge_response.status != 200:
                 raise DecaptchaError('Bad challenge from reCAPTCHA API:\n%s' %
                                      challenge_response.body)
         else:
             # Iframe flow: the reply's <textarea> holds the challenge
             # token to submit on the original page.
             challenge_sel = scrapy.Selector(challenge_response)
             challenge, = challenge_sel.xpath(
                 '//textarea/text()'
             ).extract()[:1] or [None]
             if not challenge:
                 raise DecaptchaError('Bad challenge from reCAPTCHA API:\n%s' %
                                      challenge_response.body)
             logger.info('CAPTCHA solved, submitting challenge')
             submit_request = scrapy.FormRequest.from_response(
                 response, formxpath='//form[.%s]' % self.CAPTCHA_XPATH,
                 formdata={'recaptcha_challenge_field': challenge}
             )
             yield download(self.crawler, submit_request)
             # Issue the original request again after the token has been
             # submitted.
             yield download(self.crawler, response.request)