Exemplo n.º 1
0
def retry_if_cloudflare(response, args):
    """Try to clear a Cloudflare challenge and download the page again.

    ``response`` is wrapped in ``Cloudflare``; when that wrapper does not
    flag it (``is_cloudflare`` is falsy) the response is returned untouched.

    Otherwise the URL given by ``get_url()`` is requested through
    ``downloadpage`` using a copy of ``args`` whose ``url`` is replaced,
    ``follow_redirects`` is disabled and ``headers`` holds only a
    ``Referer`` pointing at the original ``args['url']``.

    :param response: response object handed to ``Cloudflare`` and returned
        as-is whenever no new download is produced.
    :param args: dict of keyword arguments for ``downloadpage``; must
        contain ``'url'``. NOTE: on a successful authorization its
        ``'bypass_cloudflare'`` entry is set to ``False`` in place.
    :return: ``downloadpage(**args).__dict__`` after a successful
        authorization or a retry, otherwise ``response``.
    """
    challenge = Cloudflare(response)
    if not challenge.is_cloudflare:
        return response

    logger.info("cloudflare detectado, esperando %s segundos..." % challenge.wait_time)
    auth_url = challenge.get_url()
    logger.info("Autorizando... url: %s" % auth_url)

    # Same request settings as the original call, redirected to the
    # authorization URL and carrying only a Referer header.
    auth_args = dict(args,
                     url=auth_url,
                     follow_redirects=False,
                     headers={'Referer': args['url']})
    auth_result = downloadpage(**auth_args)

    if auth_result.sucess:
        logger.info("Autorización correcta, descargando página")
        # Disable the bypass for the follow-up download of the real page.
        args['bypass_cloudflare'] = False
        return downloadpage(**args).__dict__

    challenged_again = auth_result.code == 403 and auth_result.headers.get('cf-chl-bypass')
    if not challenged_again:
        logger.info("No se ha podido autorizar")
        return response

    # Count how many frames of this function are already on the call stack
    # (index 3 of each stack record is the function name) to cap retries.
    nested_calls = sum(1 for frame in inspect.stack() if frame[3] == 'retry_if_cloudflare')
    if nested_calls > 2:
        logger.info("No se ha podido autorizar. Demasiados intentos")
        return response

    logger.info("Reintentando...")
    return downloadpage(**args).__dict__
Exemplo n.º 2
0
    # NOTE(review): this is the interior of a function whose header is not
    # visible here; `cookies`, `response`, `bypass_cloudflare`,
    # `request_headers`, `post`, `headers`, `timeout`, `follow_redirects`,
    # `replace_headers` and `add_referer` are presumably its parameters or
    # earlier locals -- confirm against the full definition.

    # Persist cookies only when the caller asked for cookie handling.
    if cookies:
        save_cookies()

    logger.info("Encoding: %s" % (response["headers"].get('content-encoding')))

    # Transparently decompress gzip-encoded bodies.
    if response["headers"].get('content-encoding') == 'gzip':
        logger.info("Descomprimiendo...")
        try:
            response["data"] = gzip.GzipFile(fileobj=StringIO(response["data"])).read()
            logger.info("Descomprimido")
        # NOTE(review): bare except -- any failure (including
        # KeyboardInterrupt/SystemExit) is logged and swallowed, leaving
        # response["data"] as it was before the attempt.
        except:
            logger.info("No se ha podido descomprimir")

    # Anti-Cloudflare: when enabled, check whether the response is flagged
    # by the Cloudflare helper and, if so, try to authorize and re-download.
    if bypass_cloudflare:
        cf = Cloudflare(response)
        if cf.is_cloudflare:
            logger.info("cloudflare detectado, esperando %s segundos..." % cf.wait_time)
            auth_url = cf.get_url()
            logger.info("Autorizando... url: %s" % auth_url)
            # Request the authorization URL with the headers stored in
            # request_headers, replacing (not merging) the default headers.
            if downloadpage(auth_url, headers=request_headers, replace_headers=True).sucess:
                logger.info("Autorización correcta, descargando página")
                # Repeat the download with the caller's settings.
                # bypass_cloudflare is not forwarded here, so the nested
                # call uses whatever default downloadpage declares for it.
                resp = downloadpage(url=response["url"], post=post, headers=headers, timeout=timeout,
                                    follow_redirects=follow_redirects,
                                    cookies=cookies, replace_headers=replace_headers, add_referer=add_referer)
                # Overwrite the current response fields with the fresh
                # result; response["url"] is left unchanged.
                response["sucess"] = resp.sucess
                response["code"] = resp.code
                response["error"] = resp.error
                response["headers"] = resp.headers
                response["data"] = resp.data
                response["time"] = resp.time
Exemplo n.º 3
0
def anti_cloudflare(response, request_headers, url, post, headers, timeout,
                    follow_redirects, cookies, replace_headers, add_referer,
                    only_headers, bypass_cloudflare, count_retries,
                    count_retries_tot, random_headers, ignore_response_code,
                    alfa_s, proxy, proxy_web, proxy_addr_forced, forced_proxy,
                    proxy_retries_counter, proxy_retries):
    """Try to clear a Cloudflare challenge and refresh ``response`` in place.

    ``response`` is wrapped in ``core.cloudflare.Cloudflare``. When the
    wrapper does not flag it, nothing else happens. Otherwise the retry
    counter is incremented, the URL from ``get_url()`` is requested with
    ``request_headers`` and, if that answers 403, ``url`` itself is
    requested the same way. If the last of those requests reports
    ``sucess``, the page at ``response["url"]`` is downloaded again with the
    caller's settings and its ``sucess``, ``code``, ``error``, ``headers``,
    ``data``, ``time`` and ``url`` are copied into ``response``.

    Log messages are emitted only when ``alfa_s`` is falsy.

    The parameters ``only_headers``, ``bypass_cloudflare``,
    ``random_headers``, ``ignore_response_code`` and
    ``proxy_retries_counter`` are accepted but not used in this function.

    :return: tuple ``(response, count_retries)``; ``count_retries`` is one
        higher than the value received when a challenge was detected.
    """
    from core.cloudflare import Cloudflare

    challenge = Cloudflare(response)
    if not challenge.is_cloudflare:
        return (response, count_retries)

    count_retries += 1
    if not alfa_s:
        logger.info("cloudflare detectado, esperando %s segundos..." %
                    challenge.wait_time)
    auth_url = challenge.get_url()
    if not alfa_s:
        logger.info("Autorizando... intento %d url: %s" %
                    (count_retries, auth_url))

    # Keyword arguments shared by every downloadpage call below.
    shared_kwargs = dict(count_retries=count_retries,
                         count_retries_tot=count_retries_tot,
                         proxy=proxy,
                         proxy_web=proxy_web,
                         forced_proxy=forced_proxy,
                         proxy_addr_forced=proxy_addr_forced,
                         proxy_retries=proxy_retries,
                         alfa_s=alfa_s)
    # Extra settings used only for the authorization requests.
    auth_kwargs = dict(shared_kwargs,
                       headers=request_headers,
                       replace_headers=True,
                       ignore_response_code=True)

    auth_result = downloadpage(auth_url, **auth_kwargs)
    if auth_result.code == 403:
        # The authorization URL was refused: request the original URL
        # with the same settings instead.
        auth_result = downloadpage(url, **auth_kwargs)

    if not auth_result.sucess:
        if not alfa_s:
            logger.info("No se ha podido autorizar")
        return (response, count_retries)

    if not alfa_s:
        logger.info("Autorización correcta, descargando página")
    fresh = downloadpage(url=response["url"],
                         post=post,
                         headers=headers,
                         timeout=timeout,
                         follow_redirects=follow_redirects,
                         cookies=cookies,
                         replace_headers=replace_headers,
                         add_referer=add_referer,
                         **shared_kwargs)

    # Copy the fresh result into the caller's response, field by field.
    for field in ("sucess", "code", "error", "headers", "data", "time", "url"):
        response[field] = getattr(fresh, field)

    return (response, count_retries)