Example No. 1
def downloadpage(url, **opt):
    """
        Opens a url and returns the data obtained.

        @param url: url to open.
        @type url: str
        @param post: If it contains any value, it is sent via POST.
        @type post: str
        @param headers: Headers for the request; if empty, the default headers are used.
        @type headers: dict, list
        @param timeout: Timeout for the request.
        @type timeout: int
        @param follow_redirects: Whether redirects should be followed.
        @type follow_redirects: bool
        @param cookies: Whether cookies should be used.
        @type cookies: bool
        @param replace_headers: If True, the headers passed in the "headers" parameter completely replace the default headers.
                                If False, the headers passed in the "headers" parameter update the default headers.
        @type replace_headers: bool
        @param add_referer: Whether to add the "Referer" header, using the domain of the url as its value.
        @type add_referer: bool
        @param only_headers: If True, only the headers are downloaded, skipping the url content.
        @type only_headers: bool
        @param random_headers: If True, a random set of headers is used.
        @type random_headers: bool
        @param ignore_response_code: If True, skips the WebErrorException handling for responses such as a 404 on veseriesonline whose body is still usable data.
        @type ignore_response_code: bool
        @return: Result of the request
        @rtype: HTTPResponse

                Parameter               Type    Description
                ----------------------------------------------------------------------------------------------------------------
                HTTPResponse.sucess:    bool    True: request completed successfully | False: error while making the request
                HTTPResponse.code:      int     Server response code, or error code if an error occurred
                HTTPResponse.error:     str     Description of the error, if an error occurred
                HTTPResponse.headers:   dict    Dictionary with the server's response headers
                HTTPResponse.data:      str     Response body obtained from the server
                HTTPResponse.json:      dict    Server response parsed as json
                HTTPResponse.time:      float   Time taken to make the request
    """
    logger.info()
    from . import scrapertools
    load_cookies()
    import requests

    # Default headers, used if nothing is specified
    req_headers = default_headers.copy()

    # Headers passed as parameters
    if opt.get('headers', None) is not None:
        if not opt.get('replace_headers', False):
            req_headers.update(dict(opt['headers']))
        else:
            req_headers = dict(opt['headers'])

    if opt.get('random_headers', False) or HTTPTOOLS_DEFAULT_RANDOM_HEADERS:
        req_headers['User-Agent'] = random_useragent()
    url = urllib.quote(url, safe="%/:=&?~#+!$,;'@()*[]")

    opt['proxy_retries_counter'] = 0
    opt['url_save'] = url
    opt['post_save'] = opt.get('post', None)

    while opt['proxy_retries_counter'] <= opt.get('proxy_retries', 1):
        response = {}
        info_dict = []
        payload = dict()
        files = {}
        file_name = ''
        opt['proxy_retries_counter'] += 1

        domain = urlparse.urlparse(url)[1]
        global CS_stat
        if domain in CF_LIST or opt.get('CF', False):  # Domain is in the CF list, or flagged in the call
            from lib import cloudscraper
            session = cloudscraper.create_scraper()  # The domain needs CloudScraper
            session.verify = True
            CS_stat = True
        else:
            session = requests.session()
            session.verify = False
            CS_stat = False

        if opt.get('cookies', True):
            session.cookies = cj
        session.headers.update(req_headers)

        # Prepare the url if a proxy is needed, or if "proxy_addr_forced" is sent from the channel
        url, proxy_data, opt = check_proxy(url, **opt)
        if opt.get('proxy_addr_forced', {}):
            session.proxies = opt['proxy_addr_forced']
        elif proxy_data.get('dict', {}):
            session.proxies = proxy_data['dict']

        inicio = time.time()

        if opt.get('timeout', None) is None and HTTPTOOLS_DEFAULT_DOWNLOAD_TIMEOUT is not None:
            opt['timeout'] = HTTPTOOLS_DEFAULT_DOWNLOAD_TIMEOUT
        if opt['timeout'] == 0:
            opt['timeout'] = None

        if len(url) > 0:
            try:
                if opt.get('post', None) is not None or opt.get(
                        'file', None) is not None:
                    if opt.get('post', None) is not None:
                        ### Convert a string post into a dict
                        try:
                            json.loads(opt['post'])
                            payload = opt['post']
                        except Exception:
                            if not isinstance(opt['post'], dict):
                                post = urlparse.parse_qs(opt['post'],
                                                         keep_blank_values=1)
                                payload = dict()

                                for key, value in list(post.items()):
                                    try:
                                        payload[key] = value[0]
                                    except Exception:
                                        payload[key] = ''
                            else:
                                payload = opt['post']

                    ### Verifies 'file' and 'file_name' options to upload a buffer or a file
                    if opt.get('file', None) is not None:
                        if len(opt['file']) < 256 and os.path.isfile(
                                opt['file']):
                            if opt.get('file_name', None) is None:
                                path_file, opt['file_name'] = os.path.split(
                                    opt['file'])
                            files = {
                                'file':
                                (opt['file_name'], open(opt['file'], 'rb'))
                            }
                            file_name = opt['file']
                        else:
                            files = {
                                'file': (opt.get('file_name',
                                                 'Default'), opt['file'])
                            }
                            file_name = opt.get(
                                'file_name', 'Default') + ', memory buffer'

                    info_dict = fill_fields_pre(url, opt, proxy_data,
                                                file_name)
                    if opt.get('only_headers', False):
                        ### Makes the request with HEAD method
                        req = session.head(url,
                                           allow_redirects=opt.get(
                                               'follow_redirects', True),
                                           timeout=opt['timeout'])
                    else:
                        ### Makes the request with POST method
                        req = session.post(url,
                                           data=payload,
                                           allow_redirects=opt.get(
                                               'follow_redirects', True),
                                           files=files,
                                           timeout=opt['timeout'])

                elif opt.get('only_headers', False):
                    info_dict = fill_fields_pre(url, opt, proxy_data,
                                                file_name)
                    ### Makes the request with HEAD method
                    req = session.head(url,
                                       allow_redirects=opt.get(
                                           'follow_redirects', True),
                                       timeout=opt['timeout'])
                else:
                    info_dict = fill_fields_pre(url, opt, proxy_data,
                                                file_name)
                    ### Makes the request with GET method
                    req = session.get(url,
                                      allow_redirects=opt.get(
                                          'follow_redirects', True),
                                      timeout=opt['timeout'])

            except Exception as e:
                if not opt.get('ignore_response_code',
                               False) and not proxy_data.get('stat', ''):
                    req = requests.Response()
                    response['data'] = ''
                    response['sucess'] = False
                    info_dict.append(('Success', 'False'))
                    response['code'] = str(e)
                    info_dict.append(('Response code', str(e)))
                    info_dict.append(('Finished in', time.time() - inicio))
                    if not opt.get('alfa_s', False):
                        show_infobox(info_dict)
                        import traceback
                        logger.error(traceback.format_exc(1))
                    return type('HTTPResponse', (), response)
                else:
                    req = requests.Response()
                    req.status_code = str(e)

        else:
            response['data'] = ''
            response['sucess'] = False
            response['code'] = ''
            return type('HTTPResponse', (), response)

        response_code = req.status_code

        if req.headers.get('Server', '').startswith('cloudflare') \
                and response_code in [429, 503, 403] and not opt.get('CF', False):
            domain = urlparse.urlparse(url)[1]
            if domain not in CF_LIST:
                opt["CF"] = True
                with open(CF_LIST_PATH, "a") as CF_File:
                    CF_File.write("%s\n" % domain)
                logger.debug("CF retry... for domain: %s" % domain)
                return downloadpage(url, **opt)

        response['data'] = req.content
        try:
            encoding = req.encoding
            if not encoding:
                encoding = 'utf8'
            if PY3 and isinstance(response['data'], bytes) and 'Content-Type' in req.headers \
                        and ('text/' in req.headers['Content-Type'] or 'json' in req.headers['Content-Type'] \
                        or 'xml' in req.headers['Content-Type']):
                response['data'] = response['data'].decode(encoding)
        except Exception:
            import traceback
            logger.error(traceback.format_exc(1))
        try:
            if PY3 and isinstance(response['data'], bytes) and 'Content-Type' in req.headers \
                        and (not 'application' in req.headers['Content-Type'] \
                        or 'javascript' in req.headers['Content-Type']):
                response['data'] = "".join(
                    chr(x) for x in bytes(response['data']))
        except Exception:
            import traceback
            logger.error(traceback.format_exc(1))

        try:
            if 'Content-Type' in req.headers and ('text/' in req.headers['Content-Type'] \
                        or 'json' in req.headers['Content-Type'] or 'xml' in req.headers['Content-Type']):
                response['data'] = response['data'].replace('&Aacute;', 'Á').replace('&Eacute;', 'É')\
                      .replace('&Iacute;', 'Í').replace('&Oacute;', 'Ó').replace('&Uacute;', 'Ú')\
                      .replace('&Uuml;', 'Ü').replace('&iexcl;', '¡').replace('&iquest;', '¿')\
                      .replace('&Ntilde;', 'Ñ').replace('&ntilde;', 'ñ').replace('&uuml;', 'ü')\
                      .replace('&aacute;', 'á').replace('&eacute;', 'é').replace('&iacute;', 'í')\
                      .replace('&oacute;', 'ó').replace('&uacute;', 'ú').replace('&ordf;', 'ª')\
                      .replace('&ordm;', 'º')
        except Exception:
            import traceback
            logger.error(traceback.format_exc(1))

        response['url'] = req.url
        if not response['data']:
            response['data'] = ''
        try:
            response['json'] = to_utf8(req.json())
        except Exception:
            response['json'] = dict()
        response['code'] = response_code
        response['headers'] = req.headers
        response['cookies'] = req.cookies

        info_dict, response = fill_fields_post(info_dict, req, response,
                                               req_headers, inicio)

        if opt.get('cookies', True):
            save_cookies(alfa_s=opt.get('alfa_s', False))

        is_channel = inspect.getmodule(inspect.currentframe().f_back)
        is_channel = scrapertools.find_single_match(str(is_channel),
                                                    "<module '(channels).*?'")
        if is_channel and isinstance(response_code, int):
            if not opt.get('ignore_response_code',
                           False) and not proxy_data.get('stat', ''):
                if response_code > 399:
                    show_infobox(info_dict)
                    raise WebErrorException(urlparse.urlparse(url)[1])

        if not 'api.themoviedb' in url and not opt.get('alfa_s', False):
            show_infobox(info_dict)

        # If the proxy fails, refresh the list and retry up to the number given in proxy_retries
        response['data'], response['sucess'], url, opt = proxy_post_processing(
            url, proxy_data, response, opt)
        if opt.get('out_break', False):
            break

    return type('HTTPResponse', (), response)
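
A minimal usage sketch for the function above, assuming it is exposed as httptools.downloadpage (the import path, urls, and form fields below are illustrative placeholders, not part of the original source):

from core import httptools  # hypothetical import path

# Plain GET: the result is a dynamically built HTTPResponse object.
resp = httptools.downloadpage("https://example.com/")
if resp.sucess:  # note: the attribute is spelled 'sucess' in this codebase
    print(resp.code, len(resp.data))

# POST with a query-string body and extra headers merged into the defaults
# (replace_headers defaults to False, so the default headers are kept).
resp = httptools.downloadpage(
    "https://example.com/login",
    post="user=foo&pass=bar",  # string posts are parsed into a dict by the function
    headers={"Referer": "https://example.com"},
    timeout=10,
)
print(resp.json)  # empty dict if the body is not valid json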
Example No. 2
def downloadpage(url, **opt):
    """
        Opens a url and returns the data obtained.

        @param url: url to open.
        @type url: str
        @param post: If it contains any value, it is sent via POST.
        @type post: str
        @param headers: Headers for the request; if empty, the default headers are used.
        @type headers: dict, list
        @param timeout: Timeout for the request.
        @type timeout: int
        @param follow_redirects: Whether redirects should be followed.
        @type follow_redirects: bool
        @param cookies: Whether cookies should be used.
        @type cookies: bool
        @param replace_headers: If True, the headers passed in the "headers" parameter completely replace the default headers.
                                If False, the headers passed in the "headers" parameter update the default headers.
        @type replace_headers: bool
        @param add_referer: Whether to add the "Referer" header, using the domain of the url as its value.
        @type add_referer: bool
        @param only_headers: If True, only the headers are downloaded, skipping the url content.
        @type only_headers: bool
        @param random_headers: If True, a random set of headers is used.
        @type random_headers: bool
        @param ignore_response_code: If True, skips the WebErrorException handling for responses such as a 404 on veseriesonline whose body is still usable data.
        @type ignore_response_code: bool
        @return: Result of the request
        @rtype: HTTPResponse

                Parameter               Type    Description
                ----------------------------------------------------------------------------------------------------------------
                HTTPResponse.sucess:    bool    True: request completed successfully | False: error while making the request
                HTTPResponse.code:      int     Server response code, or error code if an error occurred
                HTTPResponse.error:     str     Description of the error, if an error occurred
                HTTPResponse.headers:   dict    Dictionary with the server's response headers
                HTTPResponse.data:      str     Response body obtained from the server
                HTTPResponse.json:      dict    Server response parsed as json
                HTTPResponse.time:      float   Time taken to make the request
    """
    logger.info()
    load_cookies()
    import requests
    from lib import cloudscraper

    # Default headers, used if nothing is specified
    req_headers = default_headers.copy()

    # Headers passed as parameters
    if opt.get('headers', None) is not None:
        if not opt.get('replace_headers', False):
            req_headers.update(dict(opt['headers']))
        else:
            req_headers = dict(opt['headers'])

    if opt.get('random_headers', False) or HTTPTOOLS_DEFAULT_RANDOM_HEADERS:
        req_headers['User-Agent'] = random_useragent()
    url = urllib.quote(url, safe="%/:=&?~#+!$,;'@()*[]")

    opt['proxy_retries_counter'] = 0
    opt['url_save'] = url
    opt['post_save'] = opt.get('post', None)

    while opt['proxy_retries_counter'] <= opt.get('proxy_retries', 1):
        response = {}
        info_dict = []
        payload = dict()
        files = {}
        file_name = ''
        opt['proxy_retries_counter'] += 1

        session = cloudscraper.create_scraper()
        session.verify = False
        if opt.get('cookies', True):
            session.cookies = cj
        session.headers.update(req_headers)

        # Prepare the url if a proxy is needed, or if "proxies" is sent from the channel
        url, proxy_data, opt = check_proxy(url, **opt)
        if opt.get('proxies', None) is not None:
            session.proxies = opt['proxies']
        elif proxy_data.get('dict', {}):
            session.proxies = proxy_data['dict']

        inicio = time.time()

        if opt.get('timeout', None) is None and HTTPTOOLS_DEFAULT_DOWNLOAD_TIMEOUT is not None:
            opt['timeout'] = HTTPTOOLS_DEFAULT_DOWNLOAD_TIMEOUT
        if opt['timeout'] == 0:
            opt['timeout'] = None

        if len(url) > 0:
            try:
                if opt.get('post', None) is not None or opt.get(
                        'file', None) is not None:
                    if opt.get('post', None) is not None:
                        ### Convert a string post into a dict
                        try:
                            json.loads(opt['post'])
                            payload = opt['post']
                        except Exception:
                            if not isinstance(opt['post'], dict):
                                post = urlparse.parse_qs(opt['post'],
                                                         keep_blank_values=1)
                                payload = dict()

                                for key, value in post.items():
                                    try:
                                        payload[key] = value[0]
                                    except Exception:
                                        payload[key] = ''
                            else:
                                payload = opt['post']

                    ### Check the 'file' and 'file_name' options to upload a buffer or a file
                    if opt.get('file', None) is not None:
                        if os.path.isfile(opt['file']):
                            if opt.get('file_name', None) is None:
                                path_file, opt['file_name'] = os.path.split(
                                    opt['file'])
                            files = {
                                'file':
                                (opt['file_name'], open(opt['file'], 'rb'))
                            }
                            file_name = opt['file']
                        else:
                            files = {
                                'file': (opt.get('file_name',
                                                 'Default'), opt['file'])
                            }
                            file_name = opt.get(
                                'file_name', 'Default') + ', memory buffer'

                    info_dict = fill_fields_pre(url, opt, proxy_data,
                                                file_name)
                    if opt.get('only_headers', False):
                        ### Makes the request with HEAD method
                        req = session.head(url,
                                           allow_redirects=opt.get(
                                               'follow_redirects', True),
                                           timeout=opt['timeout'])
                    else:
                        ### Makes the request with POST method
                        req = session.post(url,
                                           data=payload,
                                           allow_redirects=opt.get(
                                               'follow_redirects', True),
                                           files=files,
                                           timeout=opt['timeout'])

                elif opt.get('only_headers', False):
                    info_dict = fill_fields_pre(url, opt, proxy_data,
                                                file_name)
                    ### Makes the request with HEAD method
                    req = session.head(url,
                                       allow_redirects=opt.get(
                                           'follow_redirects', True),
                                       timeout=opt['timeout'])
                else:
                    info_dict = fill_fields_pre(url, opt, proxy_data,
                                                file_name)
                    ### Makes the request with GET method
                    req = session.get(url,
                                      allow_redirects=opt.get(
                                          'follow_redirects', True),
                                      timeout=opt['timeout'])

            except Exception as e:
                if not opt.get('ignore_response_code',
                               False) and not proxy_data.get('stat', ''):
                    req = requests.Response()
                    response['data'] = ''
                    response['sucess'] = False
                    info_dict.append(('Success', 'False'))
                    response['code'] = str(e)
                    info_dict.append(('Response code', str(e)))
                    info_dict.append(('Finished in', time.time() - inicio))
                    if not opt.get('alfa_s', False):
                        show_infobox(info_dict)
                    return type('HTTPResponse', (), response)
                else:
                    req = requests.Response()
                    req.status_code = str(e)

        else:
            response['data'] = ''
            response['sucess'] = False
            response['code'] = ''
            return type('HTTPResponse', (), response)

        response_code = req.status_code

        response['data'] = req.content
        response['url'] = req.url
        if not response['data']:
            response['data'] = ''
        try:
            response['json'] = to_utf8(req.json())
        except Exception:
            response['json'] = dict()
        response['code'] = response_code
        response['headers'] = req.headers
        response['cookies'] = req.cookies

        info_dict, response = fill_fields_post(info_dict, req, response,
                                               req_headers, inicio)

        if opt.get('cookies', True):
            save_cookies(alfa_s=opt.get('alfa_s', False))

        is_channel = inspect.getmodule(inspect.currentframe().f_back)
        is_channel = scrapertools.find_single_match(str(is_channel),
                                                    "<module '(channels).*?'")
        if is_channel and isinstance(response_code, int):
            if not opt.get('ignore_response_code',
                           False) and not proxy_data.get('stat', ''):
                if response_code > 399:
                    show_infobox(info_dict)
                    raise WebErrorException(urlparse.urlparse(url)[1])

        if not 'api.themoviedb' in url and not opt.get('alfa_s', False):
            show_infobox(info_dict)

        # If the proxy fails, refresh the list and retry up to the number given in proxy_retries
        response['data'], response['sucess'], url, opt = proxy_post_processing(
            url, proxy_data, response, opt)
        if opt.get('out_break', False):
            break

    return type('HTTPResponse', (), response)
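
The headers-merging branch near the top of both examples reduces to a plain dict update versus a dict rebuild. A self-contained sketch of that replace_headers behavior, with illustrative default values rather than the module's real defaults:

# Self-contained sketch of the replace_headers semantics used above.
default_headers = {"User-Agent": "Mozilla/5.0", "Accept": "*/*"}  # illustrative values

def build_headers(headers=None, replace_headers=False):
    req_headers = default_headers.copy()
    if headers is not None:
        if not replace_headers:
            req_headers.update(dict(headers))  # merge: passed headers win on conflicts
        else:
            req_headers = dict(headers)        # replace: defaults are dropped entirely
    return req_headers

print(build_headers({"Accept": "application/json"}))
# {'User-Agent': 'Mozilla/5.0', 'Accept': 'application/json'}
print(build_headers({"Accept": "application/json"}, replace_headers=True))
# {'Accept': 'application/json'}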
Example No. 3
    logger.info("Response code: %s" % (response["code"]))
    logger.info("Response error: %s" % (response["error"]))
    logger.info("Response data length: %s" % (len(response["data"])))
    logger.info("Response headers:")
    server_cloudflare = ""
    for header in response["headers"]:
        logger.info("- %s: %s" % (header, response["headers"][header]))
        if "cloudflare" in response["headers"][header]:
            server_cloudflare = "cloudflare"

    is_channel = inspect.getmodule(inspect.currentframe().f_back)
    # 4xx or 5xx errors raise an exception, except for a cloudflare 503 (handled by the anti-cloudflare code below)
    # response["code"] = 400
    if isinstance(response["code"], int) and "\\servers\\" not in str(is_channel):
        if response["code"] > 399 and not (server_cloudflare == "cloudflare" and response["code"] == 503):
            raise WebErrorException(urlparse.urlparse(url)[1])

    if cookies:
        save_cookies()

    logger.info("Encoding: %s" % (response["headers"].get('content-encoding')))

    if response["headers"].get('content-encoding') == 'gzip':
        logger.info("Descomprimiendo...")
        try:
            response["data"] = gzip.GzipFile(fileobj=StringIO(response["data"])).read()
            logger.info("Descomprimido")
        except:
            logger.info("No se ha podido descomprimir")

    # Anti Cloudflare
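
The gzip branch above inflates a compressed body entirely in memory. A self-contained round-trip sketch of the same technique; it assumes Python 3 (bytes and io.BytesIO), whereas the snippet above targets Python 2 with StringIO:

# In-memory gzip decompression, as done in the snippet above.
import gzip
from io import BytesIO

# Simulate a gzip-encoded response body.
buf = BytesIO()
with gzip.GzipFile(fileobj=buf, mode="wb") as f:
    f.write(b"<html>hello</html>")
compressed = buf.getvalue()

# Inflate it the same way the snippet does.
data = gzip.GzipFile(fileobj=BytesIO(compressed)).read()
print(data)  # b'<html>hello</html>'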
Example No. 4
def downloadpage(url, **opt):
    """
        Abre una url y retorna los datos obtenidos

        @param url: url que abrir.
        @type url: str
        @param post: Si contiene algun valor este es enviado mediante POST.
        @type post: str (datos json), dict
        @param headers: Headers para la petición, si no contiene nada se usara los headers por defecto.
        @type headers: dict, list
        @param timeout: Timeout para la petición.
        @type timeout: int
        @param follow_redirects: Indica si se han de seguir las redirecciones.
        @type follow_redirects: bool
        @param cookies: Indica si se han de usar las cookies.
        @type cookies: bool
        @param replace_headers: Si True, los headers pasados por el parametro "headers" sustituiran por completo los headers por defecto.
                                Si False, los headers pasados por el parametro "headers" modificaran los headers por defecto.
        @type replace_headers: bool
        @param add_host: Indica si añadir el header Host al principio, como si fuese navegador común.
                         Desactivado por defecto, solo utilizarse con webs problemáticas (da problemas con proxies).
        @type add_host: bool
        @param add_referer: Indica si se ha de añadir el header "Referer" usando el dominio de la url como valor.
        @type add_referer: bool
        @param referer: Si se establece, agrega el header "Referer" usando el parámetro proporcionado como valor.
        @type referer: str
        @param only_headers: Si True, solo se descargarán los headers, omitiendo el contenido de la url.
        @type only_headers: bool
        @param random_headers: Si True, utiliza el método de seleccionar headers aleatorios.
        @type random_headers: bool
        @param ignore_response_code: Si es True, ignora el método para WebErrorException para error como el error 404 en veseriesonline, pero es un data funcional
        @type ignore_response_code: bool
        @param hide_infobox: Si es True, no muestra la ventana de información en el log cuando hay una petición exitosa (no hay un response_code de error).
        @type hide_infobox: bool
        @param soup: Si es True, establece un elemento BeautifulSoup en el atributo soup de HTTPResponse
        @type soup: bool
        @return: Resultado de la petición
        @rtype: HTTPResponse

                Parametro             | Tipo     | Descripción
                ----------------------|----------|-------------------------------------------------------------------------------
                HTTPResponse.sucess:  | bool     | True: Peticion realizada correctamente | False: Error al realizar la petición
                HTTPResponse.code:    | int      | Código de respuesta del servidor o código de error en caso de producirse un error
                HTTPResponse.error:   | str      | Descripción del error en caso de producirse un error
                HTTPResponse.headers: | dict     | Diccionario con los headers de respuesta del servidor
                HTTPResponse.data:    | str      | Respuesta obtenida del servidor
                HTTPResponse.json:    | dict     | Respuesta obtenida del servidor en formato json
                HTTPResponse.soup:    | bs4/None | Objeto BeautifulSoup, si se solicita. None de otra forma
                HTTPResponse.time:    | float    | Tiempo empleado para realizar la petición
    """
    global CF_LIST
    if not opt.get('alfa_s', False):
        logger.info()
    from . import scrapertools

    load_cookies(opt.get('alfa_s', False))

    cf_ua = config.get_setting('cf_assistant_ua', None)
    url = url.strip()

    # Default headers, used if nothing is specified
    req_headers = OrderedDict()
    if opt.get('add_host', False):
        req_headers['Host'] = urlparse.urlparse(url).netloc
    req_headers.update(default_headers.copy())

    if opt.get('add_referer', False):
        req_headers['Referer'] = "/".join(url.split("/")[:3])

    if isinstance(opt.get('referer'), str) and '://' in opt.get('referer'):
        req_headers['Referer'] = opt.get('referer')

    # Headers passed as parameters
    if opt.get('headers', None) is not None:
        if not opt.get('replace_headers', False):
            req_headers.update(dict(opt['headers']))
        else:
            req_headers = dict(opt['headers'])

    if opt.get('random_headers', False) or HTTPTOOLS_DEFAULT_RANDOM_HEADERS:
        req_headers['User-Agent'] = random_useragent()
    if not PY3:
        url = urllib.quote(url.encode('utf-8'), safe="%/:=&?~#+!$,;'@()*[]")
    else:
        url = urllib.quote(url, safe="%/:=&?~#+!$,;'@()*[]")

    opt['proxy_retries_counter'] = 0
    opt['url_save'] = url
    opt['post_save'] = opt.get('post', None)
    if opt.get('forced_proxy_opt', None) and channel_proxy_list(url):
        if opt['forced_proxy_opt'] in ['ProxyCF', 'ProxyDirect']:
            if 'cliver' not in url:
                opt['forced_proxy_opt'] = 'ProxyJSON'
            else:
                opt['forced_proxy'] = opt['forced_proxy_opt']
        else:
            opt['forced_proxy'] = opt['forced_proxy_opt']

    while opt['proxy_retries_counter'] <= opt.get('proxy_retries', 1):
        response = {}
        info_dict = []
        payload = dict()
        files = {}
        file_name = ''
        opt['proxy_retries_counter'] += 1

        domain = urlparse.urlparse(url)[1]
        global CS_stat
        if (domain in CF_LIST or opt.get('CF', False)) and opt.get('CF_test', True):  # Domain is in the CF list, or flagged in the call
            from lib import cloudscraper
            session = cloudscraper.create_scraper()  # The domain needs CloudScraper
            session.verify = True
            CS_stat = True
            if cf_ua and cf_ua != 'Default' and get_cookie(url, 'cf_clearance'):
                req_headers['User-Agent'] = cf_ua
        else:
            session = requests.session()
            session.verify = False
            CS_stat = False

        if opt.get('cookies', True):
            session.cookies = cj

        if not opt.get('keep_alive', True):
            #session.keep_alive =  opt['keep_alive']
            req_headers['Connection'] = "close"

        # Prepare the url if a proxy is needed, or if "proxy_addr_forced" is sent from the channel
        url, proxy_data, opt = check_proxy(url, **opt)
        if opt.get('proxy_addr_forced', {}):
            session.proxies = opt['proxy_addr_forced']
        elif proxy_data.get('dict', {}):
            session.proxies = proxy_data['dict']
        if opt.get('headers_proxy', {}):
            req_headers.update(dict(opt['headers_proxy']))

        session.headers = req_headers.copy()

        inicio = time.time()

        if opt.get('timeout', None) is None and HTTPTOOLS_DEFAULT_DOWNLOAD_TIMEOUT is not None:
            opt['timeout'] = HTTPTOOLS_DEFAULT_DOWNLOAD_TIMEOUT
        if opt['timeout'] == 0:
            opt['timeout'] = None

        if len(url) > 0:
            try:
                if opt.get('post', None) is not None or opt.get(
                        'file', None) is not None or opt.get('files', {}):
                    if opt.get('post', None) is not None:
                        ### Convert a string post into a dict
                        try:
                            json.loads(opt['post'])
                            payload = opt['post']
                        except Exception:
                            if not isinstance(opt['post'], dict):
                                post = urlparse.parse_qs(opt['post'],
                                                         keep_blank_values=1)
                                payload = dict()

                                for key, value in list(post.items()):
                                    try:
                                        payload[key] = value[0]
                                    except Exception:
                                        payload[key] = ''
                            else:
                                payload = opt['post']

                    ### Verifies 'file' and 'file_name' options to upload a buffer or a file
                    if opt.get('files', {}):
                        files = opt['files']
                        file_name = opt.get('file_name', 'File Object')
                    elif opt.get('file', None) is not None:
                        if len(opt['file']) < 256 and os.path.isfile(
                                opt['file']):
                            if opt.get('file_name', None) is None:
                                path_file, opt['file_name'] = os.path.split(
                                    opt['file'])
                            files = {
                                'file':
                                (opt['file_name'], open(opt['file'], 'rb'))
                            }
                            file_name = opt['file']
                        else:
                            files = {
                                'file': (opt.get('file_name',
                                                 'Default'), opt['file'])
                            }
                            file_name = opt.get(
                                'file_name', 'Default') + ', memory buffer'

                    info_dict = fill_fields_pre(url, opt, proxy_data,
                                                file_name)
                    if opt.get('only_headers', False):
                        ### Makes the request with HEAD method
                        req = session.head(url,
                                           allow_redirects=opt.get(
                                               'follow_redirects', True),
                                           timeout=opt.get('timeout', None),
                                           params=opt.get('params', {}))
                    else:
                        ### Makes the request with POST method
                        req = session.post(url,
                                           data=payload,
                                           allow_redirects=opt.get(
                                               'follow_redirects', True),
                                           files=files,
                                           timeout=opt.get('timeout', None),
                                           params=opt.get('params', {}))

                elif opt.get('only_headers', False):
                    info_dict = fill_fields_pre(url, opt, proxy_data,
                                                file_name)
                    ### Makes the request with HEAD method
                    req = session.head(url,
                                       allow_redirects=opt.get(
                                           'follow_redirects', True),
                                       timeout=opt.get('timeout', None),
                                       params=opt.get('params', {}))
                else:
                    info_dict = fill_fields_pre(url, opt, proxy_data,
                                                file_name)
                    ### Makes the request with GET method
                    req = session.get(url,
                                      allow_redirects=opt.get(
                                          'follow_redirects', True),
                                      timeout=opt.get('timeout', None),
                                      params=opt.get('params', {}))

            except Exception as e:
                if not opt.get('ignore_response_code',
                               False) and not proxy_data.get('stat', ''):
                    req = requests.Response()
                    response['data'] = ''
                    response['sucess'] = False
                    info_dict.append(('Success', 'False'))
                    response['code'] = str(e)
                    info_dict.append(('Response code', str(e)))
                    info_dict.append(('Finalizado en', time.time() - inicio))
                    if not opt.get('alfa_s', False):
                        show_infobox(info_dict)
                        import traceback
                        logger.error(traceback.format_exc(1))
                    return type('HTTPResponse', (), response)
                else:
                    req = requests.Response()
                    req.status_code = str(e)

        else:
            response['data'] = ''
            response['sucess'] = False
            response['code'] = ''
            response['soup'] = None
            return type('HTTPResponse', (), response)

        response_code = req.status_code

        if req.headers.get('Server', '').startswith('cloudflare') and response_code in [429, 503, 403] \
                        and not opt.get('CF', False) and opt.get('CF_test', True):
            domain = urlparse.urlparse(url)[1]
            if domain not in CF_LIST:
                CF_LIST += [domain]
                opt["CF"] = True
                with open(CF_LIST_PATH, "a") as CF_File:
                    CF_File.write("%s\n" % domain)
                logger.debug("CF retry... for domain: %s" % domain)
                return downloadpage(url, **opt)

        if req.headers.get('Server', '') == 'Alfa' and response_code in [429, 503, 403] \
                        and not opt.get('cf_v2', False) and opt.get('CF_test', True):
            opt["cf_v2"] = True
            logger.debug("CF Assistant retry... for domain: %s" %
                         urlparse.urlparse(url)[1])
            return downloadpage(url, **opt)

        response['data'] = req.content

        try:
            response['encoding'] = str(req.encoding).lower() if req.encoding else None

            if opt.get('encoding'):
                encoding = opt['encoding']
            else:
                encoding = response['encoding']

            if not encoding:
                encoding = 'utf-8'

            if PY3 and isinstance(response['data'], bytes) and encoding is not None \
                    and ('text/' in req.headers.get('Content-Type', '') \
                        or 'json' in req.headers.get('Content-Type', '') \
                        or 'xml' in req.headers.get('Content-Type', '')):
                response['data'] = response['data'].decode(encoding)

        except Exception:
            import traceback
            logger.error(traceback.format_exc(1))

        try:
            if PY3 and isinstance(response['data'], bytes) \
                    and not ('application' in req.headers.get('Content-Type', '') \
                    or 'javascript' in req.headers.get('Content-Type', '') \
                    or 'image' in req.headers.get('Content-Type', '')):
                response['data'] = "".join(
                    chr(x) for x in bytes(response['data']))

        except Exception:
            import traceback
            logger.error(traceback.format_exc(1))

        try:
            if 'text/' in req.headers.get('Content-Type', '') \
                        or 'json' in req.headers.get('Content-Type', '') \
                        or 'xml' in req.headers.get('Content-Type', ''):
                response['data'] = response['data'].replace('&Aacute;', 'Á').replace('&Eacute;', 'É')\
                      .replace('&Iacute;', 'Í').replace('&Oacute;', 'Ó').replace('&Uacute;', 'Ú')\
                      .replace('&Uuml;', 'Ü').replace('&iexcl;', '¡').replace('&iquest;', '¿')\
                      .replace('&Ntilde;', 'Ñ').replace('&ntilde;', 'ñ').replace('&uuml;', 'ü')\
                      .replace('&aacute;', 'á').replace('&eacute;', 'é').replace('&iacute;', 'í')\
                      .replace('&oacute;', 'ó').replace('&uacute;', 'ú').replace('&ordf;', 'ª')\
                      .replace('&ordm;', 'º')

        except Exception:
            import traceback
            logger.error(traceback.format_exc(1))

        response['url'] = req.url

        if not response['data']:
            response['data'] = ''

        response['soup'] = None

        if opt.get("soup", False):
            try:
                from bs4 import BeautifulSoup
                response["soup"] = BeautifulSoup(req.content,
                                                 "html5lib",
                                                 from_encoding=opt.get(
                                                     'encoding',
                                                     response['encoding']))

            except Exception:
                import traceback
                logger.error("Error creando sopa")
                logger.error(traceback.format_exc())

        try:
            if 'bittorrent' not in req.headers.get('Content-Type', '') \
                        and 'octet-stream' not in req.headers.get('Content-Type', '') \
                        and 'zip' not in req.headers.get('Content-Type', '') \
                        and opt.get('json_to_utf8', True):
                response['json'] = to_utf8(req.json())

            else:
                response['json'] = dict()

        except Exception:
            response['json'] = dict()
        response['code'] = response_code
        response['headers'] = req.headers
        response['cookies'] = req.cookies

        if response['code'] == 200:
            response['sucess'] = True

        else:
            response['sucess'] = False

        if opt.get('cookies', True):
            save_cookies(alfa_s=opt.get('alfa_s', False))

        is_channel = inspect.getmodule(inspect.currentframe().f_back)
        is_channel = scrapertools.find_single_match(str(is_channel),
                                                    "<module '(channels).*?'")
        if is_channel and isinstance(response_code, int):
            if not opt.get('ignore_response_code',
                           False) and not proxy_data.get('stat', ''):
                if response_code > 399:
                    info_dict, response = fill_fields_post(
                        info_dict, req, response, req_headers, inicio)
                    show_infobox(info_dict)
                    raise WebErrorException(urlparse.urlparse(url)[1])

        info_dict, response = fill_fields_post(info_dict, req, response,
                                               req_headers, inicio)
        if not 'api.themoviedb' in url and not 'api.trakt' in url and not opt.get(
                'alfa_s', False) and not opt.get("hide_infobox"):
            show_infobox(info_dict)

        # If the proxy fails, refresh the list and retry up to the number given in proxy_retries
        response, url, opt = proxy_post_processing(url, proxy_data, response,
                                                   opt)

        # If the proxy logic asks to leave the loop, break out
        if opt.get('out_break', False):
            break

    return type('HTTPResponse', (), response)
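
Every example here returns type('HTTPResponse', (), response): the three-argument form of type() builds a class on the fly whose attributes are the keys of the response dict. A minimal sketch of why callers can then use attribute access:

# type(name, bases, namespace) builds a class whose attributes are the dict keys,
# so callers read resp.data / resp.code instead of indexing a dict.
response = {'data': '<html/>', 'code': 200, 'sucess': True, 'json': {}}
resp = type('HTTPResponse', (), response)

print(resp.code)    # 200
print(resp.sucess)  # True (attribute name kept as spelled in this codebase)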