Пример #1
0
def getUrl(url,
           cookieJar=None,
           post=None,
           timeout=20,
           headers=None,
           noredir=False):
    """Fetch *url* and return the raw response body.

    :param url: URL to request.
    :param cookieJar: optional cookie jar shared across calls.
    :param post: optional POST payload; ``None`` issues a GET.
    :param timeout: socket timeout in seconds.
    :param headers: optional iterable of ``(name, value)`` header pairs.
    :param noredir: when True, redirects are suppressed via NoRedirection.
    :returns: the response body as returned by ``read()``.
    """
    jar_handler = urllib_request.HTTPCookieProcessor(cookieJar)

    # Redirect suppression is just one extra handler at the front.
    extra_handlers = [NoRedirection] if noredir else []
    opener = urllib_request.build_opener(
        *(extra_handlers + [
            jar_handler,
            urllib_request.HTTPBasicAuthHandler(),
            urllib_request.HTTPHandler()
        ]))

    req = urllib_request.Request(url)
    req.add_header(
        'User-Agent',
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.154 Safari/537.36'
    )
    if headers:
        for name, value in headers:
            req.add_header(name, value)

    response = opener.open(req, post, timeout=timeout)
    body = response.read()
    response.close()
    return body
Пример #2
0
def get(url, cookiepath=None, cookie=None, user_agent=None, referer=None):
    """Fetch *url*, optionally persisting cookies in an LWP cookie file.

    When *cookiepath* is None the request is delegated to
    ``_loadwithoutcookies``; otherwise cookies are loaded from (and a
    missing file is created at) *cookiepath* before the request is made.
    Exits the process on URL errors.
    """
    # No cookie path: fall back to the cookie-less loader.
    if cookiepath is None:
        return _loadwithoutcookies(url, user_agent)

    # A bare folder path gets a conventional filename appended.
    if not os.path.isfile(cookiepath):
        cookiepath = os.path.join(cookiepath, 'cookies.lwp')

    # Create an empty LWP cookie file on first use.
    if not os.path.exists(cookiepath):
        with open(cookiepath, 'w') as f:
            f.write('#LWP-Cookies-2.0\n')

    jar = http_cookiejar.LWPCookieJar()
    jar.load(cookiepath)

    req = urllib_request.Request(url)
    req.add_header('User-Agent',
                   user_agent if user_agent else USER_AGENT_STRING)
    if referer:
        req.add_header('Referer', referer)
    if cookie:
        req.add_header('Cookie', cookie)

    opener = urllib_request.build_opener(
        urllib_request.HTTPCookieProcessor(jar))
    try:
        response = opener.open(req)
    except urllib_error.URLError as e:
        xbmc.log('%s Error opening %s' % (e, url))
        sys.exit(1)
    data = response.read()
    response.close()
    return data
Пример #3
0
    def _update_opener(self, drop_tls_level=False):
        """
        Builds and installs a new opener to be used by all future calls to
        :func:`urllib2.urlopen`.

        :param drop_tls_level: when True, attempt to lower the TLS protocol
            to TLSv1.1 (see the NOTE below -- the assignment may be a no-op).
        """
        # Cookie processing and HTTP basic-auth are always enabled.
        handlers = [
            urllib_request.HTTPCookieProcessor(self._cj),
            urllib_request.HTTPBasicAuthHandler()
        ]

        # Plain-HTTP handler, optionally with wire-level debug output.
        if self._http_debug:
            handlers += [urllib_request.HTTPHandler(debuglevel=1)]
        else:
            handlers += [urllib_request.HTTPHandler()]

        # Route plain-HTTP traffic through the configured proxy, if any.
        if self._proxy:
            handlers += [urllib_request.ProxyHandler({'http': self._proxy})]

        # platform.node() can fail in restricted environments; fall back to ''.
        try:
            import platform
            node = platform.node().lower()
        except:
            node = ''

        if not self._ssl_verify or node == 'xboxone':
            # Verification disabled (explicitly, or on Xbox One hosts):
            # accept any certificate and skip hostname checks.
            try:
                import ssl
                ctx = ssl.create_default_context()
                ctx.check_hostname = False
                ctx.verify_mode = ssl.CERT_NONE
                if self._http_debug:
                    handlers += [
                        urllib_request.HTTPSHandler(context=ctx, debuglevel=1)
                    ]
                else:
                    handlers += [urllib_request.HTTPSHandler(context=ctx)]
            except:
                pass
        else:
            # Verified HTTPS using certifi's CA bundle.
            try:
                import ssl
                import certifi
                ctx = ssl.create_default_context(cafile=certifi.where())
                if drop_tls_level:
                    # NOTE(review): SSLContext.protocol is read-only on
                    # modern Python; this assignment likely raises and is
                    # swallowed by the bare except below -- confirm whether
                    # the TLS downgrade ever takes effect.
                    ctx.protocol = ssl.PROTOCOL_TLSv1_1
                if self._http_debug:
                    handlers += [
                        urllib_request.HTTPSHandler(context=ctx, debuglevel=1)
                    ]
                else:
                    handlers += [urllib_request.HTTPSHandler(context=ctx)]
            except:
                pass

        # Installed globally: affects every subsequent urlopen() call.
        opener = urllib_request.build_opener(*handlers)
        urllib_request.install_opener(opener)
Пример #4
0
    def _update_opener(self):
        '''
        Builds and installs a new opener to be used by all future calls to
        :func:`urllib2.urlopen`.

        The opener always carries cookie processing and HTTP basic-auth;
        a proxy handler is added when ``self._proxy`` is set, and debug
        logging when ``self._http_debug`` is set.
        '''
        # Plain-HTTP handler, optionally with wire-level debug output.
        if self._http_debug:
            http = urllib_request.HTTPHandler(debuglevel=1)
        else:
            http = urllib_request.HTTPHandler()

        if self._proxy:
            opener = urllib_request.build_opener(
                urllib_request.HTTPCookieProcessor(self._cj),
                urllib_request.ProxyHandler({'http': self._proxy}),
                urllib_request.HTTPBasicAuthHandler(), http)
        else:
            opener = urllib_request.build_opener(
                urllib_request.HTTPCookieProcessor(self._cj),
                urllib_request.HTTPBasicAuthHandler(), http)
        # Bug fix: install the opener in BOTH branches. Previously this
        # call sat inside the else-branch only, so when a proxy was
        # configured the opener was built but never installed and the
        # proxy was silently ignored.
        urllib_request.install_opener(opener)
Пример #5
0
def getUrl(url, proxy=None, timeout=TIMEOUT, cookies=True):
    """Fetch *url* and return its body, exporting cookies via global ``cs``.

    :param url: URL to fetch.
    :param proxy: optional proxy mapping for ProxyHandler, e.g.
        ``{'http': 'host:port'}``.  When set, cookies are not collected.
        (Bug fix: the old signature used the mutable default ``proxy={}``,
        a shared-mutable-default pitfall; ``None`` behaves identically in
        the truthiness test below.)
    :param timeout: socket timeout in seconds.
    :param cookies: when True (and no proxy), collect cookies into ``cs``.
    :returns: the response body, or ``''`` on any request failure.
    """
    global cs
    cookie = []
    if proxy:
        urllib_request.install_opener(
            urllib_request.build_opener(urllib_request.ProxyHandler(proxy)))
    elif cookies:
        cookie = http_cookiejar.LWPCookieJar()
        opener = urllib_request.build_opener(
            urllib_request.HTTPCookieProcessor(cookie))
        urllib_request.install_opener(opener)
    req = urllib_request.Request(url)
    req.add_header(
        'User-Agent',
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36'
    )
    try:
        response = urllib_request.urlopen(req, timeout=timeout)
        linkSRC = response.read()
        response.close()
    except Exception:
        # Narrowed from a bare except so KeyboardInterrupt/SystemExit
        # still propagate; any request failure yields an empty body.
        linkSRC = ''
    # Export collected cookies as a 'name=value;' string for callers.
    cs = ''.join(['%s=%s;' % (c.name, c.value) for c in cookie])
    return linkSRC
Пример #6
0
def request(url,
            close=True,
            redirect=True,
            error=False,
            proxy=None,
            post=None,
            headers=None,
            mobile=False,
            limit=None,
            referer=None,
            cookie=None,
            output='',
            timeout='30'):
    """Fetch *url* with optional proxy/cookie/redirect handling.

    The ``output`` selector controls the return value:
    ``''`` (default) the body; ``'cookie'`` a cookie string;
    ``'response'`` a ``(status, body)`` tuple; ``'chunk'`` a 16 KB probe
    of large bodies; ``'extended'`` ``(body, request-headers,
    response-headers, cookie)``; ``'geturl'`` the final URL;
    ``'headers'`` the response headers.  ``limit`` is in KB ('0' means
    224 KB).  Returns None on handled HTTP errors when ``error`` is False.
    Cloudflare 503 challenge pages are solved via ``cfcookie`` and retried.
    """
    handlers = []

    # Optional proxy: installed globally for this and later urlopen calls.
    if proxy is not None:
        handlers += [
            ProxyHandler({'http': '{0}'.format(proxy)}),
            urllib_request.HTTPHandler
        ]
        opener = urllib_request.build_opener(*handlers)
        urllib_request.install_opener(opener)

    # A cookie jar is created only when the caller wants cookies back or
    # asks to keep the session open (close is not True).
    if output == 'cookie' or output == 'extended' or close is not True:
        cookies = cookielib.LWPCookieJar()
        handlers += [
            urllib_request.HTTPHandler(),
            urllib_request.HTTPSHandler(),
            urllib_request.HTTPCookieProcessor(cookies)
        ]
        opener = urllib_request.build_opener(*handlers)
        urllib_request.install_opener(opener)

    # Disable certificate verification where SSLContext is available
    # (needs Python >= 2.7.9; older interpreters take the except path).
    try:

        if sys.version_info < (2, 7, 9):
            raise Exception()

        import ssl
        ssl_context = ssl.create_default_context()
        ssl_context.check_hostname = False
        ssl_context.verify_mode = ssl.CERT_NONE
        handlers += [urllib_request.HTTPSHandler(context=ssl_context)]
        opener = urllib_request.build_opener(*handlers)
        urllib_request.install_opener(opener)

    except:
        pass

    # NOTE(review): updating a dict with itself is a no-op; in effect this
    # block only replaces a non-dict ``headers`` with {} -- confirm whether
    # copying the caller's dict was intended here.
    try:
        headers.update(headers)
    except:
        headers = {}

    # Default User-Agent: cached random desktop agent, or a mobile token.
    if 'User-Agent' in headers:
        pass
    elif not mobile is True:
        # headers['User-Agent'] = agent()
        headers['User-Agent'] = cache.get(randomagent, 1)
    else:
        headers['User-Agent'] = 'Apple-iPhone/701.341'

    # Default Referer: scheme://netloc/ of the target URL.
    if 'Referer' in headers:
        pass
    elif referer is None:
        headers['Referer'] = '%s://%s/' % (urlparse(url).scheme,
                                           urlparse(url).netloc)
    else:
        headers['Referer'] = referer

    if not 'Accept-Language' in headers:
        headers['Accept-Language'] = 'en-US'

    # Explicit Cookie header wins over the ``cookie`` argument.
    if 'Cookie' in headers:
        pass
    elif cookie is not None:
        headers['Cookie'] = cookie

    if redirect is False:

        # NOTE(review): subclassing HTTPError (instead of a redirect
        # handler) is unusual; whether this actually suppresses redirects
        # depends on urllib internals -- verify before relying on it.
        class NoRedirection(urllib_error.HTTPError):
            def http_response(self, request, response):
                return response

        opener = urllib_request.build_opener(NoRedirection)
        urllib_request.install_opener(opener)

        # A Referer can leak the pre-redirect URL; drop it when present.
        try:
            del headers['Referer']
        except:
            pass

    req = urllib_request.Request(url, data=post, headers=headers)

    try:
        response = urllib_request.urlopen(req, timeout=int(timeout))

    # On HTTP errors the error object doubles as the response object.
    except urllib_error.HTTPError as response:

        if response.code == 503:

            # Cloudflare "browser verification": solve the challenge
            # (cached 168h per netloc) and retry with its cookie.
            if 'cf-browser-verification' in response.read(5242880):

                netloc = '%s://%s' % (urlparse(url).scheme,
                                      urlparse(url).netloc)

                cf = cache.get(cfcookie, 168, netloc, headers['User-Agent'],
                               timeout)

                headers['Cookie'] = cf

                request = urllib_request.Request(url,
                                                 data=post,
                                                 headers=headers)

                response = urllib_request.urlopen(request,
                                                  timeout=int(timeout))

            elif error is False:
                return

        elif error is False:
            return

    if output == 'cookie':

        # Serialized jar cookies; a Cloudflare cookie (cf) wins if set.
        try:
            result = '; '.join(['%s=%s' % (i.name, i.value) for i in cookies])
        except:
            pass
        try:
            result = cf
        except:
            pass

    elif output == 'response':

        # (status code, body); body capped at limit KB or 5 MB.
        if limit == '0':
            result = (str(response.code), response.read(224 * 1024))
        elif limit is not None:
            result = (str(response.code), response.read(int(limit) * 1024))
        else:
            result = (str(response.code), response.read(5242880))

    elif output == 'chunk':

        # 16 KB probe -- but only for bodies declared larger than 2 MB.
        try:
            content = int(response.headers['Content-Length'])
        except:
            content = (2049 * 1024)

        if content < (2048 * 1024):
            return
        result = response.read(16 * 1024)

    elif output == 'extended':

        # Same cookie resolution as the 'cookie' branch, plus headers.
        try:
            cookie = '; '.join(['%s=%s' % (i.name, i.value) for i in cookies])
        except:
            pass
        try:
            cookie = cf
        except:
            pass
        content = response.headers
        result = response.read(5242880)
        return result, headers, content, cookie

    elif output == 'geturl':
        result = response.geturl()

    elif output == 'headers':
        content = response.headers
        return content

    else:
        # Default: return the body, capped at limit KB (or 5 MB).
        if limit == '0':
            result = response.read(224 * 1024)
        elif limit is not None:
            result = response.read(int(limit) * 1024)
        else:
            result = response.read(5242880)

    if close is True:
        response.close()
    return result
Пример #7
0
def cfcookie(netloc, ua, timeout):
    """Solve the legacy Cloudflare IUAM JavaScript challenge for *netloc*.

    :param netloc: ``scheme://host`` base URL protected by Cloudflare.
    :param ua: User-Agent string (must match the one used for later calls).
    :param timeout: socket timeout in seconds (string or int).
    :returns: a ``'name=value; ...'`` cookie string, or None on failure.
    """
    try:
        headers = {'User-Agent': ua}

        req = urllib_request.Request(netloc, headers=headers)

        # The challenge page is delivered as an HTTP error (typically 503).
        try:
            urllib_request.urlopen(req, timeout=int(timeout))
        except urllib_request.HTTPError as response:
            result = response.read(5242880)

        # Extract the challenge token and the arithmetic puzzle pieces.
        jschl = re.findall('name="jschl_vc" value="(.+?)"/>', result)[0]

        init = re.findall('setTimeout\(function\(\){\s*.*?.*:(.*?)};',
                          result)[-1]

        builder = re.findall(r"challenge-form\'\);\s*(.*)a.v", result)[0]

        decryptVal = parseJSString(init)

        lines = builder.split(';')

        # Replay each JS assignment (e.g. ``a.value += ...``) against the
        # running total.  SECURITY: eval() on page-derived text -- operands
        # come through parseJSString, but this still evaluates expressions
        # influenced by the remote page.
        for line in lines:

            if len(line) > 0 and '=' in line:

                sections = line.split('=')
                line_val = parseJSString(sections[1])
                decryptVal = int(
                    eval(str(decryptVal) + sections[0][-1] + str(line_val)))

        # Cloudflare's answer is the puzzle result plus the host length.
        answer = decryptVal + len(urlparse.urlparse(netloc).netloc)

        query = '%s/cdn-cgi/l/chk_jschl?jschl_vc=%s&jschl_answer=%s' % (
            netloc, jschl, answer)

        if 'type="hidden" name="pass"' in result:
            passval = re.findall('name="pass" value="(.*?)"', result)[0]
            query = '%s/cdn-cgi/l/chk_jschl?pass=%s&jschl_vc=%s&jschl_answer=%s' % (
                netloc, quote_plus(passval), jschl, answer)
            # Cloudflare rejects answers submitted sooner than ~5 seconds.
            time.sleep(5)

        # Fresh jar to capture the clearance cookie from the answer call.
        cookies = cookielib.LWPCookieJar()
        handlers = [
            urllib_request.HTTPHandler(),
            urllib_request.HTTPSHandler(),
            urllib_request.HTTPCookieProcessor(cookies)
        ]
        opener = urllib_request.build_opener(*handlers)
        urllib_request.install_opener(opener)

        # Best-effort: the cookies matter, not the answer response body.
        try:
            request = urllib_request.Request(query, headers=headers)
            urllib_request.urlopen(request, timeout=int(timeout))
        except:
            pass

        cookie = '; '.join(['%s=%s' % (i.name, i.value) for i in cookies])

        return cookie
    except:
        # Any parsing/network failure falls through, returning None.
        pass
Пример #8
0
def solve(url, cj, user_agent=None, wait=True):
    """Solve the Cloudflare JavaScript challenge protecting *url*.

    :param url: challenge-protected URL.
    :param cj: optional cookie jar; loaded before and saved after solving.
    :param user_agent: UA string to present (defaults to USER_AGENT).
    :param wait: sleep ~5s before answering, as Cloudflare requires.
    :returns: the final page HTML on success, False on failure.
    """
    if user_agent is None:
        user_agent = USER_AGENT
    headers = {'User-Agent': user_agent, 'Referer': url}
    if cj is not None:
        try:
            cj.load(ignore_discard=True)
        except:
            pass
        opener = urllib_request.build_opener(
            urllib_request.HTTPCookieProcessor(cj))
        urllib_request.install_opener(opener)

    request = urllib_request.Request(url)
    for key in headers:
        request.add_header(key, headers[key])
    # The challenge page itself arrives as an HTTP error response.
    try:
        response = urllib_request.urlopen(request)
        html = response.read()
    except urllib_error.HTTPError as e:
        html = e.read()

    tries = 0
    while tries < MAX_TRIES:
        # Locate the seed equation, the jschl_vc token and the pass value.
        solver_pattern = r'var (?:s,t,o,p,b,r,e,a,k,i,n,g|t,r,a),f,\s*([^=]+)={"([^"]+)":([^}]+)};.+challenge-form\'\);.*?\n.*?;(.*?);a\.value'
        vc_pattern = 'input type="hidden" name="jschl_vc" value="([^"]+)'
        pass_pattern = 'input type="hidden" name="pass" value="([^"]+)'
        init_match = re.search(solver_pattern, html, re.DOTALL)
        vc_match = re.search(vc_pattern, html)
        pass_match = re.search(pass_pattern, html)

        if not init_match or not vc_match or not pass_match:
            xbmc.log(
                "Couldn't find attribute: init: |%s| vc: |%s| pass: |%s| No cloudflare check?"
                % (init_match, vc_match, pass_match))
            return False

        init_dict, init_var, init_equation, equations = init_match.groups()
        vc = vc_match.group(1)
        password = pass_match.group(1)

        # log_utils.log("VC is: %s" % (vc), xbmc.LOGDEBUG, COMPONENT)
        varname = (init_dict, init_var)
        result = int(solve_equation(init_equation.rstrip()))
        xbmc.log('Initial value: |%s| Result: |%s|' % (init_equation, result))

        # Replay each JS mutation (+=, -=, ...) of the accumulator variable.
        for equation in equations.split(';'):
            equation = equation.rstrip()
            if equation[:len('.'.join(varname))] != '.'.join(varname):
                xbmc.log('Equation does not start with varname |%s|' %
                         (equation))
            else:
                equation = equation[len('.'.join(varname)):]

            expression = equation[2:]
            operator = equation[0]
            if operator not in ['+', '-', '*', '/']:
                # log_utils.log('Unknown operator: |%s|' % (equation), log_utils.LOGWARNING, COMPONENT)
                continue

            # SECURITY NOTE: eval() on challenge-derived text; operands go
            # through solve_equation, but this still runs remote-page data.
            result = int(
                str(
                    eval(
                        str(result) + operator +
                        str(solve_equation(expression)))))
            # log_utils.log('intermediate: %s = %s' % (equation, result), log_utils.LOGDEBUG, COMPONENT)

        # Cloudflare's final answer adds the hostname length.
        scheme = urllib_parse.urlparse(url).scheme
        domain = urllib_parse.urlparse(url).hostname
        result += len(domain)
        # log_utils.log('Final Result: |%s|' % (result), log_utils.LOGDEBUG, COMPONENT)

        if wait:
            # Cloudflare refuses answers that arrive in under ~5 seconds.
            xbmc.sleep(5000)

        url = '%s://%s/cdn-cgi/l/chk_jschl?jschl_vc=%s&jschl_answer=%s&pass=%s' % (
            scheme, domain, vc, result, urllib_parse.quote(password))
        # log_utils.log('url: %s' % (url), log_utils.LOGDEBUG, COMPONENT)
        request = urllib_request.Request(url)
        for key in headers:
            request.add_header(key, headers[key])
        try:
            # Follow the answer's redirect chain by hand so cookies are
            # extracted and re-attached at every hop.
            opener = urllib_request.build_opener(NoRedirection)
            urllib_request.install_opener(opener)
            response = urllib_request.urlopen(request)
            while response.getcode() in [301, 302, 303, 307]:
                if cj is not None:
                    cj.extract_cookies(response, request)

                # NOTE(review): info().getheader() is the Python-2 API; on
                # Python 3 this would be info().get() -- confirm runtime.
                redir_url = response.info().getheader('location')
                if not redir_url.startswith('http'):
                    base_url = '%s://%s' % (scheme, domain)
                    redir_url = urllib_parse.urljoin(base_url, redir_url)

                request = urllib_request.Request(redir_url)
                for key in headers:
                    request.add_header(key, headers[key])
                if cj is not None:
                    cj.add_cookie_header(request)

                response = urllib_request.urlopen(request)
            final = response.read()
            if 'cf-browser-verification' in final:
                # Challenge served again: retry using the fresh page.
                tries += 1
                html = final
            else:
                break
        except urllib_error.HTTPError as e:
            # log_utils.log('CloudFlare HTTP Error: %s on url: %s' % (e.code, url), log_utils.LOGWARNING, COMPONENT)
            return False
        except urllib_error.URLError as e:
            # log_utils.log('CloudFlare URLError Error: %s on url: %s' % (e, url), log_utils.LOGWARNING, COMPONENT)
            return False

    if cj is not None:
        cj.save()

    # NOTE(review): if MAX_TRIES were 0, ``final`` would be unbound here
    # and this would raise NameError -- confirm MAX_TRIES > 0.
    return final
Пример #9
0
def read_body_and_headers(url,
                          post=None,
                          headers=None,
                          follow_redirects=False,
                          timeout=None):
    """Fetch *url* and return ``(body, headers)``.

    :param url: URL to request.
    :param post: optional POST payload (dict, text or bytes); falsy issues
        a GET request.
    :param headers: optional list of ``[name, value]`` header pairs; when
        empty or None a default Firefox User-Agent is used.
        (Bug fix: the old default was the shared mutable ``headers=[]``
        which was mutated with ``append()`` -- the default UA leaked into
        later calls and into caller-supplied empty lists.)
    :param follow_redirects: when False, redirects are not followed.
    :param timeout: optional socket timeout in seconds.
    :returns: tuple of decoded body text and a list of ``[name, value]``
        response-header pairs.
    """
    xbmc.log("read_body_and_headers " + url, 2)

    # Build a fresh header list instead of mutating a shared default.
    if not headers:
        headers = [[
            "User-Agent",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:18.0) Gecko/20100101 Firefox/18.0"
        ]]

    # Cookie file shared by all requests made through this helper.
    ficherocookies = os.path.join(get_data_path(), 'cookies.dat')
    _log("read_body_and_headers cookies_file=" + ficherocookies)

    cj = None
    ClientCookie = None
    cookielib = None

    # Prefer cookielib; fall back to ClientCookie; finally run cookie-less.
    try:
        _log("read_body_and_headers importing cookielib")
        import cookielib
    except ImportError:
        _log("read_body_and_headers cookielib no disponible")
        try:
            _log("read_body_and_headers importing ClientCookie")
            import ClientCookie
        except ImportError:
            _log("read_body_and_headers ClientCookie not available")
            # Neither cookie module available: plain urllib, no cookie jar.
            urlopen = urllib_request.urlopen
            Request = urllib_request.Request
        else:
            _log("read_body_and_headers ClientCookie available")
            urlopen = ClientCookie.urlopen
            Request = ClientCookie.Request
            cj = ClientCookie.MozillaCookieJar()
    else:
        _log("read_body_and_headers cookielib available")
        # MozillaCookieJar is a FileCookieJar with load/save methods.
        urlopen = urllib_request.urlopen
        Request = urllib_request.Request
        cj = cookielib.MozillaCookieJar()

    if cj is not None:
        # One of the two cookie-handling modules imported successfully.
        _log("read_body_and_headers Cookies enabled")

        if os.path.isfile(ficherocookies):
            _log("read_body_and_headers Reading cookie file")
            # Load previously persisted cookies into the jar.
            try:
                cj.load(ficherocookies)
            except:
                # Corrupt cookie file: discard it and continue fresh.
                _log("read_body_and_headers Wrong cookie file, deleting...")
                os.remove(ficherocookies)

        # Install the cookie jar into a global opener.
        if cookielib is not None:
            _log(
                "read_body_and_headers opener using urllib_request (cookielib)"
            )
            # Optionally refuse redirects via NoRedirectHandler.
            if not follow_redirects:
                opener = urllib_request.build_opener(
                    urllib_request.HTTPHandler(
                        debuglevel=http_debug_log_enabled),
                    urllib_request.HTTPCookieProcessor(cj),
                    NoRedirectHandler())
            else:
                opener = urllib_request.build_opener(
                    urllib_request.HTTPHandler(
                        debuglevel=http_debug_log_enabled),
                    urllib_request.HTTPCookieProcessor(cj))
            urllib_request.install_opener(opener)
        else:
            _log("read_body_and_headers opener using ClientCookie")
            opener = ClientCookie.build_opener(
                ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)

    # Cookies installed -- issue the request.

    # Timing starts here.
    inicio = time.time()

    # Request-header dictionary built from the header pairs below.
    txheaders = {}
    if type(post) == dict:
        post = urlencode(post)
    if post:
        # Bug fix: ``unicode`` is a Python-2-only name (NameError on
        # Python 3); six.text_type covers both interpreters.
        if isinstance(post, six.text_type):
            post = post.encode('utf-8', 'strict')
    if post is None:
        _log("read_body_and_headers GET request")
    else:
        _log("read_body_and_headers POST request")

    # Apply the request headers.
    _log("read_body_and_headers ---------------------------")
    for header in headers:
        _log("read_body_and_headers header %s=%s" %
             (str(header[0]), str(header[1])))
        txheaders[header[0]] = header[1]
    _log("read_body_and_headers ---------------------------")
    if post and six.PY3:
        post = six.ensure_binary(post)
    req = Request(url, post, txheaders)
    if timeout is None:
        handle = urlopen(req)
    else:
        # urlopen(req, timeout=...) needs Python >= 2.6; emulate it via the
        # process-wide socket default timeout for all versions.
        try:
            import socket
            deftimeout = socket.getdefaulttimeout()
            socket.setdefaulttimeout(timeout)
            handle = urlopen(req)
            socket.setdefaulttimeout(deftimeout)
        except:
            # NOTE(review): on failure ``handle`` stays unbound and the
            # code below raises NameError -- confirm intended behavior.
            import sys
            for line in sys.exc_info():
                _log("%s" % line)

    # Persist the (possibly updated) cookie jar.
    if cj:
        cj.save(ficherocookies)

    # Read the body, transparently un-gzipping when needed, then close.
    if handle.info().get('Content-Encoding') == 'gzip':
        buf = BytesIO(handle.read())
        f = gzip.GzipFile(fileobj=buf)
        data = f.read()
    else:
        data = handle.read()

    info = handle.info()
    _log("read_body_and_headers Response")

    # Collect the response headers as [name, value] pairs.
    returnheaders = []
    _log("read_body_and_headers ---------------------------")
    for header in info:
        _log("read_body_and_headers " + header + "=" + info[header])
        returnheaders.append([header, info[header]])
    handle.close()
    _log("read_body_and_headers ---------------------------")

    # Elapsed time.
    fin = time.time()
    _log("read_body_and_headers Downloaded in %d seconds " %
         (fin - inicio + 1))
    # Normalize the body to text.
    if not isinstance(data, str):
        try:
            data = data.decode("utf-8", "strict")
        except:
            data = str(data)
    return data, returnheaders
Пример #10
0
def doLogin(cookiepath, username, password):
    """Log in to fantasti.cc and persist the session cookies.

    :param cookiepath: cookie file path, or a folder in which to create one.
    :param username: account name; login is skipped when empty.
    :param password: account password; login is skipped when empty.
    :returns: ``(logged_in, avatar)`` tuple, ``(False, False)`` when no
        credentials were supplied.
    """
    # Accept either a directory or a full file path for the cookie store.
    if not os.path.isfile(cookiepath):
        cookiepath = os.path.join(cookiepath, 'cookies.lwp')

    # Start from a clean slate: drop any stale cookie file.
    try:
        os.remove(cookiepath)
    except:
        pass

    if not (username and password):
        return (False, False)

    # Sign-in endpoint.
    login_url = 'https://fantasti.cc/signin.php'

    # Present ourselves as a desktop Firefox.
    header_string = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3'

    # Form fields expected by the sign-in page.
    login_data = urllib_parse.urlencode({
        'user': username,
        'pass': password,
        'memento': 1,
        'x': 0,
        'y': 0,
        'do': 'login',
        'SSO': ''
    })

    req = urllib_request.Request(login_url)
    req.add_header('User-Agent', header_string)

    # Cookie jar that will be saved to disk on success.
    cj = http_cookiejar.LWPCookieJar()

    class NoRedirection(urllib_request.HTTPRedirectHandler):
        # Returning None makes urllib raise HTTPError on any redirect,
        # which is caught below so the body can still be read.
        def redirect_request(self, req, fp, code, msg, headers, newurl):
            return None

    # Install cookie handling globally for this request.
    opener = urllib_request.build_opener(
        urllib_request.HTTPCookieProcessor(cj), NoRedirection())
    urllib_request.install_opener(opener)

    # POST the credentials; a redirect response surfaces as HTTPError.
    try:
        source = urllib_request.urlopen(req, login_data.encode()).read()
    except urllib_error.HTTPError as e:
        source = e.read()

    # Inspect the returned HTML to decide whether the login worked,
    # passing the username so the check can look for it.
    login, avatar = check_login(source, username)

    # Persist the session only when authentication succeeded.
    if login:
        cj.save(cookiepath)

    return (login, avatar)
Пример #11
0
        kodilog('Failed String Lookup: %s (%s)' % (string_id, e))
        return string_id


# Attach cookie handling when a jar was created earlier in this module.
if cj is not None:
    if os.path.isfile(TRANSLATEPATH(cookiePath)):
        try:
            # Reload persisted session cookies (keeping discardable ones).
            cj.load(ignore_discard=True)
        except:
            try:
                # Unreadable cookie file: delete it and start fresh.
                xbmcvfs.delete(TRANSLATEPATH(cookiePath))
                pass
            except:
                # Deletion failed (file locked?) -- tell the user.
                dialog.ok(i18n('oh_oh'), i18n('cookie_lock'))
                pass
    cookie_handler = urllib_request.HTTPCookieProcessor(cj)
    handlers += [cookie_handler]

# Install the opener globally so plain urlopen() picks up the handlers.
opener = urllib_request.build_opener(*handlers)
urllib_request.install_opener(opener)


class StopDownloading(Exception):
    """Raised to abort an in-progress download.

    :param value: reason/payload describing why downloading stopped.
    """

    def __init__(self, value):
        # Forward to Exception.__init__ so ``e.args`` is populated
        # (the original skipped this, leaving args empty and breaking
        # generic exception handling/pickling).
        super(StopDownloading, self).__init__(value)
        self.value = value

    def __str__(self):
        return repr(self.value)


def downloadVideo(url, name):
Пример #12
0
CODEC_UNKNOWN = 0
CODEC_XVID = 1
CODEC_H264 = 2
CODEC_H265 = 3
CODEC_MP3 = 4
CODEC_AAC = 5
CODEC_AC3 = 6
CODEC_DTS = 7
CODEC_DTSHD = 8
CODEC_DTSHDMA = 9

USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.66 Safari/537.36"

COOKIE_JAR = CookieJar()
urllib_request.install_opener(urllib_request.build_opener(urllib_request.HTTPCookieProcessor(COOKIE_JAR)))

class closing(object):
    """Minimal stand-in for :func:`contextlib.closing`.

    Wraps an object exposing ``close()`` so it can be used in a ``with``
    statement; the wrapped object is closed on exit regardless of errors.
    """

    def __init__(self, thing):
        # The wrapped object; must expose a ``close()`` method.
        self.thing = thing

    def __enter__(self):
        # Hand the wrapped object itself to the ``with`` body.
        return self.thing

    def __exit__(self, exc_type, exc_value, traceback):
        # Always close, whether or not the body raised.
        self.thing.close()


def parse_json(data):
    try:
        import simplejson as json
Пример #13
0
def getRegexParsed(
        regexs,
        url,
        cookieJar=None,
        forCookieJarOnly=False,
        recursiveCall=False,
        cachedPages={},
        rawPost=False,
        cookie_jar_file=None):  #0,1,2 = URL, regexOnly, CookieJarOnly
    """Resolve every ``$doregex[name]`` placeholder embedded in *url*.

    For each placeholder, the matching entry in *regexs* describes a page
    to fetch (with optional cookies, proxy, custom headers and POST data)
    and a regex or ``$pyFunction:`` expression to extract a value; the
    extracted value is substituted back into *url*.  Placeholders may
    themselves contain ``$doregex[...]`` and are resolved recursively.

    Returns ``(url, setresolved)`` for the top-level call, just ``url``
    when ``recursiveCall`` is True, ``cookieJar`` when
    ``forCookieJarOnly`` is True, or a ``listrepeat`` tuple for list-type
    regexes.

    NOTE(review): ``cachedPages={}`` is a mutable default argument shared
    across calls — here it deliberately acts as a cross-call page cache.
    This function also mutates the process-wide urllib opener (proxy /
    cookie / keepalive handlers) as a side effect.
    """
    #cachedPages = {}
    #print 'url',url
    doRegexs = re.compile('\$doregex\[([^\]]*)\]').findall(url)
    #        print 'doRegexs',doRegexs,regexs
    setresolved = True
    # Resolve each $doregex[...] placeholder found in the url.
    for k in doRegexs:
        if k in regexs:
            #print 'processing ' ,k
            m = regexs[k]
            #print m
            cookieJarParam = False
            if 'cookiejar' in m:  # so either create or reuse existing jar
                #print 'cookiejar exists',m['cookiejar']
                cookieJarParam = m['cookiejar']
                if '$doregex' in cookieJarParam:
                    cookieJar = getRegexParsed(regexs, m['cookiejar'],
                                               cookieJar, True, True,
                                               cachedPages)
                    cookieJarParam = True
                else:
                    cookieJarParam = True
            #print 'm[cookiejar]',m['cookiejar'],cookieJar
            if cookieJarParam:
                if cookieJar == None:
                    #print 'create cookie jar'
                    cookie_jar_file = None
                    if 'open[' in m['cookiejar']:
                        cookie_jar_file = m['cookiejar'].split(
                            'open[')[1].split(']')[0]
#                            print 'cookieJar from file name',cookie_jar_file

                    cookieJar = getCookieJar(cookie_jar_file)
                    #                        print 'cookieJar from file',cookieJar
                    if cookie_jar_file:
                        saveCookieJar(cookieJar, cookie_jar_file)
                    #cookieJar = http_cookiejar.LWPCookieJar()
                    #print 'cookieJar new',cookieJar
                elif 'save[' in m['cookiejar']:
                    cookie_jar_file = m['cookiejar'].split('save[')[1].split(
                        ']')[0]
                    complete_path = os.path.join(profile, cookie_jar_file)
                    #                        print 'complete_path',complete_path
                    saveCookieJar(cookieJar, cookie_jar_file)

            # Recursively resolve placeholders inside the regex's own fields.
            if m['page'] and '$doregex' in m['page']:
                pg = getRegexParsed(regexs,
                                    m['page'],
                                    cookieJar,
                                    recursiveCall=True,
                                    cachedPages=cachedPages)
                if len(pg) == 0:
                    pg = 'http://regexfailed'
                m['page'] = pg

            if 'setcookie' in m and m['setcookie'] and '$doregex' in m[
                    'setcookie']:
                m['setcookie'] = getRegexParsed(regexs,
                                                m['setcookie'],
                                                cookieJar,
                                                recursiveCall=True,
                                                cachedPages=cachedPages)
            if 'appendcookie' in m and m['appendcookie'] and '$doregex' in m[
                    'appendcookie']:
                m['appendcookie'] = getRegexParsed(regexs,
                                                   m['appendcookie'],
                                                   cookieJar,
                                                   recursiveCall=True,
                                                   cachedPages=cachedPages)

            if 'post' in m and '$doregex' in m['post']:
                m['post'] = getRegexParsed(regexs,
                                           m['post'],
                                           cookieJar,
                                           recursiveCall=True,
                                           cachedPages=cachedPages)
#                    print 'post is now',m['post']

            if 'rawpost' in m and '$doregex' in m['rawpost']:
                m['rawpost'] = getRegexParsed(regexs,
                                              m['rawpost'],
                                              cookieJar,
                                              recursiveCall=True,
                                              cachedPages=cachedPages,
                                              rawPost=True)
                #print 'rawpost is now',m['rawpost']

            if 'rawpost' in m and '$epoctime$' in m['rawpost']:
                m['rawpost'] = m['rawpost'].replace('$epoctime$',
                                                    getEpocTime())

            if 'rawpost' in m and '$epoctime2$' in m['rawpost']:
                m['rawpost'] = m['rawpost'].replace('$epoctime2$',
                                                    getEpocTime2())

            # Fetch the page (or reuse the cached copy) that the regex
            # will be applied to.
            link = ''
            if m['page'] and m[
                    'page'] in cachedPages and not 'ignorecache' in m and forCookieJarOnly == False:
                #print 'using cache page',m['page']
                link = cachedPages[m['page']]
            else:
                if m['page'] and not m['page'] == '' and m['page'].startswith(
                        'http'):
                    if '$epoctime$' in m['page']:
                        m['page'] = m['page'].replace('$epoctime$',
                                                      getEpocTime())
                    if '$epoctime2$' in m['page']:
                        m['page'] = m['page'].replace('$epoctime2$',
                                                      getEpocTime2())

                    #print 'Ingoring Cache',m['page']
                    page_split = m['page'].split('|')
                    pageUrl = page_split[0]
                    header_in_page = None
                    if len(page_split) > 1:
                        header_in_page = page_split[1]

#                            if
#                            proxy = urllib_request.ProxyHandler({ ('https' ? proxytouse[:5]=="https":"http") : proxytouse})
#                            opener = urllib_request.build_opener(proxy)
#                            urllib_request.install_opener(opener)

#                        print 'urllib_request.getproxies',urllib_request.getproxies()
                    current_proxies = urllib_request.ProxyHandler(
                        urllib_request.getproxies())

                    #print 'getting pageUrl',pageUrl
                    req = urllib_request.Request(pageUrl)
                    if 'proxy' in m:
                        proxytouse = m['proxy']
                        #                            print 'proxytouse',proxytouse
                        #                            urllib_request.getproxies= lambda: {}
                        if pageUrl[:5] == "https":
                            proxy = urllib_request.ProxyHandler(
                                {'https': proxytouse})
                            #req.set_proxy(proxytouse, 'https')
                        else:
                            proxy = urllib_request.ProxyHandler(
                                {'http': proxytouse})
                            #req.set_proxy(proxytouse, 'http')
                        opener = urllib_request.build_opener(proxy)
                        urllib_request.install_opener(opener)

                    req.add_header(
                        'User-Agent',
                        'Mozilla/5.0 (Windows NT 6.1; rv:14.0) Gecko/20100101 Firefox/14.0.1'
                    )
                    proxytouse = None

                    # Optional per-regex request headers and cookies.
                    if 'referer' in m:
                        req.add_header('Referer', m['referer'])
                    if 'accept' in m:
                        req.add_header('Accept', m['accept'])
                    if 'agent' in m:
                        req.add_header('User-agent', m['agent'])
                    if 'x-req' in m:
                        req.add_header('X-Requested-With', m['x-req'])
                    if 'x-addr' in m:
                        req.add_header('x-addr', m['x-addr'])
                    if 'x-forward' in m:
                        req.add_header('X-Forwarded-For', m['x-forward'])
                    if 'setcookie' in m:
                        #                            print 'adding cookie',m['setcookie']
                        req.add_header('Cookie', m['setcookie'])
                    if 'appendcookie' in m:
                        #                            print 'appending cookie to cookiejar',m['appendcookie']
                        cookiestoApend = m['appendcookie']
                        cookiestoApend = cookiestoApend.split(';')
                        for h in cookiestoApend:
                            n, v = h.split('=')
                            w, n = n.split(':')
                            ck = http_cookiejar.Cookie(
                                version=0,
                                name=n,
                                value=v,
                                port=None,
                                port_specified=False,
                                domain=w,
                                domain_specified=False,
                                domain_initial_dot=False,
                                path='/',
                                path_specified=True,
                                secure=False,
                                expires=None,
                                discard=True,
                                comment=None,
                                comment_url=None,
                                rest={'HttpOnly': None},
                                rfc2109=False)
                            cookieJar.set_cookie(ck)
                    if 'origin' in m:
                        req.add_header('Origin', m['origin'])
                    if header_in_page:
                        header_in_page = header_in_page.split('&')
                        for h in header_in_page:
                            n, v = h.split('=')
                            req.add_header(n, v)

                    # Re-install the global opener with whatever handler
                    # combination this regex needs (cookies / noredirect).
                    if not cookieJar == None:
                        #                            print 'cookieJarVal',cookieJar
                        cookie_handler = urllib_request.HTTPCookieProcessor(
                            cookieJar)
                        opener = urllib_request.build_opener(
                            cookie_handler,
                            urllib_request.HTTPBasicAuthHandler(),
                            urllib_request.HTTPHandler())
                        opener = urllib_request.install_opener(opener)
                        #                            print 'noredirect','noredirect' in m

                        if 'noredirect' in m:
                            opener = urllib_request.build_opener(
                                cookie_handler, NoRedirection,
                                urllib_request.HTTPBasicAuthHandler(),
                                urllib_request.HTTPHandler())
                            opener = urllib_request.install_opener(opener)
                    elif 'noredirect' in m:
                        opener = urllib_request.build_opener(
                            NoRedirection,
                            urllib_request.HTTPBasicAuthHandler(),
                            urllib_request.HTTPHandler())
                        opener = urllib_request.install_opener(opener)

                    if 'connection' in m:
                        #                            print '..........................connection//////.',m['connection']
                        from keepalive import HTTPHandler
                        keepalive_handler = HTTPHandler()
                        opener = urllib_request.build_opener(keepalive_handler)
                        urllib_request.install_opener(opener)

                    #print 'after cookie jar'
                    post = None

                    if 'post' in m:
                        postData = m['post']
                        #if '$LiveStreamRecaptcha' in postData:
                        #    (captcha_challenge,catpcha_word,idfield)=processRecaptcha(m['page'],cookieJar)
                        #    if captcha_challenge:
                        #        postData=postData.replace('$LiveStreamRecaptcha','manual_recaptcha_challenge_field:'+captcha_challenge+',recaptcha_response_field:'+catpcha_word+',id:'+idfield)
                        splitpost = postData.split(',')
                        post = {}
                        for p in splitpost:
                            n = p.split(':')[0]
                            v = p.split(':')[1]
                            post[n] = v
                        post = urllib_parse.urlencode(post)

                    if 'rawpost' in m:
                        post = m['rawpost']
                        #if '$LiveStreamRecaptcha' in post:
                        #    (captcha_challenge,catpcha_word,idfield)=processRecaptcha(m['page'],cookieJar)
                        #    if captcha_challenge:
                        #       post=post.replace('$LiveStreamRecaptcha','&manual_recaptcha_challenge_field='+captcha_challenge+'&recaptcha_response_field='+catpcha_word+'&id='+idfield)
                    link = ''
                    try:

                        if post:
                            response = urllib_request.urlopen(req, post)
                        else:
                            response = urllib_request.urlopen(req)
                        if response.info().get('Content-Encoding') == 'gzip':
                            import gzip
                            buf = six.BytesIO(response.read())
                            f = gzip.GzipFile(fileobj=buf)
                            link = f.read()
                        else:
                            link = response.read()

                        if 'proxy' in m and not current_proxies is None:
                            urllib_request.install_opener(
                                urllib_request.build_opener(current_proxies))

                        link = javascriptUnEscape(link)
                        #print repr(link)
                        #print link This just print whole webpage in LOG
                        if 'includeheaders' in m:
                            #link+=str(response.headers.get('Set-Cookie'))
                            link += '$$HEADERS_START$$:'
                            for b in response.headers:
                                link += b + ':' + response.headers.get(
                                    b) + '\n'
                            link += '$$HEADERS_END$$:'

#                        print link

                        response.close()
                    except:
                        # Best-effort fetch: any network failure leaves
                        # ``link`` empty and resolution continues.
                        pass
                    cachedPages[m['page']] = link
                    #print link
                    #print 'store link for',m['page'],forCookieJarOnly

                    if forCookieJarOnly:
                        return cookieJar  # do nothing
                elif m['page'] and not m['page'].startswith('http'):
                    if m['page'].startswith('$pyFunction:'):
                        val = doEval(m['page'].split('$pyFunction:')[1], '',
                                     cookieJar, m)
                        if forCookieJarOnly:
                            return cookieJar  # do nothing
                        link = val
                        link = javascriptUnEscape(link)
                    else:
                        link = m['page']

            # Apply the extraction expression to the fetched page and
            # substitute the result into the url.
            if '$doregex' in m['expres']:
                m['expres'] = getRegexParsed(regexs,
                                             m['expres'],
                                             cookieJar,
                                             recursiveCall=True,
                                             cachedPages=cachedPages)

            if not m['expres'] == '':
                #print 'doing it ',m['expres']
                if '$LiveStreamCaptcha' in m['expres']:
                    val = askCaptcha(m, link, cookieJar)
                    #print 'url and val',url,val
                    url = url.replace("$doregex[" + k + "]", val)

                elif m['expres'].startswith(
                        '$pyFunction:') or '#$pyFunction' in m['expres']:
                    #print 'expeeeeeeeeeeeeeeeeeee',m['expres']
                    val = ''
                    if m['expres'].startswith('$pyFunction:'):
                        val = doEval(m['expres'].split('$pyFunction:')[1],
                                     link, cookieJar, m)
                    else:
                        val = doEvalFunction(m['expres'], link, cookieJar, m)
                    if 'ActivateWindow' in m['expres']: return
                    if forCookieJarOnly:
                        return cookieJar  # do nothing
                    if 'listrepeat' in m:
                        listrepeat = m['listrepeat']
                        return listrepeat, eval(val), m, regexs, cookieJar

                    try:
                        url = url.replace(u"$doregex[" + k + "]", val)
                    except:
                        url = url.replace("$doregex[" + k + "]",
                                          six.ensure_text(val))
                else:
                    if 'listrepeat' in m:
                        listrepeat = m['listrepeat']
                        ret = re.findall(m['expres'], link)
                        return listrepeat, ret, m, regexs

                    val = ''
                    if not link == '':
                        #print 'link',link
                        reg = re.compile(m['expres']).search(link)
                        try:
                            val = reg.group(1).strip()
                        except:
                            traceback.print_exc()
                    elif m['page'] == '' or m['page'] == None:
                        val = m['expres']

                    if rawPost:
                        #                            print 'rawpost'
                        val = urllib_parse.quote_plus(val)
                    if 'htmlunescape' in m:
                        #val=urllib_parse.unquote_plus(val)
                        import HTMLParser
                        val = HTMLParser.HTMLParser().unescape(val)
                    try:
                        url = url.replace("$doregex[" + k + "]", val)
                    except:
                        url = url.replace("$doregex[" + k + "]",
                                          six.ensure_text(val))
                    #print 'ur',url
                    #return val
            else:
                url = url.replace("$doregex[" + k + "]", '')
    # Global token substitutions applied after all regexes resolve.
    if '$epoctime$' in url:
        url = url.replace('$epoctime$', getEpocTime())
    if '$epoctime2$' in url:
        url = url.replace('$epoctime2$', getEpocTime2())

    if '$GUID$' in url:
        import uuid
        url = url.replace('$GUID$', str(uuid.uuid1()).upper())
    if '$get_cookies$' in url:
        url = url.replace('$get_cookies$', getCookiesString(cookieJar))

    if recursiveCall: return url
    #print 'final url',repr(url)
    if url == "":
        return
    else:
        return url, setresolved
Пример #14
0
# Codec identifiers used to tag detected audio/video streams.
CODEC_XVID = 1
CODEC_H264 = 2
CODEC_H265 = 3
CODEC_MP3 = 4
CODEC_AAC = 5
CODEC_AC3 = 6
CODEC_DTS = 7
CODEC_DTSHD = 8
CODEC_DTSHDMA = 9

# Default browser User-Agent for outgoing HTTP requests.
USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.66 Safari/537.36"

# Module-wide cookie jar installed globally so every urllib_request
# call in this process shares one cookie session.
COOKIE_JAR = CookieJar()
urllib_request.install_opener(
    urllib_request.build_opener(
        urllib_request.HTTPCookieProcessor(COOKIE_JAR)))


class closing(object):
    """Context manager that yields *thing* and closes it on exit.

    Minimal stand-in for ``contextlib.closing``: entry returns the
    wrapped object unchanged; exit unconditionally calls ``close()``,
    whether or not an exception occurred.
    """

    def __init__(self, thing):
        self.thing = thing

    def __enter__(self):
        return self.thing

    def __exit__(self, *_exc):
        # Close regardless of exception state; exceptions propagate.
        self.thing.close()


def parse_json(data):
    try:
Пример #15
0
def downloadpage(url, post=None, headers=None, timeout=None, follow_redirects=True, cookies=True, replace_headers=False,
                 add_referer=False, only_headers=False, bypass_cloudflare=True, bypass_testcookie=True, no_decode=False,
                 method=None, cache=CACHE_ENABLED, cache_expiration=CACHE_EXPIRATION):
    """
    Download a web page and return the result wrapped in HTTPResponse.

    NOTE(review): the response key ``"sucess"`` is misspelled throughout;
    it is kept as-is because callers look it up under that exact name.

    :type url: str
    :type post: dict, str
    :type headers: dict, list
    :type timeout: int
    :type follow_redirects: bool
    :type cookies: bool, dict
    :type replace_headers: bool
    :type add_referer: bool
    :type only_headers: bool
    :type bypass_cloudflare: bool
    :type cache: bool
    :type cache_expiration: timedelta
    :return: HTTPResponse with code/headers/data/cookies/time/url fields
    """
    # Snapshot of the call arguments, used for the cache key and for the
    # recursive retry done by the testcookie/cloudflare bypasses.
    arguments = locals().copy()

    # Return a cached response when available (best effort).
    if cache:
        try:
            cache_key = '|'.join(["%s:%s" %(k,v) for k,v in sorted(arguments.items(),key= lambda x: x[0]) if v]).encode()
            cache_key = CACHE_PREFIX + hashlib.sha1(cache_key).hexdigest()
            cacheado = CACHE.get(cache_key)
            if cacheado:
                return HTTPResponse(cacheado)
        except:
            pass

    response = {}

    # POST data supplied as a dict: encode it to a query string.
    if type(post) == dict:
        post = urllib_parse.urlencode(post)

    # Quote unsafe characters in the url.
    url = urllib_parse.quote(url, safe="%/:=&?~#+!$,;'@()*[]")

    # Default headers unless the caller provides its own.
    request_headers = default_headers.copy()

    # Headers passed as parameters: merged into, or replacing, the defaults.
    if headers is not None:
        if not replace_headers:
            request_headers.update(dict(headers))
        else:
            request_headers = dict(headers)

    # Referer derived from the url's scheme+host.
    if add_referer:
        request_headers["Referer"] = "/".join(url.split("/")[:3])

    #logger("Headers:")
    #logger(request_headers, 'info')

    # Handler chain for the opener.
    handlers = list()
    handlers.append(HTTPHandler(debuglevel=False))
    handlers.append(HTTPSHandler(debuglevel=False))
    handlers.append(urllib_request.HTTPBasicAuthHandler())

    # Redirect behaviour.
    if not follow_redirects:
        handlers.append(NoRedirectHandler())
    else:
        handlers.append(HTTPRedirectHandler())

    # Cookies passed as a dict are injected into the session jar ``cj``.
    if type(cookies) == dict:
        for name, value in cookies.items():
            if not type(value) == dict:
                value = {'value': value}
            ck = Cookie(
                version=0,
                name=name,
                value=value.get('value', ''),
                port=None,
                port_specified=False,
                domain=value.get('domain', urllib_parse.urlparse(url)[1]),
                domain_specified=False,
                domain_initial_dot=False,
                path=value.get('path', '/'),
                path_specified=True,
                secure=False,
                expires=value.get('expires', time.time() + 3600 * 24),
                discard=True,
                comment=None,
                comment_url=None,
                rest={'HttpOnly': None},
                rfc2109=False
            )
            cj.set_cookie(ck)

    if cookies:
        handlers.append(urllib_request.HTTPCookieProcessor(cj))

    # Opener built from the handler chain (not installed globally).
    opener = urllib_request.build_opener(*handlers)

    # Timer for the request duration.
    inicio = time.time()

    # Build the request.
    req = Request(url, six.ensure_binary(post) if post else None, request_headers, method=method)

    try:
        #logger("Realizando Peticion")
        handle = opener.open(req, timeout=timeout)
        #logger('Peticion realizada')

    except HTTPError as handle:
        # HTTP error responses still carry usable headers/body.
        #logger('Peticion realizada con error')
        response["sucess"] = False
        response["code"] = handle.code
        response["error"] = handle.__dict__.get("reason", str(handle))
        response["headers"] = dict(handle.headers.items())
        response['cookies'] = get_cookies(urllib_parse.urlparse(url)[1])
        if not only_headers:
            #logger('Descargando datos...')
            response["data"] = handle.read()
        else:
            response["data"] = b""
        response["time"] = time.time() - inicio
        response["url"] = handle.geturl()

    except Exception as e:
        # Transport-level failure: no response at all.
        #logger('Peticion NO realizada')
        response["sucess"] = False
        response["code"] = e.__dict__.get("errno", e.__dict__.get("code", str(e)))
        response["error"] = e.__dict__.get("reason", str(e))
        response["headers"] = {}
        response['cookies'] = get_cookies(urllib_parse.urlparse(url)[1])
        response["data"] = b""
        response["time"] = time.time() - inicio
        response["url"] = url

    else:
        # Successful request.
        response["sucess"] = True
        response["code"] = handle.code
        response["error"] = None
        response["headers"] = dict(handle.headers.items())
        response['cookies'] = get_cookies(urllib_parse.urlparse(url)[1])
        if not only_headers:
            #logger('Descargando datos...')
            response["data"] = handle.read()
        else:
            response["data"] = b""
        response["time"] = time.time() - inicio
        response["url"] = handle.geturl()

    # Normalise header names to lowercase for lookups below.
    response['headers'] = dict([(k.lower(), v) for k, v in response['headers'].items()])

    #logger("Terminado en %.2f segundos" % (response["time"]))
    #logger("url: %s" % url)
    #logger("Response sucess     : %s" % (response["sucess"]))
    #logger("Response code       : %s" % (response["code"]))
    #logger("Response error      : %s" % (response["error"]))
    #logger("Response cookies      : %s" % (response["cookies"]))
    #logger("Response data length: %s" % (len(response["data"])))
    #logger("Response headers:")
    #logger(response['headers'])

    # Persist the cookie jar to disk.
    if cookies:
        save_cookies()

    # Transparently decompress gzip-encoded bodies.
    if response["headers"].get('content-encoding') == 'gzip':
        response["data"] = gzip.GzipFile(fileobj=BytesIO(response["data"])).read()

    # Binary responses are not decoded and skip the cloudflare/testcookie
    # checks below.
    if not is_binary(response):
        response['data'] = six.ensure_str(response['data'], errors='replace')

        if not no_decode:
            response["data"] = six.ensure_str(HTMLParser().unescape(
                six.ensure_text(response['data'], errors='replace')
            ))

        # Anti TestCookie: solve the AES __test cookie challenge and retry.
        if bypass_testcookie:
            if 'document.cookie="__test="+toHex(slowAES.decrypt(c,2,a,b))+"' in response['data']:
                a = re.findall('a=toNumbers\("([^"]+)"\)', response['data'])[0].decode("HEX")
                b = re.findall('b=toNumbers\("([^"]+)"\)', response['data'])[0].decode("HEX")
                c = re.findall('c=toNumbers\("([^"]+)"\)', response['data'])[0].decode("HEX")

                arguments['bypass_testcookie'] = False
                if not type(arguments['cookies']) == dict:
                    arguments['cookies'] = {'__test': ii11.new(a, ii11.MODE_CBC, b).decrypt(c).encode("HEX")}
                else:
                    arguments['cookies']['__test'] = ii11.new(a, ii11.MODE_CBC, b).decrypt(c).encode("HEX")
                response = downloadpage(**arguments).__dict__

        # Anti Cloudflare: retry through the cloudflare solver if needed.
        if bypass_cloudflare:
            response = retry_if_cloudflare(response, arguments)

    # Store the final response in the cache.
    if cache:
        CACHE.set(cache_key, response, expiration=cache_expiration)

    return HTTPResponse(response)