Example #1
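This example works around a Cloudflare reCAPTCHA interstitial: it fetches the challenge page, pulls the data-sitekey and data-ray values out of the HTML, has recaptcha_v2 produce a token, submits it to Cloudflare's /cdn-cgi/l/chk_captcha endpoint, and then follows the redirect chain by hand so the clearance cookies can be captured into the supplied cookie jar.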
import re
import urllib
import urllib2
import urlparse

# USER_AGENT, NoRedirection, logger, log_utils and recaptcha_v2 are expected
# to be supplied by the surrounding module.

def solve(url, cj, user_agent=None, name=None):
    if user_agent is None:
        user_agent = USER_AGENT
    headers = {'User-Agent': user_agent, 'Referer': url}
    request = urllib2.Request(url)
    for key in headers:
        request.add_header(key, headers[key])
    try:
        response = urllib2.urlopen(request)
        html = response.read()
    except urllib2.HTTPError as e:
        # Cloudflare serves the challenge page with an error status, so the
        # captcha markup lives in the error body.
        html = e.read()

    # The challenge page embeds the reCAPTCHA site key and a Cloudflare ray id.
    match = re.search('data-sitekey="([^"]+)', html)
    match1 = re.search('data-ray="([^"]+)', html)
    if match and match1:
        token = recaptcha_v2.UnCaptchaReCaptcha().processCaptcha(match.group(1), lang='en', name=name, referer=url)
        if token:
            # Submit the solved token to Cloudflare's captcha check endpoint.
            data = {'g-recaptcha-response': token, 'id': match1.group(1)}
            parts = urlparse.urlparse(url)
            url = '%s://%s/cdn-cgi/l/chk_captcha?%s' % (parts.scheme, parts.hostname, urllib.urlencode(data))
            if cj is not None:
                # Reuse any previously saved cookies; the cookie file may not
                # exist yet on the first run.
                try:
                    cj.load(ignore_discard=True)
                except Exception:
                    pass
                opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
                urllib2.install_opener(opener)

            try:
                request = urllib2.Request(url)
                for key in headers:
                    request.add_header(key, headers[key])
                # Disable automatic redirects so the cookies set on each hop
                # can be captured into the jar before following it.
                opener = urllib2.build_opener(NoRedirection)
                urllib2.install_opener(opener)
                response = urllib2.urlopen(request)
                while response.getcode() in (301, 302, 303, 307):
                    if cj is not None:
                        cj.extract_cookies(response, request)
                    redir_url = response.info().getheader('location')
                    if not redir_url.startswith('http'):
                        redir_url = urlparse.urljoin(url, redir_url)
                    request = urllib2.Request(redir_url)
                    for key in headers:
                        request.add_header(key, headers[key])
                    if cj is not None:
                        cj.add_cookie_header(request)

                    response = urllib2.urlopen(request)

                final = response.read()
                if cj is not None:
                    # Persist the clearance cookies for future requests.
                    cj.extract_cookies(response, request)
                    cj.save(ignore_discard=True)

                return final
            except urllib2.HTTPError as e:
                logger.log('CF Captcha Error: %s on url: %s' % (e.code, url), log_utils.LOGWARNING)
                return False
    else:
        logger.log('CF Captcha without sitekey/data-ray: %s' % (url), log_utils.LOGWARNING)
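A minimal usage sketch (not part of the original): it assumes the surrounding module supplies USER_AGENT, NoRedirection, logger, log_utils and recaptcha_v2, and the cookie-file path is illustrative only.

import cookielib

cj = cookielib.MozillaCookieJar('/tmp/cf_cookies.txt')  # illustrative path
html = solve('http://example.com/', cj)
if html:
    print('challenge passed, %d bytes of page body' % len(html))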
Example #2
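This example is a scraper method that resolves a page link to a playable stream: it strips Kodi-style pipe-delimited headers off the incoming link, loads the player iframe, solves the reCAPTCHA gate if present, undoes the page's string-replace and window.atob obfuscation, and then either asks the user to pick a stream or auto-picks the highest quality. It assumes module-level imports of re, base64, urllib, urlparse and xbmcgui, plus the project helpers (dom_parser, recaptcha_v2, scraper_utils, log_utils, i18n, Q_ORDER).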
    def resolve_link(self, link):
        # Kodi-style links can carry request headers after a pipe:
        # "url|Header1=value1&Header2=value2". Split on the first '=' only so
        # encoded values containing '=' survive.
        try:
            headers = dict(
                [item.split('=', 1) for item in (link.split('|')[1]).split('&')])
            for key in headers:
                headers[key] = urllib.unquote(headers[key])
            link = link.split('|')[0]
        except Exception:
            headers = {}

        if not link.startswith('http'):
            link = urlparse.urljoin(self.base_url, link)
        html = self._http_get(link, headers=headers, cache_limit=0)

        # Locate the player container and its embedded iframe.
        fragment = dom_parser.parse_dom(html, 'div', {'class': 'player'})
        if fragment:
            iframe_url = dom_parser.parse_dom(fragment[0], 'iframe', ret='src')
            if iframe_url:
                iframe_url = iframe_url[0]
                headers = {'Referer': link}
                html = self._http_get(iframe_url,
                                      headers=headers,
                                      cache_limit=0)
                # If the embed is gated by reCAPTCHA, solve it and re-request
                # the iframe with the token.
                sitekey = dom_parser.parse_dom(html,
                                               'div', {'class': 'g-recaptcha'},
                                               ret='data-sitekey')
                if sitekey:
                    token = recaptcha_v2.UnCaptchaReCaptcha().processCaptcha(
                        sitekey[0], lang='en')
                    if token:
                        data = {'g-recaptcha-response': token}
                        html = self._http_get(iframe_url,
                                              data=data,
                                              cache_limit=0)

                # Undo simple .replace('a', 'b') obfuscation in the page source.
                match = re.search(
                    r"\.replace\(\s*'([^']+)'\s*,\s*'([^']*)'\s*\)", html, re.I)
                if match:
                    html = html.replace(match.group(1), match.group(2))

                # Unwrap nested window.atob('...') calls: base64-decode once
                # per call site found in the page.
                match = re.search(r"window\.atob[\([]+'([^']+)", html)
                if match:
                    func_count = len(re.findall(r'window\.atob', html))
                    html = match.group(1)
                    for _i in xrange(func_count):
                        html = base64.decodestring(html)

                # Collect (stream_url, label) pairs from the <source> tags.
                streams = []
                for match in re.finditer(
                        r'''<source[^>]+src=["']([^;'"]+)[^>]+label=['"]([^'"]+)''',
                        html):
                    streams.append(match.groups())

                if len(streams) > 1:
                    if not self.auto_pick:
                        # Let the user choose a stream from a Kodi select dialog.
                        result = xbmcgui.Dialog().select(
                            i18n('choose_stream'), [e[1] for e in streams])
                        if result > -1:
                            return streams[result][0] + '|User-Agent=%s' % (
                                scraper_utils.get_ua())
                    else:
                        # Auto-pick the stream with the highest quality ranking.
                        best_stream = ''
                        best_q = 0
                        for stream_url, label in streams:
                            quality = Q_ORDER[scraper_utils.height_get_quality(label)]
                            if quality > best_q:
                                best_q = quality
                                best_stream = stream_url

                        if best_stream:
                            return best_stream + '|User-Agent=%s' % (
                                scraper_utils.get_ua())
                elif streams:
                    return streams[0][0] + '|User-Agent=%s' % (
                        scraper_utils.get_ua())

                # Fall back to a nested iframe if no direct stream was found.
                iframe_url = dom_parser.parse_dom(html, 'iframe', ret='src')
                if iframe_url:
                    return iframe_url[0]

        log_utils.log('No WatchHD Link Found: %s' % (html),
                      log_utils.LOGWARNING)
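The '|User-Agent=...' suffix on the returned URLs is Kodi's pipe-delimited header notation, the same format resolve_link parses off the incoming link. A minimal round-trip sketch of that notation follows; the helper name is illustrative, not from the scraper.

import urllib

def attach_headers(url, headers):
    # Percent-encode values so '&' and '=' inside them survive the later split.
    pairs = '&'.join('%s=%s' % (k, urllib.quote(v)) for k, v in headers.items())
    return '%s|%s' % (url, pairs)

print(attach_headers('http://example.com/video.mp4',
                     {'User-Agent': 'Mozilla/5.0', 'Referer': 'http://example.com/'}))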