def solve(url, cj, user_agent=None, name=None):
    """Attempt to solve a Cloudflare reCAPTCHA challenge page.

    Fetches *url*, scrapes the ``data-sitekey`` and ``data-ray`` values from
    the challenge HTML, obtains a reCAPTCHA response token via
    ``recaptcha_v2.UnCaptchaReCaptcha``, then submits it to Cloudflare's
    ``/cdn-cgi/l/chk_captcha`` endpoint, following redirects manually so
    cookies can be captured into *cj* along the way.

    :param url: URL of the page protected by the Cloudflare captcha.
    :param cj: cookie jar to load/save clearance cookies into, or None.
    :param user_agent: User-Agent header; defaults to module USER_AGENT.
    :param name: passed through to processCaptcha (captcha dialog label).
    :returns: final response body (str) on success, False on HTTP error or
        redirect loop, None if no sitekey/data-ray was found or no token
        was obtained.
    """
    if user_agent is None:
        user_agent = USER_AGENT
    headers = {'User-Agent': user_agent, 'Referer': url}
    request = urllib2.Request(url)
    for key in headers:
        request.add_header(key, headers[key])
    try:
        response = urllib2.urlopen(request)
        html = response.read()
    except urllib2.HTTPError as e:
        # Cloudflare serves the challenge page with an error status code;
        # the body still contains the sitekey markup we need.
        html = e.read()

    match = re.search('data-sitekey="([^"]+)', html)
    match1 = re.search('data-ray="([^"]+)', html)
    if match and match1:
        token = recaptcha_v2.UnCaptchaReCaptcha().processCaptcha(match.group(1), lang='en', name=name, referer=url)
        if token:
            data = {'g-recaptcha-response': token, 'id': match1.group(1)}
            parts = urlparse.urlparse(url)
            url = '%s://%s/cdn-cgi/l/chk_captcha?%s' % (parts.scheme, parts.hostname, urllib.urlencode(data))
            if cj is not None:
                # Best-effort: a fresh jar may have no backing file yet.
                try:
                    cj.load(ignore_discard=True)
                except Exception:
                    pass
                opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
                urllib2.install_opener(opener)
            try:
                request = urllib2.Request(url)
                for key in headers:
                    request.add_header(key, headers[key])
                # Disable automatic redirect handling so each hop's
                # Set-Cookie headers can be extracted into the jar.
                opener = urllib2.build_opener(NoRedirection)
                urllib2.install_opener(opener)
                response = urllib2.urlopen(request)
                max_redirects = 10  # guard against redirect loops
                while response.getcode() in [301, 302, 303, 307]:
                    if max_redirects <= 0:
                        logger.log('CF Captcha redirect loop on url: %s' % (url), log_utils.LOGWARNING)
                        return False
                    max_redirects -= 1
                    if cj is not None:
                        cj.extract_cookies(response, request)
                    redir_url = response.info().getheader('location')
                    if not redir_url.startswith('http'):
                        redir_url = urlparse.urljoin(url, redir_url)
                    request = urllib2.Request(redir_url)
                    for key in headers:
                        request.add_header(key, headers[key])
                    if cj is not None:
                        cj.add_cookie_header(request)
                    response = urllib2.urlopen(request)
                final = response.read()
                if cj is not None:
                    cj.extract_cookies(response, request)
                    cj.save(ignore_discard=True)
                return final
            except urllib2.HTTPError as e:
                logger.log('CF Captcha Error: %s on url: %s' % (e.code, url), log_utils.LOGWARNING)
                return False
    else:
        logger.log('CF Captcha without sitekey/data-ray: %s' % (url), log_utils.LOGWARNING)
def resolve_link(self, link):
    """Resolve a WatchHD page link to a playable stream URL.

    Fetches the page, locates the player iframe, solves an embedded
    reCAPTCHA if present, de-obfuscates the iframe HTML (javascript
    ``String.replace`` tricks and nested ``window.atob`` base64 layers),
    then picks a ``<source>`` stream — via a selection dialog or, when
    ``self.auto_pick`` is set, the highest-quality label.

    :param link: page URL, optionally with '|key=val&...' request headers
        appended (values URL-encoded).
    :returns: stream URL suffixed with '|User-Agent=...', a bare iframe
        URL as a fallback, or None if nothing was found.
    """
    # A link may carry extra request headers after a '|' separator.
    try:
        headers = dict([item.split('=') for item in (link.split('|')[1]).split('&')])
        for key in headers:
            headers[key] = urllib.unquote(headers[key])
        link = link.split('|')[0]
    except Exception:
        headers = {}

    if not link.startswith('http'):
        link = urlparse.urljoin(self.base_url, link)
    html = self._http_get(link, headers=headers, cache_limit=0)
    fragment = dom_parser.parse_dom(html, 'div', {'class': 'player'})
    if fragment:
        iframe_url = dom_parser.parse_dom(fragment[0], 'iframe', ret='src')
        if iframe_url:
            iframe_url = iframe_url[0]
            headers = {'Referer': link}
            html = self._http_get(iframe_url, headers=headers, cache_limit=0)
            # Solve the embedded reCAPTCHA, if present, then re-fetch the
            # iframe with the response token.
            sitekey = dom_parser.parse_dom(html, 'div', {'class': 'g-recaptcha'}, ret='data-sitekey')
            if sitekey:
                token = recaptcha_v2.UnCaptchaReCaptcha().processCaptcha(sitekey[0], lang='en')
                if token:
                    data = {'g-recaptcha-response': token}
                    html = self._http_get(iframe_url, data=data, cache_limit=0)
            # Undo the page's javascript String.replace() obfuscation.
            match = re.search(r"\.replace\(\s*'([^']+)'\s*,\s*'([^']*)'\s*\)", html, re.I)
            if match:
                html = html.replace(match.group(1), match.group(2))
            # The markup may be wrapped in nested window.atob() calls:
            # decode base64 once per atob occurrence.
            match = re.search(r"window\.atob[\([]+'([^']+)", html)
            if match:
                func_count = len(re.findall(r'window\.atob', html))
                html = match.group(1)
                for _i in xrange(func_count):
                    html = base64.decodestring(html)

            streams = []
            for match in re.finditer(r'''<source[^>]+src=["']([^;'"]+)[^>]+label=['"]([^'"]+)''', html):
                streams.append(match.groups())
            if len(streams) > 1:
                if not self.auto_pick:
                    result = xbmcgui.Dialog().select(i18n('choose_stream'), [e[1] for e in streams])
                    if result > -1:
                        return streams[result][0] + '|User-Agent=%s' % (scraper_utils.get_ua())
                else:
                    # Auto-pick the stream whose label maps to the highest
                    # quality rank in Q_ORDER.
                    best_stream = ''
                    best_q = 0
                    for stream in streams:
                        stream_url, label = stream
                        quality = Q_ORDER[scraper_utils.height_get_quality(label)]
                        if quality > best_q:
                            best_q = quality
                            best_stream = stream_url
                    if best_stream:
                        return best_stream + '|User-Agent=%s' % (scraper_utils.get_ua())
            elif streams:
                return streams[0][0] + '|User-Agent=%s' % (scraper_utils.get_ua())

    # Fallback: return the first iframe on the most recently fetched page.
    # NOTE(review): indentation reconstructed from a whitespace-mangled
    # source; this fallback is assumed to sit at function level — confirm
    # against upstream.
    iframe_url = dom_parser.parse_dom(html, 'iframe', ret='src')
    if iframe_url:
        return iframe_url[0]

    log_utils.log('No WatchHD Link Found: %s' % (html), log_utils.LOGWARNING)