def parse_payload(method, payload):
    """Normalize an Elementum search payload and pick the title used for searching.

    Args:
        method (str): Search type; for ``general`` the payload may be a bare
            query string or a dict carrying a ``query`` key.
        payload (dict or str): Search payload from Elementum.

    Returns:
        dict: Payload with lower-cased ``titles`` keys and a ``search_title``
        entry (the Kodi-language translation when enabled and available,
        otherwise the plain ``title``).
    """
    if method == 'general':
        if 'query' in payload:
            payload['title'] = payload['query']
            payload['titles'] = {'source': payload['query']}
        else:
            # A bare string payload: wrap it into the expected dict shape.
            payload = {
                'title': payload,
                'titles': {
                    'source': payload
                },
            }

    # Lower-case the language codes. ``.get`` keeps a payload without a
    # 'titles' key from raising (the original ``payload['titles'].iteritems()``
    # assumed it was always present), and ``.items()`` behaves the same on
    # Python 2 and 3.
    payload['titles'] = dict(
        (k.lower(), v) for k, v in payload.get('titles', {}).items())

    if get_setting('kodi_language', bool):
        kodi_language = xbmc.getLanguage(xbmc.ISO_639_1)
        if not kodi_language:
            log.warning("Kodi returned empty language code...")
        elif kodi_language not in payload.get('titles', {}):
            log.info("No '%s' translation available..." % kodi_language)
        else:
            payload["search_title"] = payload["titles"][kodi_language]

    if "search_title" not in payload:
        log.info(
            "Could not determine search title, falling back to normal title: %s",
            repr(payload["title"]))
        payload["search_title"] = payload["title"]

    return payload
def check_sizes(self):
    """Clamp an inverted size range so ``min_size`` stays below ``max_size``."""
    # Nothing to do when the range is already sane.
    if self.min_size <= self.max_size:
        return
    log.warning(
        "Minimum size above maximum, using max size minus 1 GB")
    self.min_size = self.max_size - 1
def search_jackett(payload, method):
    """Dispatch one search of type ``method`` to Jackett, then filter and sort the results."""
    client = get_client()
    if client is None:
        # No usable Jackett configuration; tell the user and bail out.
        utils.notify(utils.translation(32603), image=utils.get_icon_path())
        return []

    log.debug("Processing %s with Jackett", method)
    if method == 'movie':
        res = client.search_movie(payload["search_title"], payload['year'],
                                  payload["imdb_id"])
    elif method == 'season':
        res = client.search_season(payload["search_title"],
                                   payload["season"], payload["imdb_id"])
    elif method == 'episode':
        res = client.search_episode(payload["search_title"],
                                    payload["season"], payload["episode"],
                                    payload["imdb_id"])
    elif method == 'anime':
        log.warning("jackett provider does not yet support anime search")
        res = []
        log.info("anime payload=%s", repr(payload))
    else:
        res = client.search_query(payload["search_title"])

    log.debug("%s search returned %d results", method, len(res))
    return sort_results(filter_results(method, res))
def get_magnet_from_jackett(original_uri):
    """Resolve a Jackett result URI to a magnet link.

    Follows non-redirecting hops manually; a served ``.torrent`` body is
    converted to a magnet. Returns ``None`` when resolution fails.
    """
    uri = original_uri
    while True:
        if uri.startswith('magnet:'):
            return uri

        response = requests.get(uri, allow_redirects=False)
        if response.is_redirect:
            # Keep walking the redirect chain ourselves.
            uri = response.headers['Location']
            continue

        if response.status_code == httplib.OK and response.headers.get(
                'Content-Type') == 'application/x-bittorrent':
            # Got the torrent file itself; derive the magnet from it.
            return Torrent.from_string(response.content).get_magnet(True)

        log.warning(
            "Could not get final redirect location for URI %s. Response was: %d %s",
            original_uri, response.status_code, response.reason)
        log.debug("Response for failed redirect %s is", original_uri)
        log.debug("=" * 50)
        for header, value in response.headers.iteritems():
            log.debug("%s: %s", header, value)
        log.debug("")
        log.debug("%s", base64.standard_b64encode(response.content))
        log.debug("=" * 50)
        return None
def search_movie(self, title, year, imdb_id):
    """Run a Jackett movie search, preferring an imdbid lookup and falling
    back to a free-text ``title year`` query when the indexers lack movie
    capabilities or the imdbid parameter."""
    if "search_tags" not in self._caps:
        notify(translation(32701), image=get_icon_path())
        return []

    movie_caps = self._caps["search_tags"]['movie-search']
    if not movie_caps['enabled']:
        notify(translation(32702).format("movie"), image=get_icon_path())
        log.warning(
            "Jackett has no movie capabilities, please add a indexer that has movie capabilities. "
            "Falling back to query search...")
        return self.search_query(title + u' ' + year)

    # todo what values are possible for imdb_id?
    supports_imdb = 'imdbid' in movie_caps["params"]
    log.debug("movie search; imdb_id=%s, has_imdb_caps=%s", imdb_id,
              supports_imdb)

    params = {"t": "movie", "apikey": self._api_key}
    if imdb_id and supports_imdb:
        params["imdbid"] = imdb_id
    else:
        params["q"] = title + u' ' + year
        log.debug("searching movie with query=%s", params["q"])
    return self._do_search_request(params)
def _search_re_keys(name, re_dict, log_msg, default=""): for result, search_keys in re_dict.iteritems(): if bool( re.search(r'\W+(' + "|".join(search_keys) + r')\W*', name, re.IGNORECASE)): return result log.warning("Could not determine %s from filename '%s'", log_msg, name) return default
def cleanup_results(results_list):
    """De-duplicate raw provider results and order them by seeders.

    Results lacking an info hash get one derived from their magnet link or
    URI; seedless items are dropped unless 'allow_noseeds' is enabled.

    Args:
        results_list (list): Results to clean-up

    Returns:
        list: De-duplicated, hashed and sorted results
    """
    if not results_list:
        return []

    allow_noseeds = get_setting('allow_noseeds', bool)
    seen_hashes = []
    kept = []

    for item in results_list:
        if not item['seeds'] and not allow_noseeds:
            continue

        if not item['uri']:
            if not item['name']:
                continue
            try:
                log.warning('[%s] No URI for %s' %
                            (item['provider'][16:-8], repr(item['name'])))
            except Exception as e:
                import traceback
                log.warning("%s logging failed with: %s" %
                            (item['provider'], repr(e)))
                map(log.debug, traceback.format_exc().split("\n"))
            continue

        info_hash = item['info_hash'].upper()
        if not info_hash:
            # Derive a de-duplication key from the magnet, or hash the URI.
            try:
                if item['uri'] and item['uri'].startswith('magnet'):
                    info_hash = Magnet(item['uri']).info_hash.upper()
                else:
                    info_hash = hashlib.md5(
                        py2_encode(item['uri'])).hexdigest()
            except:
                pass

        if info_hash not in seen_hashes:
            kept.append(item)
            seen_hashes.append(info_hash)

    return sorted(kept, key=lambda r: get_int(r['seeds']), reverse=True)
def search_query(self, query):
    """Run a plain free-text search against Jackett.

    Returns an empty list (after notifying the user) when none of the
    configured indexers support generic search.
    """
    if not self._caps["search_tags"]['search']:
        notify(translation(32702).format("query"), image=get_icon_path())
        log.warning(
            "Jackett has no search capabilities, please add a indexer that has search capabilities."
        )
        return []
    return self._do_search_request({"apikey": self._api_key, "q": query})
def search_shows(self, title, season=None, episode=None, imdb_id=None):
    """Run a Jackett tv-search, falling back to free-text queries when the
    connected indexers lack tv-search capabilities.

    Args:
        title (str): Show title.
        season: Season number, optional.
        episode: Episode number, optional.
        imdb_id (str): IMDB id, used when an indexer supports imdbid lookups.

    Returns:
        list: Parsed search results.
    """
    if "search_tags" not in self._caps:
        notify(translation(32701), image=get_icon_path())
        return []

    tv_search_caps = self._caps["search_tags"]['tv-search']
    if not tv_search_caps['enabled']:
        notify(translation(32702).format("show"), image=get_icon_path())
        log.warning(
            "Jackett has no tvsearch capabilities, please add a indexer that has tvsearch capabilities. "
            "Falling back to query search...")
        # Build a "Title S01E02"-style free-text query.
        title_ep = title
        if bool(season):
            title_ep = "{} S{:0>2}".format(title_ep, season)
        if bool(episode):
            title_ep = "{}E{:0>2}".format(title_ep, episode)
        results = self.search_query(title_ep)
        if get_setting("search_season_on_episode",
                       bool) and bool(season) and bool(episode):
            # Also pull in season-pack results for the same season.
            season_query = re.escape("{:0>2}".format(season))
            results = results + self._filter_season(
                self.search_query("{} S{}".format(title, season_query)),
                season)
        return results

    # todo what values are possible for imdb_id?
    tv_params = tv_search_caps["params"]
    request_params = {"t": "tvsearch", "apikey": self._api_key}
    has_imdb_caps = 'imdbid' in tv_params
    # Bug fix: log label said "movie search" (copy-pasted from search_movie).
    log.debug("tv search; imdb_id=%s, has_imdb_caps=%s", imdb_id,
              has_imdb_caps)
    if imdb_id and has_imdb_caps:
        request_params["imdbid"] = imdb_id
    else:
        log.debug("searching tv show with query=%s, season=%s, episode=%s",
                  title, season, episode)
        request_params["q"] = title
        if bool(season) and 'season' in tv_params:
            request_params["season"] = season
        if bool(episode) and 'ep' in tv_params:
            request_params["ep"] = episode

    results = self._do_search_request(request_params)

    if get_setting(
            "search_season_on_episode", bool
    ) and 'season' in request_params and 'ep' in request_params:
        # Re-run the same request without the episode filter to also collect
        # season packs, keeping only matches for this season.
        del request_params['ep']
        results = results + self._filter_season(
            self._do_search_request(request_params), season)

    return results
def get_client():
    """Build a Jackett client from the addon's host/api_key settings.

    Returns ``None`` (after notifying the user) when the host URL or the
    32-character API key is invalid.
    """
    host = urlparse(get_setting('host'))
    if not host.netloc or not host.scheme:
        log.warning("Host %s is invalid. Can't return anything",
                    get_setting('host'))
        utils.notify(utils.translation(32600), image=utils.get_icon_path())
        return None

    api_key = get_setting('api_key')
    if len(api_key) != 32:
        utils.notify(utils.translation(32601), image=utils.get_icon_path())
        return None

    log.debug("jackett host: %s", host)
    # Mask the key in the log, keeping only the first 2 and last 4 chars.
    log.debug("jackett api_key: %s%s%s", api_key[0:2], "*" * 26,
              api_key[-4:])
    return Jackett(host=host.geturl(), api_key=api_key)
def cleanup_results(results_list):
    """De-duplicate raw provider results, hash the ones without an info hash,
    and sort them (by resolution when 'sort_by_resolution' is set, otherwise
    by seeders).

    Args:
        results_list (list): Results to clean-up

    Returns:
        list: De-duplicated, hashed and sorted results
    """
    if not results_list:
        return []

    allow_noseeds = get_setting('allow_noseeds', bool)
    seen_hashes = []
    kept = []

    for item in results_list:
        if not item['seeds'] and not allow_noseeds:
            continue

        # Provider display name sits between the first ']' and '[/' markers.
        raw_provider = item['provider']
        provider_name = raw_provider[raw_provider.find(']') +
                                     1:raw_provider.find('[/')]

        if not item['uri']:
            if not item['name']:
                continue
            try:
                log.warning('[%s] No URI for %s' %
                            (provider_name, repr(item['name'])))
            except Exception as e:
                import traceback
                log.warning("%s logging failed with: %s" %
                            (provider_name, repr(e)))
                map(log.debug, traceback.format_exc().split("\n"))
            continue

        info_hash = item['info_hash'].upper()
        if not info_hash:
            if item['uri'] and item['uri'].startswith('magnet'):
                info_hash = Magnet(item['uri']).info_hash.upper()
            else:
                info_hash = hashlib.md5(item['uri']).hexdigest()

        try:
            log.debug("[%s] Hash for %s: %s" %
                      (provider_name, repr(item['name']), info_hash))
        except Exception as e:
            import traceback
            log.warning("%s logging failed with: %s" %
                        (item['provider'], repr(e)))
            map(log.debug, traceback.format_exc().split("\n"))

        if info_hash not in seen_hashes:
            kept.append(item)
            seen_hashes.append(info_hash)

    if get_setting("sort_by_resolution", bool):
        log.debug(
            "[EXPEREMENTAL] Start last sorting list by resolution of all result before send to Elementum"
        )
        kept = sorted(kept,
                      key=lambda r: get_int(r.pop('resolution')),
                      reverse=True)
    else:
        kept = sorted(kept, key=lambda r: get_int(r['seeds']), reverse=True)
    return kept
def open(self, url, language='en', post_data=None, get_data=None):
    """ Opens a connection to a webpage and saves its HTML content in ``self.content``

    Optionally routes the request through Elementum's proxy (HTTP or SOCKS5),
    decompresses gzip responses and decodes the body using the detected
    charset. Sets ``self.content``, ``self.headers`` and ``self.status`` as
    side effects and returns True on success.

    Args:
        url (str): The URL to open
        language (str): The language code for the ``Content-Language`` header
        post_data (dict): POST data for the request
        get_data (dict): GET data for the request
    """
    if not post_data:
        post_data = {}
    if get_data:
        # GET parameters are appended to the URL itself.
        url += '?' + urlencode(get_data)
    log.debug("Opening URL: %s" % repr(url))
    result = False
    # A non-empty body switches urllib2 to a POST request.
    data = urlencode(post_data) if len(post_data) > 0 else None
    req = urllib2.Request(url, data)
    self._read_cookies(url)
    log.debug("Cookies for %s: %s" % (repr(url), repr(self._cookies)))
    handlers = []
    if get_setting("use_elementum_proxy", bool) and self.proxy_url:
        if self.proxy_type:
            # proxy_type 2 = plain HTTP(S) proxy, 1 = SOCKS5.
            if self.proxy_type == 2:
                proxyHandler = urllib2.ProxyHandler({
                    'http': self.proxy_url,
                    'https': self.proxy_url,
                })
                handlers.append(proxyHandler)
            elif self.proxy_type == 1:
                import proxy.socks as socks
                from proxy.sockshandler import SocksiPyHandler
                # proxy_url looks like scheme://host:port — split out host/port.
                prx_info = self.proxy_url.split(':')
                handlers.append(
                    SocksiPyHandler(socks.PROXY_TYPE_SOCKS5,
                                    prx_info[1].replace("//", ''),
                                    int(prx_info[2])))
        else:
            # NOTE(review): collapsed source is ambiguous here — this `else`
            # is read as pairing with `if self.proxy_type` (no type set →
            # default HTTP proxy); confirm against upstream before changing.
            proxyHandler = urllib2.ProxyHandler({
                'http': self.proxy_url,
                'https': self.proxy_url,
            })
            handlers.append(proxyHandler)
    cookieHandler = urllib2.HTTPCookieProcessor(self._cookies)
    handlers.append(cookieHandler)
    opener = urllib2.build_opener(*handlers)
    req.add_header('User-Agent', self.user_agent)
    req.add_header('Content-Language', language)
    req.add_header("Accept-Encoding", "gzip")
    req.add_header("Origin", url)
    req.add_header("Referer", url)
    try:
        # Rate-limit ourselves before hitting the site.
        self._good_spider()
        with closing(opener.open(req)) as response:
            self.headers = response.headers
            self._save_cookies()
            if response.headers.get("Content-Encoding", "") == "gzip":
                import zlib
                # 16 + MAX_WBITS tells zlib to expect a gzip header.
                self.content = zlib.decompressobj(
                    16 + zlib.MAX_WBITS).decompress(response.read())
            else:
                self.content = response.read()
            # getparam is the Python 2 mimetools API for header parameters.
            charset = response.headers.getparam('charset')
            if not charset:
                # Fall back to sniffing a <meta ... charset=...> tag in the body.
                match = re.search(
                    """<meta(?!\s*(?:name|value)\s*=)[^>]*?charset\s*=[\s"']*([^\s"'/>]*)""",
                    self.content)
                if match:
                    charset = match.group(1)
            if charset and charset.lower() == 'utf-8':
                charset = 'utf-8-sig'  # Changing to utf-8-sig to remove BOM if found on decode from utf-8
            if charset:
                log.debug('Decoding charset from %s for %s' %
                          (charset, repr(url)))
                self.content = self.content.decode(charset, 'replace')
            self.status = response.getcode()
        result = True
    except urllib2.HTTPError as e:
        self.status = e.code
        log.warning("Status for %s : %s" % (repr(url), str(self.status)))
    except urllib2.URLError as e:
        self.status = repr(e.reason)
        log.warning("Status for %s : %s" % (repr(url), self.status))
    except Exception as e:
        import traceback
        log.error("%s failed with %s:" % (repr(url), repr(e)))
        # Python 2: map() is eager, so this logs every traceback line.
        map(log.debug, traceback.format_exc().split("\n"))
    log.debug("Status for %s : %s" % (repr(url), str(self.status)))
    return result
def search(payload, method="general"):
    """ Main search entrypoint

    Normalizes the payload, spawns one thread per enabled provider, shows a
    progress dialog while waiting (bounded by the module-level ``timeout``),
    then filters and returns the accumulated results. Mutates the
    module-level ``request_time``, ``provider_names``, ``provider_results``
    and ``available_providers`` globals shared with ``run_provider``.

    Args:
        payload (dict): Search payload from Elementum.
        method (str): Type of search, can be ``general``, ``movie``, ``show``, ``season`` or ``anime``

    Returns:
        list: All filtered results in the format Elementum expects
    """
    log.debug("Searching with payload (%s): %s" % (method, repr(payload)))

    if method == 'general':
        if 'query' in payload:
            payload['title'] = payload['query']
            payload['titles'] = {'source': payload['query']}
        else:
            # Bare string payload: wrap it into the expected dict shape.
            payload = {
                'title': payload,
                'titles': {
                    'source': payload
                },
            }

    # Lower-case the language codes of the available title translations.
    # NOTE(review): assumes 'titles' is always present for non-general
    # methods — a payload without it would raise KeyError here; confirm
    # against Elementum's callers.
    payload['titles'] = dict(
        (k.lower(), v) for k, v in payload['titles'].iteritems())

    # If titles[] exists in payload and there are special chars in titles[source]
    # then we set a flag to possibly modify the search query
    payload['has_special'] = 'titles' in payload and \
                             bool(payload['titles']) and \
                             'source' in payload['titles'] and \
                             any(c in payload['titles']['source'] for c in special_chars)
    if payload['has_special']:
        log.debug(
            "Query title contains special chars, so removing any quotes in the search query"
        )

    # Default the optional payload fields Elementum may omit.
    if 'proxy_url' not in payload:
        payload['proxy_url'] = ''
    if 'internal_proxy_url' not in payload:
        payload['internal_proxy_url'] = ''
    if 'elementum_url' not in payload:
        payload['elementum_url'] = ''
    if 'silent' not in payload:
        payload['silent'] = False
    if 'skip_auth' not in payload:
        payload['skip_auth'] = False

    # Reset the shared state the provider threads report into.
    global request_time
    global provider_names
    global provider_results
    global available_providers
    provider_names = []
    provider_results = []
    available_providers = 0
    request_time = time.time()

    providers = get_enabled_providers(method)

    if len(providers) == 0:
        if not payload['silent']:
            notify(translation(32060), image=get_icon_path())
        log.error("No providers enabled")
        return []

    log.info(
        "Burstin' with %s" %
        ", ".join([definitions[provider]['name'] for provider in providers]))

    if get_setting('kodi_language', bool):
        kodi_language = xbmc.getLanguage(xbmc.ISO_639_1)
        if not kodi_language:
            log.warning("Kodi returned empty language code...")
        elif 'titles' not in payload or not payload['titles']:
            log.info("No translations available...")
        elif payload['titles'] and kodi_language not in payload['titles']:
            log.info("No '%s' translation available..." % kodi_language)

    p_dialog = xbmcgui.DialogProgressBG()
    if not payload['silent']:
        p_dialog.create('Elementum [COLOR FFFF6B00]Burst[/COLOR]',
                        translation(32061))

    # One worker thread per provider; each decrements available_providers
    # (via run_provider) when it finishes.
    for provider in providers:
        available_providers += 1
        provider_names.append(definitions[provider]['name'])
        task = Thread(target=run_provider, args=(provider, payload, method))
        task.start()

    providers_time = time.time()
    total = float(available_providers)

    # Exit if all providers have returned results or timeout reached, check every 100ms
    while time.time() - providers_time < timeout and available_providers > 0:
        timer = time.time() - providers_time
        log.debug("Timer: %ds / %ds" % (timer, timeout))

        if timer > timeout:
            break

        message = translation(
            32062
        ) % available_providers if available_providers > 1 else translation(
            32063)
        if not payload['silent']:
            p_dialog.update(int((total - available_providers) / total * 100),
                            message=message)

        time.sleep(0.25)

    if not payload['silent']:
        p_dialog.close()
    del p_dialog

    if available_providers > 0:
        # Some providers never reported back before the timeout.
        message = u', '.join(provider_names)
        message = message + translation(32064)
        log.warning(message.encode('utf-8'))
        if not payload['silent']:
            notify(message, ADDON_ICON)

    log.debug("all provider_results: %s" % repr(provider_results))

    filtered_results = apply_filters(provider_results)

    log.debug("all filtered_results: %s" % repr(filtered_results))

    log.info("Providers returned %d results in %s seconds" %
             (len(filtered_results), round(time.time() - request_time, 2)))

    return filtered_results
def fix_bad_unicode(string):
    """
    https://blog.luminoso.com/2012/08/20/fix-unicode-mistakes-with-python/

    Something you will find all over the place, in real-world text, is text
    that's mistakenly encoded as utf-8, decoded in some ugly format like
    latin-1 or even Windows codepage 1252, and encoded as utf-8 again.

    This causes your perfectly good Unicode-aware code to end up with garbage
    text because someone else (or maybe 'someone else') made a mistake.

    This function looks for the evidence of that having happened and fixes it.
    It determines whether it should replace nonsense sequences of single-byte
    characters that were really meant to be UTF-8 characters, and if so, turns
    them into the correctly-encoded Unicode character that they were meant to
    represent.

    The input to the function must be Unicode. It's not going to try to
    auto-decode bytes for you -- then it would just create the problems it's
    supposed to fix.

    //>>> print fix_bad_unicode(u'único')
    único

    //>>> print fix_bad_unicode(u'This text is fine already :þ')
    This text is fine already :þ

    Because these characters often come from Microsoft products, we allow
    for the possibility that we get not just Unicode characters 128-255, but
    also Windows's conflicting idea of what characters 128-160 are.

    //>>> print fix_bad_unicode(u'This — should be an em dash')
    This — should be an em dash

    We might have to deal with both Windows characters and raw control
    characters at the same time, especially when dealing with characters like
    \x81 that have no mapping in Windows.

    //>>> print fix_bad_unicode(u'This text is sad .â\x81”.')
    This text is sad .?.

    This function even fixes multiple levels of badness:

    //>>> wtf = u'\xc3\xa0\xc2\xb2\xc2\xa0_\xc3\xa0\xc2\xb2\xc2\xa0'
    //>>> print fix_bad_unicode(wtf)
    ?_?

    However, it has safeguards against fixing sequences of letters and
    punctuation that can occur in valid text:

    //>>> print fix_bad_unicode(u'not such a fan of Charlotte Brontë…”')
    not such a fan of Charlotte Brontë…”

    Cases of genuine ambiguity can sometimes be addressed by finding other
    characters that are not double-encoding, and expecting the encoding to
    be consistent:

    //>>> print fix_bad_unicode(u'AHÅ™, the new sofa from IKEA®')
    AHÅ™, the new sofa from IKEA®

    Finally, we handle the case where the text is in a single-byte encoding
    that was intended as Windows-1252 all along but read as Latin-1:

    //>>> print fix_bad_unicode(u'This text was never Unicode at all\x85')
    This text was never Unicode at all…
    """
    if not isinstance(string, unicode):
        raise TypeError("This isn't even decoded into Unicode yet. "
                        'Decode it first.')
    if len(string) == 0:
        return string

    max_ord = max(ord(char) for char in string)
    if max_ord < 128:
        # Hooray! It's ASCII!
        return string
    else:
        try:
            # Each candidate is (text, badness); the original gets a length
            # penalty so a successful re-decode (which shortens it) wins ties.
            attempts = [(string, text_badness(string) + len(string))]
            if max_ord < 256:
                tried_fixing = reinterpret_latin1_as_utf8(string)
                tried_fixing2 = reinterpret_latin1_as_windows1252(string)
                attempts.append((tried_fixing, text_cost(tried_fixing)))
                attempts.append((tried_fixing2, text_cost(tried_fixing2)))
            elif all(ord(char) in WINDOWS_1252_CODEPOINTS for char in string):
                tried_fixing = reinterpret_windows1252_as_utf8(string)
                attempts.append((tried_fixing, text_cost(tried_fixing)))
            else:
                # We can't imagine how this would be anything but valid text.
                return string

            # Sort the results by badness
            attempts.sort(key=lambda x: x[1])
            # print attempts
            good_text = attempts[0][0]
            if good_text == string:
                return good_text
            else:
                # Something changed — recurse to peel off further layers of
                # mis-encoding.
                return fix_bad_unicode(good_text)
        except Exception as e:
            import traceback
            log.warning("Could not fix unicode string: %s" % repr(e))
            map(log.debug, traceback.format_exc().split("\n"))
            # NOTE(review): this path falls through and implicitly returns
            # None rather than the original string — confirm callers handle
            # a None result before changing.
def open(self, url, language='en', post_data=None, get_data=None):
    """ Opens a connection to a webpage and saves its HTML content in ``self.content``

    Optionally installs a custom DNS handler (``MyHTTPHandler``) when the
    'use_public_dns' setting is on, sends ``self.token`` as an Authorization
    header when present, decompresses gzip bodies and decodes the detected
    charset. Sets ``self.content``, ``self.headers`` and ``self.status`` and
    returns True on success.

    Args:
        url (str): The URL to open
        language (str): The language code for the ``Content-Language`` header
        post_data (dict): POST data for the request
        get_data (dict): GET data for the request
    """
    if not post_data:
        post_data = {}
    if get_data:
        # GET parameters are appended to the URL itself.
        url += '?' + urlencode(get_data)
    log.debug("Opening URL: %s" % repr(url))
    result = False
    # A non-empty body switches urllib2 to a POST request.
    data = urlencode(post_data) if len(post_data) > 0 else None
    req = urllib2.Request(url, data)
    self._read_cookies(url)
    log.debug("Cookies for %s: %s" % (repr(url), repr(self._cookies)))
    opener = urllib2.build_opener(
        urllib2.HTTPCookieProcessor(self._cookies))
    if get_setting("use_public_dns", bool):
        # Rebuild the opener with the DNS-overriding HTTP handler.
        opener = urllib2.build_opener(
            urllib2.HTTPCookieProcessor(self._cookies), MyHTTPHandler)
    urllib2.install_opener(opener)
    req.add_header('User-Agent', self.user_agent)
    req.add_header('Content-Language', language)
    req.add_header("Accept-Encoding", "gzip")
    req.add_header("Origin", url)
    req.add_header("Referer", url)
    if self.token:
        req.add_header("Authorization", self.token)
    try:
        # Rate-limit ourselves before hitting the site.
        self._good_spider()
        with closing(opener.open(req)) as response:
            self.headers = response.headers
            self._save_cookies()
            if response.headers.get("Content-Encoding", "") == "gzip":
                import zlib
                # 16 + MAX_WBITS tells zlib to expect a gzip header.
                self.content = zlib.decompressobj(
                    16 + zlib.MAX_WBITS).decompress(response.read())
            else:
                self.content = response.read()
            # getparam is the Python 2 mimetools API for header parameters.
            charset = response.headers.getparam('charset')
            if not charset:
                # Fall back to sniffing a <meta ... charset=...> tag in the body.
                match = re.search(
                    """<meta(?!\s*(?:name|value)\s*=)[^>]*?charset\s*=[\s"']*([^\s"'/>]*)""",
                    self.content)
                if match:
                    charset = match.group(1)
            if charset and charset.lower() == 'utf-8':
                charset = 'utf-8-sig'  # Changing to utf-8-sig to remove BOM if found on decode from utf-8
            if charset:
                log.debug('Decoding charset from %s for %s' %
                          (charset, repr(url)))
                self.content = self.content.decode(charset, 'replace')
            self.status = response.getcode()
        result = True
    except urllib2.HTTPError as e:
        self.status = e.code
        log.warning("Status for %s : %s" % (repr(url), str(self.status)))
        if e.code == 403 or e.code == 503:
            # Typical Cloudflare challenge responses.
            log.warning("CloudFlared at %s, try enabling CloudHole" % url)
    except urllib2.URLError as e:
        self.status = repr(e.reason)
        log.warning("Status for %s : %s" % (repr(url), self.status))
    except Exception as e:
        import traceback
        log.error("%s failed with %s:" % (repr(url), repr(e)))
        # Python 2: map() is eager, so this logs every traceback line.
        map(log.debug, traceback.format_exc().split("\n"))
    log.debug("Status for %s : %s" % (repr(url), str(self.status)))
    return result
def process(provider,
            generator,
            filtering,
            has_special,
            verify_name=True,
            verify_size=True):
    """ Method for processing provider results using its generator and Filtering class instance

    Builds the search URL / POST payload for each query of the provider,
    handles per-provider authentication (passkey, token, form login, plus
    hd-torrents/lostfilm special cases), fetches the results page and feeds
    it through ``generator`` into ``filtering.results``.

    Args:
        provider  (str): Provider ID
        generator (function): Generator method, can be either ``extract_torrents`` or ``extract_from_api``
        filtering (Filtering): Filtering class instance
        has_special (bool): Whether title contains special chars
        verify_name (bool): Whether to double-check the results' names match the query or not
        verify_size (bool): Whether to check the results' file sizes
    """
    log.debug("execute_process for %s with %s" % (provider, repr(generator)))
    definition = definitions[provider]
    # Apply any user-configured alias (mirror/alternate domain) on top.
    definition = get_alias(definition, get_setting("%s_alias" % provider))
    client = Client()
    token = None
    logged_in = False
    token_auth = False

    if get_setting("use_cloudhole", bool):
        client.clearance = get_setting('clearance')
        client.user_agent = get_setting('user_agent')

    if get_setting('kodi_language', bool):
        kodi_language = xbmc.getLanguage(xbmc.ISO_639_1)
        if kodi_language:
            filtering.kodi_language = kodi_language
    language_exceptions = get_setting('language_exceptions')
    if language_exceptions.strip().lower():
        filtering.language_exceptions = re.split(r',\s?',
                                                 language_exceptions)

    log.debug("[%s] Queries: %s" % (provider, filtering.queries))
    log.debug("[%s] Extras:  %s" % (provider, filtering.extras))

    # Queries and extras are parallel lists prepared by Filtering.
    for query, extra in zip(filtering.queries, filtering.extras):
        log.debug("[%s] Before keywords - Query: %s - Extra: %s" %
                  (provider, repr(query), repr(extra)))
        if has_special:
            # Removing quotes, surrounding {title*} keywords, when title contains special chars
            query = re.sub("[\"']({title.*?})[\"']", '\\1', query)
        query = filtering.process_keywords(provider, query)
        extra = filtering.process_keywords(provider, extra)
        if 'charset' in definition and 'utf' not in definition[
                'charset'].lower():
            # Non-UTF sites need the query re-encoded and URL-quoted.
            try:
                query = urllib.quote(query.encode(definition['charset']))
                extra = urllib.quote(extra.encode(definition['charset']))
            except:
                pass
        log.debug("[%s] After keywords - Query: %s - Extra: %s" %
                  (provider, repr(query), repr(extra)))
        if not query:
            return filtering.results

        url_search = filtering.url.replace('QUERY', query)
        if extra:
            url_search = url_search.replace('EXTRA', extra)
        else:
            url_search = url_search.replace('EXTRA', '')
        url_search = url_search.replace(' ', definition['separator'])

        # MagnetDL fix...
        url_search = url_search.replace('FIRSTLETTER', query[:1])

        # Creating the payload for POST method
        if 'post_data' in definition and not filtering.post_data:
            filtering.post_data = eval(definition['post_data'])
        payload = dict()
        for key, value in filtering.post_data.iteritems():
            if 'QUERY' in value:
                payload[key] = filtering.post_data[key].replace(
                    'QUERY', query)
            else:
                payload[key] = filtering.post_data[key]

        # Creating the payload for GET method
        data = None
        if filtering.get_data:
            data = dict()
            for key, value in filtering.get_data.iteritems():
                if 'QUERY' in value:
                    data[key] = filtering.get_data[key].replace(
                        'QUERY', query)
                else:
                    data[key] = filtering.get_data[key]

        log.debug("- %s query: %s" % (provider, repr(query)))
        log.debug("-- %s url_search before token: %s" %
                  (provider, repr(url_search)))
        log.debug("--- %s using POST payload: %s" %
                  (provider, repr(payload)))
        log.debug("----%s filtering with post_data: %s" %
                  (provider, repr(filtering.post_data)))

        # Set search's "title" in filtering to double-check results' names
        if 'filter_title' in definition and definition['filter_title']:
            filtering.filter_title = True
            filtering.title = query

        if token:
            # Token fetched on a previous loop iteration.
            log.info('[%s] Reusing existing token' % provider)
            url_search = url_search.replace('TOKEN', token)
        elif 'token' in definition:
            token_url = definition['base_url'] + definition['token']
            log.debug("Getting token for %s at %s" %
                      (provider, repr(token_url)))
            client.open(token_url.encode('utf-8'))
            try:
                token_data = json.loads(client.content)
            except:
                log.error('%s: Failed to get token for %s' %
                          (provider, repr(url_search)))
                return filtering.results
            log.debug("Token response for %s: %s" %
                      (provider, repr(token_data)))
            if 'token' in token_data:
                token = token_data['token']
                log.debug("Got token for %s: %s" %
                          (provider, repr(token)))
                url_search = url_search.replace('TOKEN', token)
            else:
                log.warning('%s: Unable to get token for %s' %
                            (provider, repr(url_search)))

        if logged_in:
            log.info("[%s] Reusing previous login" % provider)
        elif token_auth:
            log.info("[%s] Reusing previous token authorization" % provider)
        elif 'private' in definition and definition['private']:
            username = get_setting('%s_username' % provider)
            password = get_setting('%s_password' % provider)
            passkey = get_setting('%s_passkey' % provider)
            if not username and not password and not passkey:
                # Fall back to credentials from the legacy Magnetic addons.
                for addon_name in ('script.magnetic.%s' % provider,
                                   'script.magnetic.%s-mc' % provider):
                    for setting in ('username', 'password'):
                        try:
                            value = xbmcaddon.Addon(addon_name).getSetting(
                                setting)
                            set_setting('%s_%s' % (provider, setting), value)
                            if setting == 'username':
                                username = value
                            if setting == 'password':
                                password = value
                        except:
                            pass

            if passkey:
                # A passkey replaces the whole login flow.
                logged_in = True
                client.passkey = passkey
                url_search = url_search.replace('PASSKEY', passkey)

            elif 'login_object' in definition and definition['login_object']:
                logged_in = False
                login_object = definition['login_object'].replace(
                    'USERNAME', '"%s"' % username).replace(
                        'PASSWORD', '"%s"' % password)

                # TODO generic flags in definitions for those...
                if provider == 'hd-torrents':
                    # Fetch the login page to scrape the CSRF token first.
                    client.open(definition['root_url'] +
                                definition['login_path'])
                    if client.content:
                        csrf_token = re.search(
                            r'name="csrfToken" value="(.*?)"',
                            client.content)
                        if csrf_token:
                            login_object = login_object.replace(
                                'CSRF_TOKEN', '"%s"' % csrf_token.group(1))
                        else:
                            logged_in = True

                if provider == 'lostfilm':
                    # Probe a search endpoint to see if the session is live.
                    client.open(definition['root_url'] +
                                '/v_search.php?c=111&s=1&e=1')
                    # NOTE(review): 'is not' compares identity, not equality,
                    # so this is effectively always True — likely meant `!=`;
                    # confirm before changing.
                    if client.content is not 'log in first':
                        logged_in = True

                if 'token_auth' in definition:
                    # log.debug("[%s] logging in with: %s" % (provider, login_object))
                    if client.open(definition['root_url'] +
                                   definition['token_auth'],
                                   post_data=eval(login_object)):
                        try:
                            token_data = json.loads(client.content)
                        except:
                            log.error('%s: Failed to get token from %s' %
                                      (provider, definition['token_auth']))
                            return filtering.results
                        log.debug("Token response for %s: %s" %
                                  (provider, repr(token_data)))
                        if 'token' in token_data:
                            client.token = token_data['token']
                            log.debug("Auth token for %s: %s" %
                                      (provider, repr(client.token)))
                        else:
                            log.error('%s: Unable to get auth token for %s' %
                                      (provider, repr(url_search)))
                            return filtering.results
                        log.info('[%s] Token auth successful' % provider)
                        token_auth = True
                    else:
                        log.error("[%s] Token auth failed with response: %s" %
                                  (provider, repr(client.content)))
                        return filtering.results
                elif not logged_in and client.login(
                        definition['root_url'] + definition['login_path'],
                        eval(login_object), definition['login_failed']):
                    log.info('[%s] Login successful' % provider)
                    logged_in = True
                elif not logged_in:
                    log.error("[%s] Login failed: %s", provider,
                              client.status)
                    log.debug("[%s] Failed login content: %s", provider,
                              repr(client.content))
                    return filtering.results

                if logged_in:
                    if provider == 'hd-torrents':
                        # Search pages embed a fresh CSRF token.
                        client.open(definition['root_url'] + '/torrents.php')
                        csrf_token = re.search(
                            r'name="csrfToken" value="(.*?)"',
                            client.content)
                        url_search = url_search.replace(
                            "CSRF_TOKEN", csrf_token.group(1))
                    if provider == 'lostfilm':
                        log.info('[%s] Need open page before search',
                                 provider)
                        client.open(url_search.encode('utf-8'),
                                    post_data=payload,
                                    get_data=data)
                        search_info = re.search(r'PlayEpisode\((.*?)\)">',
                                                client.content)
                        if search_info:
                            # Extract channel/season/episode ids for the
                            # redirecting search endpoint.
                            series_details = re.search(
                                '\'(\d+)\',\'(\d+)\',\'(\d+)\'',
                                search_info.group(1))
                            client.open(definition['root_url'] +
                                        '/v_search.php?c=%s&s=%s&e=%s' %
                                        (series_details.group(1),
                                         series_details.group(2),
                                         series_details.group(3)))
                            redirect_url = re.search(ur'url=(.*?)">',
                                                     client.content)
                            if redirect_url is not None:
                                url_search = redirect_url.group(1)
                        else:
                            return filtering.results

        log.info("> %s search URL: %s" %
                 (definition['name'].rjust(longest), url_search))

        client.open(url_search.encode('utf-8'),
                    post_data=payload,
                    get_data=data)
        filtering.results.extend(
            generate_payload(provider, generator(provider, client),
                             filtering, verify_name, verify_size))
    return filtering.results
def open(self, url, language='en', post_data=None, get_data=None, headers=None, proxy_url=None, charset='utf8'):
    """ Opens a connection to a webpage and saves its HTML content in ``self.content``

    Args:
        url        (str): The URL to open
        language   (str): The language code for the ``Content-Language`` header
        post_data (dict): POST data for the request
        get_data  (dict): GET data for the request
        headers   (dict): Extra request headers; a falsy value removes the header instead
        proxy_url  (str): Proxy URL forwarded from Elementum (used when proxy_use_type == 0)
        charset    (str): Fallback charset hint (overwritten below by response/meta detection)

    Returns:
        bool: True when the request completed and content was read, False otherwise.
              On failure ``self.status`` holds the HTTP code or error reason.
    """
    if not post_data:
        post_data = {}
    if get_data:
        # GET parameters are appended to the URL; any existing query string is not checked for
        url += '?' + urlencode(get_data)

    log.debug("Opening URL: %s" % repr(url))
    result = False

    # A non-empty post_data dict turns this into a POST request
    data = urlencode(post_data) if len(post_data) > 0 else None
    req = urllib2.Request(url, data)

    # Load any previously persisted cookies for this client before the request
    self._read_cookies(url)
    log.debug("Cookies for %s: %s" % (repr(url), repr(self._cookies)))

    # Parsing proxy information
    proxy = {
        'enabled': get_setting("proxy_enabled", bool),
        'use_type': get_setting("proxy_use_type", int),
        'type': proxy_types[0],
        'host': get_setting("proxy_host", unicode),
        'port': get_setting("proxy_port", int),
        'login': get_setting("proxy_login", unicode),
        'password': get_setting("proxy_password", unicode),
    }

    try:
        # Falls back to proxy_types[0] (set above) when the setting is out of range
        proxy['type'] = proxy_types[get_setting("proxy_type", int)]
    except:
        pass

    handlers = [urllib2.HTTPCookieProcessor(self._cookies)]

    if get_setting("use_public_dns", bool):
        handlers.append(MyHTTPHandler)

    if proxy['enabled']:
        if proxy['use_type'] == 0 and proxy_url:
            # use_type 0: reuse the proxy Elementum itself is configured with
            log.debug("Setting proxy from Elementum: %s" % (proxy_url))
            handlers.append(parse_proxy_url(proxy_url))
        elif proxy['use_type'] == 1:
            # use_type 1: user-defined SOCKS/HTTP proxy from addon settings
            log.debug("Setting proxy with custom settings: %s" % (repr(proxy)))
            handlers.append(
                SocksiPyHandler(proxytype=proxy['type'], proxyaddr=proxy['host'],
                                proxyport=int(proxy['port']), username=proxy['login'],
                                password=proxy['password'], rdns=True))
        elif proxy['use_type'] == 2:
            # use_type 2: antizapret anti-censorship proxy; failure is non-fatal
            try:
                handlers.append(antizapret.AntizapretProxyHandler())
            except Exception as e:
                log.info("Could not create antizapret configuration: %s" % (e))

    opener = urllib2.build_opener(*handlers)

    req.add_header('User-Agent', self.user_agent)
    req.add_header('Content-Language', language)
    req.add_header("Accept-Encoding", "gzip")
    req.add_header("Origin", url)
    req.add_header("Referer", url)

    if headers:
        for key, value in headers.iteritems():
            if value:
                req.add_header(key, value)
            else:
                # A falsy value means "remove this default header"
                del req.headers[key.capitalize()]

    if self.token:
        # Token obtained by a prior token_auth login (see process())
        req.add_header("Authorization", self.token)

    try:
        # Rate-limiting / politeness hook before the actual request
        self._good_spider()
        with closing(opener.open(req)) as response:
            self.headers = response.headers
            self._save_cookies()
            if response.headers.get("Content-Encoding", "") == "gzip":
                import zlib
                # 16 + MAX_WBITS tells zlib to expect a gzip wrapper
                self.content = zlib.decompressobj(16 + zlib.MAX_WBITS).decompress(response.read())
            else:
                self.content = response.read()

            # Charset detection: response header first, then an HTML <meta charset> scan
            charset = response.headers.getparam('charset')

            if not charset:
                match = re.search("""<meta(?!\s*(?:name|value)\s*=)[^>]*?charset\s*=[\s"']*([^\s"'/>]*)""", self.content)
                if match:
                    charset = match.group(1)

            # We try to remove non-utf chars. Should we?
            if (charset and charset.lower() == 'utf-8') or charset is None:
                charset = 'utf-8-sig'  # Changing to utf-8-sig to remove BOM if found on decode from utf-8

            if charset:
                log.debug('Decoding charset from %s for %s' % (charset, repr(url)))
                self.content = self.content.decode(charset, 'replace')

            self.status = response.getcode()
            result = True
    except urllib2.HTTPError as e:
        self.status = e.code
        log.warning("Status for %s : %s" % (repr(url), str(self.status)))
        if e.code == 403 or e.code == 503:
            log.warning("CloudFlared at %s, try enabling CloudHole" % url)
    except urllib2.URLError as e:
        # No HTTP status available; store the failure reason instead
        self.status = repr(e.reason)
        log.warning("Status for %s : %s" % (repr(url), self.status))
    except Exception as e:
        import traceback
        log.error("%s failed with %s:" % (repr(url), repr(e)))
        map(log.debug, traceback.format_exc().split("\n"))

    log.debug("Status for %s : %s" % (repr(url), str(self.status)))

    return result
def process(provider, generator, filtering, has_special, verify_name=True, verify_size=True, skip_auth=False, start_time=None, timeout=None):
    """ Method for processing provider results using its generator and Filtering class instance

    Args:
        provider        (str): Provider ID
        generator  (function): Generator method, can be either ``extract_torrents`` or ``extract_from_api``
        filtering (Filtering): Filtering class instance
        has_special    (bool): Whether title contains special chars
        verify_name    (bool): Whether to double-check the results' names match the query or not
        verify_size    (bool): Whether to check the results' file sizes
        skip_auth      (bool): When True, mark the provider as logged in without authenticating
        start_time    (float): Overall task start time, used with ``timeout`` to abort late queries
        timeout       (float): Overall task timeout in seconds

    Returns:
        list: ``filtering.results`` accumulated across all queries (also returned early on
              token/login failures).
    """
    log.debug("[%s] execute_process for %s with %s" % (provider, provider, repr(generator)))
    definition = definitions[provider]
    # Apply any user-configured alias (mirror) for this provider
    definition = get_alias(definition, get_setting("%s_alias" % provider))

    client = Client(info=filtering.info, request_charset=definition['charset'],
                    response_charset=definition['response_charset'])
    # token / logged_in / token_auth are reused across queries in the loop below,
    # so authentication happens at most once per process() call.
    token = None
    logged_in = False
    token_auth = False

    if get_setting('kodi_language', bool):
        kodi_language = xbmc.getLanguage(xbmc.ISO_639_1)
        if kodi_language:
            filtering.kodi_language = kodi_language
        language_exceptions = get_setting('language_exceptions')
        if language_exceptions.strip().lower():
            filtering.language_exceptions = re.split(r',\s?', language_exceptions)

    log.debug("[%s] Queries: %s" % (provider, filtering.queries))
    log.debug("[%s] Extras: %s" % (provider, filtering.extras))

    # queries and extras are parallel lists: one URL/query template pair per search attempt
    for query, extra in zip(filtering.queries, filtering.extras):
        log.debug("[%s] Before keywords - Query: %s - Extra: %s" % (provider, repr(query), repr(extra)))
        if has_special:
            # Removing quotes, surrounding {title*} keywords, when title contains special chars
            query = re.sub("[\"']({title.*?})[\"']", '\\1', query)

        query = filtering.process_keywords(provider, query)
        extra = filtering.process_keywords(provider, extra)
        if not query:
            continue
        elif extra == '-' and filtering.results:
            # '-' extra is a fallback query: skip it when earlier queries already found results
            continue
        elif start_time and timeout and time.time() - start_time + 3 >= timeout:
            # Stop doing requests if there is 3 seconds left for the overall task
            continue

        try:
            # Non-UTF provider charsets need the query encoded before URL-quoting
            if 'charset' in definition and definition['charset'] and 'utf' not in definition['charset'].lower():
                query = quote(query.encode(definition['charset']))
                extra = quote(extra.encode(definition['charset']))
            else:
                query = quote(py2_encode(query))
                extra = quote(py2_encode(extra))
        except Exception as e:
            log.debug("[%s] Could not quote the query (%s): %s" % (provider, query, e))
            pass

        log.debug("[%s] After keywords - Query: %s - Extra: %s" % (provider, repr(query), repr(extra)))
        if not query:
            return filtering.results

        # Fill the provider's URL template placeholders
        url_search = filtering.url.replace('QUERY', query)
        if extra and extra != '-':
            url_search = url_search.replace('EXTRA', extra)
        else:
            url_search = url_search.replace('EXTRA', '')

        url_search = url_search.replace(' ', definition['separator'])
        if definition['separator'] != '%20':
            url_search = url_search.replace('%20', definition['separator'])

        # MagnetDL fix...
        url_search = url_search.replace('FIRSTLETTER', query[:1])

        # Creating the payload for POST method
        # NOTE(review): post_data/headers/login_object come from provider definition
        # files and are passed to eval() — they are trusted addon-bundled data here.
        if 'post_data' in definition and not filtering.post_data:
            filtering.post_data = eval(definition['post_data'])

        payload = dict()
        for key, value in iteritems(filtering.post_data):
            if 'QUERY' in value:
                payload[key] = filtering.post_data[key].replace('QUERY', query)
            else:
                payload[key] = filtering.post_data[key]
            # POST fields are sent un-quoted
            payload[key] = urllib.unquote(payload[key])

        # Creating the payload for GET method
        headers = None
        data = None
        if filtering.get_data:
            data = dict()
            for key, value in iteritems(filtering.get_data):
                if 'QUERY' in value:
                    data[key] = filtering.get_data[key].replace('QUERY', query)
                else:
                    data[key] = filtering.get_data[key]

        log.debug("- %s query: %s" % (provider, repr(query)))
        log.debug("-- %s url_search before token: %s" % (provider, repr(url_search)))
        log.debug("--- %s using POST payload: %s" % (provider, repr(payload)))
        log.debug("----%s filtering with post_data: %s" % (provider, repr(filtering.post_data)))

        # Set search's "title" in filtering to double-check results' names
        if 'filter_title' in definition and definition['filter_title']:
            filtering.filter_title = True
            filtering.title = query

        # Some providers need a warm-up page opened first (e.g. to set cookies)
        if 'initial_url' in definition and definition['initial_url']:
            url = definition['initial_url']
            if not url.startswith('http'):
                url = definition['root_url'] + url
            client.open(url)

        # --- API token handling (TOKEN placeholder in the search URL) ---
        if token:
            log.info('[%s] Reusing existing token' % provider)
            url_search = url_search.replace('TOKEN', token)
        elif 'token' in definition:
            token_url = definition['base_url'] + definition['token']
            log.debug("[%s] Getting token for %s at %s" % (provider, provider, repr(token_url)))
            client.open(py2_encode(token_url))
            try:
                token_data = json.loads(client.content)
            except:
                log.error('%s: Failed to get token for %s' % (provider, repr(url_search)))
                return filtering.results
            log.debug("[%s] Token response for %s: %s" % (provider, provider, repr(token_data)))
            if 'token' in token_data:
                token = token_data['token']
                log.debug("[%s] Got token for %s: %s" % (provider, provider, repr(token)))
                url_search = url_search.replace('TOKEN', token)
            else:
                log.warning('%s: Unable to get token for %s' % (provider, repr(url_search)))

        # --- Authentication for private trackers ---
        if logged_in:
            log.info("[%s] Reusing previous login" % provider)
        elif token_auth:
            log.info("[%s] Reusing previous token authorization" % provider)
        elif 'private' in definition and definition['private']:
            username = get_setting('%s_username' % provider, unicode)
            password = get_setting('%s_password' % provider, unicode)
            passkey = get_setting('%s_passkey' % provider, unicode)
            if not username and not password and not passkey:
                # Fallback: try importing credentials from legacy Magnetic addons
                for addon_name in ('script.magnetic.%s' % provider, 'script.magnetic.%s-mc' % provider):
                    for setting in ('username', 'password'):
                        try:
                            value = xbmcaddon.Addon(addon_name).getSetting(setting)
                            set_setting('%s_%s' % (provider, setting), value)
                            if setting == 'username':
                                username = value
                            if setting == 'password':
                                password = value
                        except:
                            pass

            if username:
                client.username = username
                url_search = url_search.replace('USERNAME', username)

            if passkey:
                # A passkey in the URL replaces interactive login entirely
                logged_in = True
                client.passkey = passkey
                url_search = url_search.replace('PASSKEY', passkey)

            elif 'login_object' in definition and definition['login_object']:
                login_object = None
                login_headers = None
                logged_in = skip_auth

                try:
                    login_object = definition['login_object'].replace('USERNAME', 'u"%s"' % username).replace('PASSWORD', 'u"%s"' % password)
                except Exception as e:
                    log.error("Could not make login object for %s: %s" % (provider, e))
                try:
                    if 'login_headers' in definition and definition['login_headers']:
                        login_headers = eval(definition['login_headers'])
                except Exception as e:
                    log.error("Could not make login headers for %s: %s" % (provider, e))

                # TODO generic flags in definitions for those...
                if 'csrf_token' in definition and definition['csrf_token']:
                    # Scrape the CSRF token from the login page into the login object
                    client.open(definition['root_url'] + definition['login_path'])
                    if client.content:
                        csrf_token = re.search(r'name=\"_?csrf_token\" value=\"(.*?)\"', client.content)
                        if csrf_token:
                            login_object = login_object.replace('CSRF_TOKEN', '"%s"' % csrf_token.group(1))
                        else:
                            # No token on the page: presumably already authenticated — TODO confirm
                            logged_in = True

                if 'token_auth' in definition:
                    # log.debug("[%s] logging in with: %s" % (provider, login_object))
                    if client.open(definition['root_url'] + definition['token_auth'], post_data=eval(login_object)):
                        try:
                            token_data = json.loads(client.content)
                        except:
                            log.error('%s: Failed to get token from %s' % (provider, definition['token_auth']))
                            return filtering.results
                        log.debug("[%s] Token response for %s: %s" % (provider, provider, repr(token_data)))
                        if 'token' in token_data:
                            # Stored on the client; sent later as the Authorization header
                            client.token = token_data['token']
                            log.debug("[%s] Auth token for %s: %s" % (provider, provider, repr(client.token)))
                        else:
                            log.error('[%s] Unable to get auth token for %s' % (provider, repr(url_search)))
                            return filtering.results
                        log.info('[%s] Token auth successful' % provider)
                        token_auth = True
                    else:
                        log.error("[%s] Token auth failed with response: %s" % (provider, repr(client.content)))
                        return filtering.results
                elif not logged_in and client.login(definition['root_url'], definition['login_path'],
                                                    eval(login_object), login_headers,
                                                    definition['login_failed']):
                    log.info('[%s] Login successful' % provider)
                    logged_in = True
                elif not logged_in:
                    log.error("[%s] Login failed: %s", provider, client.status)
                    log.debug("[%s] Failed login content: %s", provider, repr(client.content))
                    return filtering.results

            if logged_in:
                if provider == 'hd-torrents':
                    # hd-torrents embeds a per-session CSRF token in the search URL
                    client.open(definition['root_url'] + '/torrents.php')
                    csrf_token = re.search(r'name="csrfToken" value="(.*?)"', client.content)
                    url_search = url_search.replace("CSRF_TOKEN", csrf_token.group(1))

                client.save_cookies()

        log.info("[%s] > %s search URL: %s" % (provider, definition['name'].rjust(longest), url_search))

        if 'headers' in definition and definition['headers']:
            headers = eval(definition['headers'])
            log.info("[%s] > %s headers: %s" % (provider, definition['name'].rjust(longest), headers))

        # Run the actual search and feed the raw page/API response through the
        # provider-specific generator, then filter/normalize into results.
        client.open(py2_encode(url_search), post_data=payload, get_data=data, headers=headers)
        filtering.results.extend(
            generate_payload(provider,
                             generator(provider, client),
                             filtering,
                             verify_name,
                             verify_size))
    return filtering.results
def _parse_item(self, item):
    """Convert one torznab ``<item>`` XML element into an Elementum result dict.

    Walks the item's children, mapping ``<torznab:attr>`` elements and known
    tags (via ``self._torznab_elementum_mappings``) onto result keys, then
    resolves the magnet URI and normalizes seeds/peers/size/resolution.

    Args:
        item: an ``<item>`` Element from a torznab RSS feed.

    Returns:
        dict: the parsed result, or ``None`` when no usable name/uri was found.
    """
    result = {
        "name": None,
        "provider": "Unknown",
        "size": "Unknown",
        "uri": None,
        "seeds": "0",
        "peers": "0",
        "info_hash": "",
        "language": None,
        # todo would be nice to assign correct icons but that can be very time consuming due to the number
        # of indexers in Jackett
        "icon": get_icon_path(),
        "_size_bytes": -1
    }

    for ref in item:
        tag = ref.tag
        attrib = ref.attrib

        if tag == "{" + self._torznab_ns + "}attr":
            # BUGFIX: use .get() instead of attrib["value"] — the original
            # indexed "value" before checking `"value" in attrib`, so a
            # malformed <torznab:attr> without a value raised KeyError.
            val = attrib.get("value")
            if isinstance(val, str):
                val = val.decode("utf-8")  # Python 2: bytes -> unicode
            if "name" in attrib and attrib["name"] and val and \
                    attrib["name"] in self._torznab_elementum_mappings["torznab_attrs"]:
                json = self._torznab_elementum_mappings["torznab_attrs"][
                    attrib["name"]]
                result[json] = val
            continue

        # Plain RSS tags (title, size, link, ...) mapped straight to result keys
        if ref.tag in self._torznab_elementum_mappings[
                "tags"] and ref.text is not None:
            json = self._torznab_elementum_mappings["tags"][ref.tag]
            val = ref.text.strip()
            if isinstance(val, str):
                val = val.decode("utf-8")  # Python 2: bytes -> unicode
            result[json] = val

    # if we didn't get a magnet uri, attempt to resolve the magnet uri.
    # todo for some reason Elementum cannot resolve the link that gets proxied through Jackett.
    # So we will resolve it manually for Elementum for now.
    # In actuality, this should be fixed within Elementum
    if result["uri"] is None:
        link = item.find('link')
        jackett_uri = ""
        if link is not None:
            jackett_uri = link.text
        else:
            enclosure = item.find('enclosure')
            if enclosure is not None:
                jackett_uri = enclosure.attrib['url']

        if jackett_uri != "":
            result["uri"] = get_magnet_from_jackett(jackett_uri)

    if result["name"] is None or result["uri"] is None:
        log.warning("Could not parse item; name = %s; uri = %s",
                    result["name"], result["uri"])
        log.debug("Failed item is: %s",
                  ElementTree.tostring(item, encoding='utf8'))
        return None

    # result["name"] = result["name"].decode("utf-8") # might be needed for non-english items

    result["seeds"] = int(result["seeds"])
    result["peers"] = int(result["peers"])

    resolution = get_resolution(result["name"])
    # Index into the reversed resolution list so higher resolutions sort first
    result["resolution"] = utils.resolutions.keys()[::-1].index(resolution)
    result["_resolution"] = resolution
    result["release_type"] = get_release_type(result["name"])

    if result["size"] != "Unknown":
        result["_size_bytes"] = int(result["size"])
        result["size"] = human_size(result["_size_bytes"])
    return result