def extract_torrents(provider, client):
    """Main torrent extraction generator for non-API based providers.

    Feeds ``client.content`` to the HTML parser, evaluates the provider's
    parser queries (key, row, name, torrent, infohash, size, seeds, peers,
    referer) and yields one result per matched row. When the definition has
    a truthy ``subpage`` entry, non-magnet links are resolved in worker
    threads (one per row) and their results are yielded after all threads
    have been joined.

    Args:
        provider (str): Provider ID; key into ``definitions`` and log prefix.
        client (Client): Client class instance holding the fetched page
            (``content``) plus ``passkey``, ``token``, ``_cookies``,
            ``request_headers`` and ``url`` used to decorate torrent links.

    Yields:
        tuple: A torrent result as
        ``(name, info_hash, torrent, size, seeds, peers)``.
    """
    definition = definitions[provider]
    definition = get_alias(definition, get_setting("%s_alias" % provider))
    log.debug("[%s] Extracting torrents from %s using definitions: %s" % (provider, provider, repr(definition)))

    # Read the debug flag once instead of once per log statement / row.
    debug_parser = get_setting("use_debug_parser", bool)

    if not client.content:
        if debug_parser:
            log.debug("[%s] Parser debug | Page content is empty" % provider)

        # A bare return ends the generator. Raising StopIteration inside a
        # generator body is turned into RuntimeError by PEP 479.
        return

    # NOTE: `dom` (and `item` / `key` below) must keep these exact names; the
    # query strings are eval()'d in this scope and presumably reference them.
    dom = Html().feed(client.content)

    key_search = get_search_query(definition, "key")
    row_search = get_search_query(definition, "row")
    name_search = get_search_query(definition, "name")
    torrent_search = get_search_query(definition, "torrent")
    info_hash_search = get_search_query(definition, "infohash")
    size_search = get_search_query(definition, "size")
    seeds_search = get_search_query(definition, "seeds")
    peers_search = get_search_query(definition, "peers")
    referer_search = get_search_query(definition, "referer")

    log.debug("[%s] Parser: %s" % (provider, repr(definition['parser'])))

    q = Queue()
    threads = []
    needs_subpage = 'subpage' in definition and definition['subpage']

    if needs_subpage:
        def extract_subpage(q, name, torrent, size, seeds, peers, info_hash, referer):
            """Fetch the subpage behind ``torrent`` and queue the resolved result."""
            try:
                log.debug("[%s] Getting subpage at %s" % (provider, repr(torrent)))
            except Exception as e:
                import traceback
                log.error("[%s] Subpage logging failed with: %s" % (provider, repr(e)))
                # Explicit loop: map() is lazy on Python 3 and would log nothing.
                for line in traceback.format_exc().split("\n"):
                    log.debug(line)

            # New client instance, otherwise it's race conditions all over the place
            subclient = Client()
            subclient.passkey = client.passkey
            headers = {}

            if "subpage_mode" in definition:
                if definition["subpage_mode"] == "xhr":
                    headers['X-Requested-With'] = 'XMLHttpRequest'
                    headers['Content-Language'] = ''

            if referer:
                headers['Referer'] = referer

            uri = torrent.split('|')  # Split cookies for private trackers
            subclient.open(uri[0].encode('utf-8'), headers=headers)

            if 'bittorrent' in subclient.headers.get('content-type', ''):
                log.debug('[%s] bittorrent content-type for %s' % (provider, repr(torrent)))
                # NOTE(review): `torrent` still holds the original
                # 'url|headers' string here, so this appends uri[1] a second
                # time - confirm this is intended.
                if len(uri) > 1:  # Stick back cookies if needed
                    torrent = '%s|%s' % (torrent, uri[1])
            else:
                try:
                    torrent = extract_from_page(provider, subclient.content)
                    if torrent and not torrent.startswith('magnet') and len(uri) > 1:  # Stick back cookies if needed
                        torrent = '%s|%s' % (torrent, uri[1])
                except Exception as e:
                    import traceback
                    log.error("[%s] Subpage extraction for %s failed with: %s" % (provider, repr(uri[0]), repr(e)))
                    for line in traceback.format_exc().split("\n"):
                        log.debug(line)

            ret = (name, info_hash, torrent, size, seeds, peers)
            q.put_nowait(ret)

    if not dom:
        if debug_parser:
            log.debug("[%s] Parser debug | Could not parse DOM from page content" % provider)

        return

    if debug_parser:
        log.debug("[%s] Parser debug | Page content: %s" % (provider, client.content.replace('\r', '').replace('\n', '')))

    # NOTE(security): the parser queries are eval()'d as Python expressions;
    # they must only ever come from trusted provider definitions.
    key = eval(key_search) if key_search else ""
    if key_search and debug_parser:
        key_str = key.__str__()
        log.debug("[%s] Parser debug | Matched '%s' iteration for query '%s': %s" % (provider, 'key', key_search, key_str.replace('\r', '').replace('\n', '')))

    items = eval(row_search)
    if debug_parser:
        log.debug("[%s] Parser debug | Matched %d items for '%s' query '%s'" % (provider, len(items), 'row', row_search))

    for item in items:
        if debug_parser:
            item_str = item.__str__()
            log.debug("[%s] Parser debug | Matched '%s' iteration for query '%s': %s" % (provider, 'row', row_search, item_str.replace('\r', '').replace('\n', '')))

        if not item:
            continue

        # A failure on one row is logged and must not abort the remaining rows.
        try:
            name = eval(name_search) if name_search else ""
            torrent = eval(torrent_search) if torrent_search else ""
            size = eval(size_search) if size_search else ""
            seeds = eval(seeds_search) if seeds_search else ""
            peers = eval(peers_search) if peers_search else ""
            info_hash = eval(info_hash_search) if info_hash_search else ""
            referer = eval(referer_search) if referer_search else ""

            # Drop anything preceding the magnet URI itself.
            if 'magnet:?' in torrent:
                torrent = torrent[torrent.find('magnet:?'):]

            if debug_parser:
                log.debug("[%s] Parser debug | Matched '%s' iteration for query '%s': %s" % (provider, 'name', name_search, name))
                log.debug("[%s] Parser debug | Matched '%s' iteration for query '%s': %s" % (provider, 'torrent', torrent_search, torrent))
                log.debug("[%s] Parser debug | Matched '%s' iteration for query '%s': %s" % (provider, 'size', size_search, size))
                log.debug("[%s] Parser debug | Matched '%s' iteration for query '%s': %s" % (provider, 'seeds', seeds_search, seeds))
                log.debug("[%s] Parser debug | Matched '%s' iteration for query '%s': %s" % (provider, 'peers', peers_search, peers))
                if info_hash_search:
                    log.debug("[%s] Parser debug | Matched '%s' iteration for query '%s': %s" % (provider, 'info_hash', info_hash_search, info_hash))
                if referer_search:
                    # Label fixed: this line reports the referer query, not info_hash.
                    log.debug("[%s] Parser debug | Matched '%s' iteration for query '%s': %s" % (provider, 'referer', referer_search, referer))

            # Pass client cookies with torrent if private
            if not torrent.startswith('magnet'):
                user_agent = USER_AGENT

                if client.passkey:
                    torrent = torrent.replace('PASSKEY', client.passkey)
                elif client.token:
                    headers = {'Authorization': client.token, 'User-Agent': user_agent}
                    log.debug("[%s] Appending headers: %s" % (provider, repr(headers)))
                    torrent = append_headers(torrent, headers)
                    log.debug("[%s] Torrent with headers: %s" % (provider, repr(torrent)))
                else:
                    # Collect the session cookies whose domain contains the
                    # link's host (with any leading "www<digits>." removed).
                    parsed_url = urlparse(torrent.split('|')[0])
                    cookie_domain = '{uri.netloc}'.format(uri=parsed_url)
                    cookie_domain = re.sub(r'www\d*\.', '', cookie_domain)
                    cookies = [cookie for cookie in client._cookies if cookie_domain in cookie.domain]

                    if cookies:
                        headers = {'User-Agent': user_agent}
                        log.debug("[%s] Cookies res: %s / %s" % (provider, repr(headers), repr(client.request_headers)))
                        if client.request_headers:
                            headers.update(client.request_headers)
                        if client.url:
                            headers['Referer'] = client.url
                            headers['Origin'] = client.url
                        # Need to set Cookie afterwards to avoid rewriting it with session Cookies
                        headers['Cookie'] = ";".join(["%s=%s" % (c.name, c.value) for c in cookies])
                    else:
                        headers = {'User-Agent': user_agent}

                    torrent = append_headers(torrent, headers)

            if name and torrent and needs_subpage and not torrent.startswith('magnet'):
                if not torrent.startswith('http'):
                    torrent = definition['root_url'] + torrent.encode('utf-8')
                t = Thread(target=extract_subpage, args=(q, name, torrent, size, seeds, peers, info_hash, referer))
                threads.append(t)
            else:
                yield (name, info_hash, torrent, size, seeds, peers)
        except Exception as e:
            log.error("[%s] Got an exception while parsing results: %s" % (provider, repr(e)))

    if needs_subpage:
        log.debug("[%s] Starting subpage threads..." % provider)
        for t in threads:
            t.start()
        for t in threads:
            t.join()

        # All workers have finished, so the queue size is final here.
        for i in range(q.qsize()):
            ret = q.get_nowait()
            log.debug("[%s] Queue %d got: %s" % (provider, i, repr(ret)))
            yield ret
def extract_torrents(provider, client):
    """Main torrent extraction generator for non-API based providers.

    Feeds ``client.content`` to the HTML parser, evaluates the provider's
    parser queries (row, name, torrent, infohash, size, seeds, peers,
    referer) and yields one result per matched row. When the definition has
    a truthy ``subpage`` entry, non-magnet links are resolved in worker
    threads (one per row) and their results are yielded after all threads
    have been joined.

    Args:
        provider (str): Provider ID; key into ``definitions`` and log prefix.
        client (Client): Client class instance holding the fetched page
            (``content``) plus ``passkey``, ``token`` and ``_cookies`` used
            to decorate torrent links of private providers.

    Yields:
        tuple: A torrent result as
        ``(name, info_hash, torrent, size, seeds, peers)``.
    """
    definition = definitions[provider]
    log.debug("Extracting torrents from %s using definitions: %s" % (provider, repr(definition)))

    if not client.content:
        # A bare return ends the generator. Raising StopIteration inside a
        # generator body is turned into RuntimeError by PEP 479.
        return

    # NOTE: `dom` (and `item` below) must keep these exact names; the query
    # strings are eval()'d in this scope and presumably reference them.
    dom = Html().feed(client.content)

    row_search = get_parser(definition, "row")
    name_search = get_parser(definition, "name")
    torrent_search = get_parser(definition, "torrent")
    info_hash_search = get_parser(definition, "infohash")
    size_search = get_parser(definition, "size")
    seeds_search = get_parser(definition, "seeds")
    peers_search = get_parser(definition, "peers")
    referer_search = get_parser(definition, "referer")

    log.debug("[%s] Parser: %s" % (provider, repr(definition['parser'])))

    q = Queue()
    threads = []
    needs_subpage = 'subpage' in definition and definition['subpage']

    if needs_subpage:
        def extract_subpage(q, name, torrent, size, seeds, peers, info_hash, referer):
            """Fetch the subpage behind ``torrent`` and queue the resolved result."""
            try:
                log.debug("[%s] Getting subpage at %s" % (provider, repr(torrent)))
            except Exception as e:
                import traceback
                log.error("[%s] Subpage logging failed with: %s" % (provider, repr(e)))
                # Explicit loop: map() is lazy on Python 3 and would log nothing.
                for line in traceback.format_exc().split("\n"):
                    log.debug(line)

            # New client instance, otherwise it's race conditions all over the place
            subclient = Client()
            subclient.passkey = client.passkey
            headers = {}

            if "subpage_mode" in definition:
                if definition["subpage_mode"] == "xhr":
                    headers['X-Requested-With'] = 'XMLHttpRequest'
                    headers['Content-Language'] = ''

            if referer:
                headers['Referer'] = referer

            # Request headers are handed to the sub-client before opening.
            subclient.headers = headers

            uri = torrent.split('|')  # Split cookies for private trackers
            subclient.open(uri[0].encode('utf-8'))

            if 'bittorrent' in subclient.headers.get('content-type', ''):
                log.debug('[%s] bittorrent content-type for %s' % (provider, repr(torrent)))
                # NOTE(review): `torrent` still holds the original
                # 'url|headers' string here, so this appends uri[1] a second
                # time - confirm this is intended.
                if len(uri) > 1:  # Stick back cookies if needed
                    torrent = '%s|%s' % (torrent, uri[1])
            else:
                try:
                    torrent = extract_from_page(provider, subclient.content)
                    if torrent and not torrent.startswith('magnet') and len(uri) > 1:  # Stick back cookies if needed
                        torrent = '%s|%s' % (torrent, uri[1])
                except Exception as e:
                    import traceback
                    log.error("[%s] Subpage extraction for %s failed with: %s" % (provider, repr(uri[0]), repr(e)))
                    for line in traceback.format_exc().split("\n"):
                        log.debug(line)

            ret = (name, info_hash, torrent, size, seeds, peers)
            q.put_nowait(ret)

    if not dom:
        return

    # NOTE(security): the parser queries are eval()'d as Python expressions;
    # they must only ever come from trusted provider definitions.
    for item in eval(row_search):
        if not item:
            continue

        name = eval(name_search) if name_search else ""
        torrent = eval(torrent_search) if torrent_search else ""
        size = eval(size_search) if size_search else ""
        seeds = eval(seeds_search) if seeds_search else ""
        peers = eval(peers_search) if peers_search else ""
        info_hash = eval(info_hash_search) if info_hash_search else ""
        referer = eval(referer_search) if referer_search else ""

        # Drop anything preceding the magnet URI itself.
        if 'magnet:?' in torrent:
            torrent = torrent[torrent.find('magnet:?'):]

        # Pass client cookies with torrent if private
        if definition['private'] and not torrent.startswith('magnet'):
            user_agent = USER_AGENT

            if client.passkey:
                torrent = torrent.replace('PASSKEY', client.passkey)
            elif client.token:
                headers = {'Authorization': client.token, 'User-Agent': user_agent}
                log.debug("[%s] Appending headers: %s" % (provider, repr(headers)))
                torrent = append_headers(torrent, headers)
                log.debug("[%s] Torrent with headers: %s" % (provider, repr(torrent)))
            else:
                log.debug("[%s] Cookies: %s" % (provider, repr(client.cookies())))
                # Collect the session cookies whose domain contains the
                # provider's host (with any leading "www<digits>." removed).
                parsed_url = urlparse(definition['root_url'])
                cookie_domain = '{uri.netloc}'.format(uri=parsed_url)
                cookie_domain = re.sub(r'www\d*\.', '', cookie_domain)
                cookies = [cookie for cookie in client._cookies if cookie_domain in cookie.domain]

                headers = {'User-Agent': user_agent}
                if cookies:
                    headers['Cookie'] = ";".join(["%s=%s" % (c.name, c.value) for c in cookies])
                torrent = append_headers(torrent, headers)
                log.debug("[%s] Torrent with headers: %s" % (provider, repr(torrent)))

        if name and torrent and needs_subpage and not torrent.startswith('magnet'):
            if not torrent.startswith('http'):
                torrent = definition['root_url'] + torrent.encode('utf-8')
            t = Thread(target=extract_subpage, args=(q, name, torrent, size, seeds, peers, info_hash, referer))
            threads.append(t)
        else:
            yield (name, info_hash, torrent, size, seeds, peers)

    if needs_subpage:
        log.debug("[%s] Starting subpage threads..." % provider)
        for t in threads:
            t.start()
        for t in threads:
            t.join()

        log.debug("[%s] Threads returned: %s" % (provider, repr(threads)))

        # All workers have finished, so the queue size is final here.
        for i in range(q.qsize()):
            ret = q.get_nowait()
            log.debug("[%s] Queue %d got: %s" % (provider, i, repr(ret)))
            yield ret
def extract_torrents(provider, client):
    """Main torrent extraction generator for non-API based providers.

    Feeds ``client.content`` to the HTML parser, evaluates the parser
    queries from ``definition['parser']`` (row, name, torrent, infohash,
    size, seeds, peers) and yields one result per matched row. When the
    definition has a truthy ``subpage`` entry, links are resolved in worker
    threads (one per row) and their results are yielded after all threads
    have been joined.

    Args:
        provider (str): Provider ID; key into ``definitions`` and log prefix.
        client (Client): Client class instance holding the fetched page
            (``content``) plus ``passkey``, ``token`` and ``_cookies`` used
            to decorate torrent links.

    Yields:
        tuple: A torrent result as
        ``(name, info_hash, torrent, size, seeds, peers)``.
    """
    definition = definitions[provider]
    definition = get_alias(definition, get_setting("%s_alias" % provider))
    log.debug("Extracting torrents from %s using definitions: %s" % (provider, repr(definition)))

    if not client.content:
        # A bare return ends the generator. Raising StopIteration inside a
        # generator body is turned into RuntimeError by PEP 479.
        return

    # NOTE: `dom` must keep this exact name, the row query below is prefixed
    # with "dom." and eval()'d in this scope. The per-row queries presumably
    # reference `item` the same way.
    dom = Html().feed(client.content)

    row_search = "dom." + definition['parser']['row']
    name_search = definition['parser']['name']
    torrent_search = definition['parser']['torrent']
    info_hash_search = definition['parser']['infohash']
    size_search = definition['parser']['size']
    seeds_search = definition['parser']['seeds']
    peers_search = definition['parser']['peers']

    log.debug("[%s] Parser: %s" % (provider, repr(definition['parser'])))

    q = Queue()
    threads = []
    needs_subpage = 'subpage' in definition and definition['subpage']

    if needs_subpage:
        def extract_subpage(q, name, torrent, size, seeds, peers, info_hash):
            """Fetch the subpage behind ``torrent`` and queue the resolved result."""
            try:
                log.debug("[%s] Getting subpage at %s" % (provider, repr(torrent)))
            except Exception as e:
                import traceback
                log.error("[%s] Subpage logging failed with: %s" % (provider, repr(e)))
                # Explicit loop: map() is lazy on Python 3 and would log nothing.
                for line in traceback.format_exc().split("\n"):
                    log.debug(line)

            # New client instance, otherwise it's race conditions all over the place
            subclient = Client()
            subclient.passkey = client.passkey

            if get_setting("use_cloudhole", bool):
                subclient.clearance = get_setting('clearance')
                subclient.user_agent = get_setting('user_agent')

            uri = torrent.split('|')  # Split cookies for private trackers
            subclient.open(uri[0].encode('utf-8'))

            if 'bittorrent' in subclient.headers.get('content-type', ''):
                log.debug('[%s] bittorrent content-type for %s' % (provider, repr(torrent)))
                # NOTE(review): `torrent` still holds the original
                # 'url|headers' string here, so this appends uri[1] a second
                # time - confirm this is intended.
                if len(uri) > 1:  # Stick back cookies if needed
                    torrent = '%s|%s' % (torrent, uri[1])
            else:
                try:
                    torrent = extract_from_page(provider, subclient.content)
                    if torrent and not torrent.startswith('magnet') and len(uri) > 1:  # Stick back cookies if needed
                        torrent = '%s|%s' % (torrent, uri[1])
                except Exception as e:
                    import traceback
                    log.error("[%s] Subpage extraction for %s failed with: %s" % (provider, repr(uri[0]), repr(e)))
                    for line in traceback.format_exc().split("\n"):
                        log.debug(line)

            ret = (name, info_hash, torrent, size, seeds, peers)
            q.put_nowait(ret)

    if not dom:
        return

    # Read the debug flag once instead of once per row.
    debug_parser = get_setting("use_debug_parser", bool)

    # NOTE(security): the parser queries are eval()'d as Python expressions;
    # they must only ever come from trusted provider definitions.
    # The row query is evaluated a single time and reused for both the debug
    # count and the iteration.
    items = eval(row_search)

    if debug_parser:
        log.debug("[%s] Parser debug | Page content: %s" % (provider, client.content.replace('\r', '').replace('\n', '')))
        log.debug("[%s] Parser debug | Matched %d items for '%s' query '%s'" % (provider, len(items), 'row', row_search))

    for item in items:
        if debug_parser:
            item_str = item.__str__()
            log.debug("[%s] Parser debug | Matched '%s' iteration for query '%s': %s" % (provider, 'row', row_search, item_str.replace('\r', '').replace('\n', '')))

        if not item:
            continue

        name = eval(name_search)
        torrent = eval(torrent_search) if torrent_search else ""
        size = eval(size_search) if size_search else ""
        seeds = eval(seeds_search) if seeds_search else ""
        peers = eval(peers_search) if peers_search else ""
        info_hash = eval(info_hash_search) if info_hash_search else ""

        if debug_parser:
            log.debug("[%s] Parser debug | Matched '%s' iteration for query '%s': %s" % (provider, 'name', name_search, name))
            log.debug("[%s] Parser debug | Matched '%s' iteration for query '%s': %s" % (provider, 'torrent', torrent_search, torrent))
            log.debug("[%s] Parser debug | Matched '%s' iteration for query '%s': %s" % (provider, 'size', size_search, size))
            log.debug("[%s] Parser debug | Matched '%s' iteration for query '%s': %s" % (provider, 'seeds', seeds_search, seeds))
            log.debug("[%s] Parser debug | Matched '%s' iteration for query '%s': %s" % (provider, 'peers', peers_search, peers))
            log.debug("[%s] Parser debug | Matched '%s' iteration for query '%s': %s" % (provider, 'info_hash', info_hash_search, info_hash))

        # Pass client cookies with torrent if private
        if (definition['private'] or get_setting("use_cloudhole", bool)) and not torrent.startswith('magnet'):
            user_agent = USER_AGENT
            if get_setting("use_cloudhole", bool):
                user_agent = get_setting("user_agent")

            if client.passkey:
                torrent = torrent.replace('PASSKEY', client.passkey)
            elif client.token:
                headers = {'Authorization': client.token, 'User-Agent': user_agent}
                log.debug("[%s] Appending headers: %s" % (provider, repr(headers)))
                torrent = append_headers(torrent, headers)
                log.debug("[%s] Torrent with headers: %s" % (provider, repr(torrent)))
            else:
                log.debug("[%s] Cookies: %s" % (provider, repr(client.cookies())))
                # Collect the session cookies whose domain contains the
                # provider's host (with "www." removed).
                parsed_url = urlparse(definition['root_url'])
                cookie_domain = '{uri.netloc}'.format(uri=parsed_url).replace('www.', '')
                cookies = []
                log.debug("[%s] cookie_domain: %s" % (provider, cookie_domain))
                for cookie in client._cookies:
                    log.debug("[%s] cookie for domain: %s (%s=%s)" % (provider, cookie.domain, cookie.name, cookie.value))
                    if cookie_domain in cookie.domain:
                        cookies.append(cookie)

                # Headers are only appended when at least one cookie matched.
                if cookies:
                    headers = {'Cookie': ";".join(["%s=%s" % (c.name, c.value) for c in cookies]), 'User-Agent': user_agent}
                    log.debug("[%s] Appending headers: %s" % (provider, repr(headers)))
                    torrent = append_headers(torrent, headers)
                    log.debug("[%s] Torrent with headers: %s" % (provider, repr(torrent)))

        if name and torrent and needs_subpage:
            if not torrent.startswith('http'):
                torrent = definition['root_url'] + torrent.encode('utf-8')
            t = Thread(target=extract_subpage, args=(q, name, torrent, size, seeds, peers, info_hash))
            threads.append(t)
        else:
            yield (name, info_hash, torrent, size, seeds, peers)

    if needs_subpage:
        log.debug("[%s] Starting subpage threads..." % provider)
        for t in threads:
            t.start()
        for t in threads:
            t.join()

        log.debug("[%s] Threads returned: %s" % (provider, repr(threads)))

        # All workers have finished, so the queue size is final here.
        for i in range(q.qsize()):
            ret = q.get_nowait()
            log.debug("[%s] Queue %d got: %s" % (provider, i, repr(ret)))
            yield ret
def search(self, searchTerm, category=None):
    """Log in, run a keyword search and return the parsed torrent entries.

    Args:
        searchTerm: Keyword sent as the ``keyword`` request parameter.
        category: Optional key into ``_Partis__CATEGORIES``; when falsy the
            ``category`` request parameter is sent as an empty string.

    Returns:
        list: One dict per ``div.listek`` element of the response, with the
        keys ``name``, ``uri``, ``info_hash``, ``size``, ``provider``,
        ``icon``, ``seeds``, ``peers``, ``is_private`` and ``Multi``.
    """
    def _count_at(row, position):
        # Any failure (lookup or int conversion) counts as zero.
        try:
            return int(row(tag='div', select=('class', 'datat'), order=position))
        except Exception:
            return 0

    # Login
    cookies = self.__login()

    # Cookie string appended to every download link so it can be resolved
    # later; only the two session cookies are carried over.
    wanted_cookies = ('auth_token', '_partis16')
    cookieStr = ''.join(
        cookie.name + '=' + cookie.value + ';'
        for cookie in cookies
        if cookie.name in wanted_cookies
    )

    # First request to /torrent/show, otherwise it forces a redirect
    _show_response, _ = self.__request(_Partis__SHOW_URL, {}, cookies, {}, None, 'GET')

    # Do actual search
    categories = ','.join(_Partis__CATEGORIES[category]) if category else ''
    params = {
        'keyword': searchTerm,
        'category': categories,
        'offset': '0',
        'option': '0',
        'ns': 'true',
        'rnd': '0.'
    }
    xhr_headers = {'X-Requested-With': 'XMLHttpRequest'}
    searchResponse, _ = self.__request(_Partis__SEARCH_URL, params, cookies, xhr_headers, None, 'GET')

    # Fix HTML before parsing
    raw_html = searchResponse.read()
    searchHtml = raw_html.replace('/></div>', '></div>')

    # Parse torrents
    searchDom = Html().feed(searchHtml)
    results = []
    for listek in searchDom.find_all(tag='div', select=('class', 'listek')):
        # Basic info
        tId = listek(tag='div', select=('class', 'likona'), attribute='id')
        link_holder = listek.find_once(tag='div', select=('class', 'listeklink'))
        tName = link_holder(tag='a')

        # Download link
        download_holder = listek.find_once(tag='div', select=('class', 'data3t'))
        tDldLink = download_holder(tag='a', attribute='href')

        size = listek(tag='div', select=('class', 'datat'), order=1)
        seeders = _count_at(listek, 2)
        peers = _count_at(listek, 3)

        entry = {
            "name": tName,
            "uri": _Partis__BASE_URL + tDldLink + '|Cookie=' + cookieStr,
            "info_hash": 'PARTIS_' + tId,
            "size": size,
            "provider": _Partis__NAME,
            "icon": 'logo.png',
            "seeds": seeders,
            "peers": peers,
            "is_private": True,
            "Multi": False
        }
        results.append(entry)

    return results