def search(self, task, entry, config=None):
    """
    Search for name from piratebay.

    :param task: flexget Task; its requests session performs the HTTP GETs
    :param entry: entry whose 'search_strings' (or 'title') drive the query
    :param config: optional dict with 'url', 'sort_by', 'sort_reverse', 'category'
    :return: list of entries sorted by torrent availability, best first
    """
    if not isinstance(config, dict):
        config = {}
    self.set_urls(config.get('url', URL))
    sort = SORT.get(config.get('sort_by', 'seeds'))
    if config.get('sort_reverse'):
        # TPB encodes reverse order as sort-id + 1
        sort += 1
    if isinstance(config.get('category'), int):
        category = config['category']
    else:
        category = CATEGORIES.get(config.get('category', 'all'))
    filter_url = '/0/%d/%d' % (sort, category)
    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        query = normalize_unicode(search_string)
        # TPB search doesn't like dashes or quotes
        query = query.replace('-', ' ').replace("'", " ")
        # urllib.quote will crash if the unicode string has non ascii characters,
        # so encode in utf-8 beforehand
        url = '%s/search/%s%s' % (self.url, quote(query.encode('utf-8')), filter_url)
        log.debug('Using %s as piratebay search url' % url)
        page = task.requests.get(url).content
        soup = get_soup(page)
        for link in soup.find_all('a', attrs={'class': 'detLink'}):
            entry = Entry()
            entry['title'] = self.extract_title(link)
            if not entry['title']:
                log.error('Malformed search result. No title or url found. Skipping.')
                continue
            href = link.get('href')
            if href.startswith('/'):  # relative link?
                href = self.url + href
            entry['url'] = href
            tds = link.parent.parent.parent.find_all('td')
            entry['torrent_seeds'] = int(tds[-2].contents[0])
            entry['torrent_leeches'] = int(tds[-1].contents[0])
            entry['torrent_availability'] = torrent_availability(
                entry['torrent_seeds'], entry['torrent_leeches']
            )
            # Parse content_size
            size_text = link.find_next(attrs={'class': 'detDesc'}).get_text()
            if size_text:
                size = re.search(r'Size (\d+(\.\d+)?\xa0(?:[PTGMK])?i?B)', size_text)
                if size:
                    entry['content_size'] = parse_filesize(size.group(1))
                else:
                    log.error(
                        'Malformed search result? Title: "%s", No size? %s',
                        entry['title'],
                        size_text,
                    )
            entries.add(entry)
    # BUG FIX: entries carry 'torrent_availability' (set above), never
    # 'search_sort', so sorting on 'search_sort' compared only None values
    # and returned results in arbitrary order.
    return sorted(entries, reverse=True, key=lambda x: x.get('torrent_availability'))
def parse_entry(self, res):
    """Parse one result row *res* into an Entry, or return None if nuked.

    Builds an RSS-style download URL carrying the configured passkey.
    """
    entry = Entry()
    entry['title'] = res.find('a', {'class': 'torrent_name_link'})['title']
    # skip if nuked
    if res.find('img', alt='Nuked'):
        log.info('Skipping entry %s (nuked)', entry['title'])
        return None
    details_url = res.find('a', {'class': 'torrent_name_link'})['href']
    torrent_id = parse_qs(urlsplit(details_url).query)['id'][0]
    params = {
        'type': 'rss',
        'id': torrent_id,
        'passkey': self.config['passkey'],
    }
    # NOTE(review): `urllib.urlencode` is the Python 2 location; on Python 3
    # this would need `urllib.parse.urlencode` — confirm target runtime.
    url = '%s/%s?%s' % (BASE_URL, DL_PAGE, urllib.urlencode(params))
    entry['url'] = url
    # BUG FIX: arguments must be passed individually, not as a single tuple,
    # otherwise logging raises "not enough arguments for format string".
    log.debug('Title: %s | DL LINK: %s', entry['title'], entry['url'])
    seeds = res.find('td', {'class': 'table_seeders'}).find('span').text.strip()
    leechers = res.find('td', {'class': 'table_leechers'}).find('a').text.strip()
    entry['torrent_seeds'] = int(seeds)
    entry['torrent_leeches'] = int(leechers)
    size = res.find('td', attrs={'class': re.compile('table_size')}).text
    entry['content_size'] = parse_filesize(size)
    return entry
def create_entries(self, soup, imdb_id=None):
    """Build one Entry per download link row in a PirateTheNet result page.

    :param soup: parsed results page
    :param imdb_id: optional imdb id to tag onto every entry
    :return: list of entries
    """
    dl_pattern = re.compile(r'download\.php\?torrent=\d+')
    detail_pattern = re.compile(r'detail\.php\?id')
    entries = []
    for anchor in soup.findAll('a', attrs={'href': dl_pattern}):
        row = anchor.find_parent('tr')
        entry = Entry()
        entry['title'] = row.find('a', attrs={'href': detail_pattern}).text
        entry['url'] = 'http://piratethenet.org' + row.find(
            'a', attrs={'href': dl_pattern}
        ).get('href')
        entry['torrent_seeds'] = int(row.find(title='Number of Seeders').text)
        entry['torrent_leeches'] = int(row.find(title='Number of Leechers').text)
        entry['torrent_availability'] = torrent_availability(
            entry['torrent_seeds'], entry['torrent_leeches']
        )
        # site reports decimal (SI=False -> binary) sizes; keep original flag
        entry['content_size'] = parse_filesize(
            str(row.find(title='Torrent size').text), si=False
        )
        if imdb_id:
            entry['imdb_id'] = imdb_id
        entries.append(entry)
    return entries
def parse_result_entry(self, entry_page): se = '\.' + self.se + '\.' ## if se = S01 or 01 dont match with Staffelpack Demo.S01E99.German.Dubstepped.DL.EbayHD.x264-VHS english = self.config['language'] == 'english' entries = [] search_result_entries = [] filesize = 0 for p in entry_page.find_all('p'): if p.strong is not None and p.strong.text not in self.EXCLUDES: if english: if p.strong.find(text=re.compile(se, flags=re.IGNORECASE)) and not p.strong.find(text=re.compile("german", flags=re.IGNORECASE)): search_result_entries.append(self.parse_entry(p, filesize)) else: if p.strong.find(text=re.compile(se, flags=re.IGNORECASE)) and p.strong.find(text=re.compile("german", flags=re.IGNORECASE)): search_result_entries.append(self.parse_entry(p, filesize)) elif(p.find("strong", text="Größe:")): size = p.find("strong", text="Größe:").next_sibling ## experimental size = re.sub(' +',' ',size) # remove multiple whitespaces size = size.replace("|","").strip() # remove | and strip whitespaces size = re.findall('([\d]+ [\w]+)',size) if len(size) > 0: filesize = parse_filesize(size[0]) ## check for more result pages next_link = entry_page.find("a", text="»") if next_link: next_page = self.get_url_content(next_link['href']) search_result_entries.extend(self.parse_result_entry(next_page)) return [x for x in search_result_entries if x is not None]
def extract_entry_from_soup(self, soup):
    """Parse the Fuzer search-results table in *soup* into a list of Entries.

    Returns [] when the table contains only its header row (no results).
    """
    table = soup.find('div', {'id': 'main_table'}).find('table', {'class': 'table_info'})
    if len(table.find_all('tr')) == 1:
        log.debug('No search results were returned, continuing')
        return []
    entries = []
    for tr in table.find_all("tr"):
        # skip rows without a class and the header row
        if not tr.get('class') or 'colhead_dark' in tr.get('class'):
            continue
        name = tr.find('div', {'class': 'main_title'}).find('a').text
        # the torrent file name is embedded after a CRLF in the title attribute
        torrent_name = re.search('\\r\\n(.*)', tr.find('div', {'style': 'float: right;'}).find('a')['title']).group(1)
        attachment_link = tr.find('div', {'style': 'float: right;'}).find('a')['href']
        attachment_id = re.search('attachmentid\=(\d+)', attachment_link).group(1)
        raw_size = tr.find_all('td', {'class': 'inline_info'})[0].text.strip()
        seeders = int(tr.find_all('td', {'class': 'inline_info'})[2].text)
        leechers = int(tr.find_all('td', {'class': 'inline_info'})[3].text)
        e = Entry()
        e['title'] = name
        # build an RSS-style download URL carrying the user's id and rss key
        final_url = 'https://www.fuzer.me/rss/torrent.php/{}/{}/{}/{}'.format(attachment_id, self.user_id, self.rss_key, torrent_name)
        log.debug('RSS-ified download link: %s' % final_url)
        e['url'] = final_url
        e['torrent_seeds'] = seeders
        e['torrent_leeches'] = leechers
        e['search_sort'] = torrent_availability(e['torrent_seeds'], e['torrent_leeches'])
        size = re.search('(\d+.?\d+)([TGMK]?)B', raw_size)
        e['content_size'] = parse_filesize(size.group(0))
        entries.append(e)
    return entries
def search(self, task, entry, config=None):
    """
    Search for name from piratebay.

    :param task: flexget Task; its requests session performs the HTTP GETs
    :param entry: entry whose 'search_strings' (or 'title') drive the query
    :param config: optional dict with 'url', 'sort_by', 'sort_reverse', 'category'
    :return: list of entries sorted by torrent availability, best first
    """
    if not isinstance(config, dict):
        config = {}
    self.set_urls(config.get('url', URL))
    sort = SORT.get(config.get('sort_by', 'seeds'))
    if config.get('sort_reverse'):
        # TPB encodes reverse order as sort-id + 1
        sort += 1
    if isinstance(config.get('category'), int):
        category = config['category']
    else:
        category = CATEGORIES.get(config.get('category', 'all'))
    filter_url = '/0/%d/%d' % (sort, category)
    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        query = normalize_unicode(search_string)
        # TPB search doesn't like dashes or quotes
        query = query.replace('-', ' ').replace("'", " ")
        # urllib.quote will crash if the unicode string has non ascii characters, so encode in utf-8 beforehand
        url = '%s/search/%s%s' % (self.url, quote(query.encode('utf-8')), filter_url)
        log.debug('Using %s as piratebay search url' % url)
        page = task.requests.get(url).content
        soup = get_soup(page)
        for link in soup.find_all('a', attrs={'class': 'detLink'}):
            entry = Entry()
            entry['title'] = self.extract_title(link)
            if not entry['title']:
                log.error('Malformed search result. No title or url found. Skipping.')
                continue
            href = link.get('href')
            if href.startswith('/'):  # relative link?
                href = self.url + href
            entry['url'] = href
            # seeders/leechers live in the last two cells of the row
            tds = link.parent.parent.parent.find_all('td')
            entry['torrent_seeds'] = int(tds[-2].contents[0])
            entry['torrent_leeches'] = int(tds[-1].contents[0])
            entry['torrent_availability'] = torrent_availability(
                entry['torrent_seeds'], entry['torrent_leeches']
            )
            # Parse content_size
            size_text = link.find_next(attrs={'class': 'detDesc'}).get_text()
            if size_text:
                # size appears as e.g. "Size 1.2\xa0GiB" (\xa0 = non-breaking space)
                size = re.search(r'Size (\d+(\.\d+)?\xa0(?:[PTGMK])?i?B)', size_text)
                if size:
                    entry['content_size'] = parse_filesize(size.group(1))
                else:
                    log.error(
                        'Malformed search result? Title: "%s", No size? %s',
                        entry['title'],
                        size_text,
                    )
            entries.add(entry)
    return sorted(entries, reverse=True, key=lambda x: x.get('torrent_availability'))
def search(self, task, entry, config=None):
    """
    Search for name from iptorrents

    :param task: flexget Task (unused beyond signature compatibility here)
    :param entry: entry whose 'search_strings' (or 'title') drive the query
    :param config: dict with 'category', 'uid', 'password', optional 'rss_key'
    :return: set of entries found
    :raises plugin.PluginError: when the supplied cookies are not logged in
    """
    categories = config.get('category', 'All')
    # Make sure categories is a list
    if not isinstance(categories, list):
        categories = [categories]
    # If there are any text categories, turn them into their id number
    categories = [c if isinstance(c, int) else CATEGORIES[c] for c in categories]
    filter_url = '&'.join((str(c) + '=') for c in categories)
    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        query = normalize_unicode(search_string)
        query = quote_plus(query.encode('utf8'))
        url = "{base_url}/t?{filter}&q={query}&qf=".format(
            base_url=BASE_URL, filter=filter_url, query=query)
        log.debug('searching with url: %s' % url)
        req = requests.get(
            url, cookies={'uid': str(config['uid']), 'pass': config['password']})
        # the logged-in page always links to the user's own profile
        if '/u/' + str(config['uid']) not in req.text:
            raise plugin.PluginError("Invalid cookies (user not logged in)...")
        soup = get_soup(req.content, parser="html.parser")
        torrents = soup.find('table', {'id': 'torrents'})
        results = torrents.findAll('tr')
        for torrent in results:
            if torrent.th and 'ac' in torrent.th.get('class'):
                # Header column
                continue
            if torrent.find('td', {'colspan': '99'}):
                log.debug('No results found for search %s', search_string)
                # BUG FIX: was `return`, which aborted the whole search and
                # returned None, discarding results from earlier search
                # strings. `break` skips only this empty result page.
                break
            entry = Entry()
            link = torrent.find('a', href=re.compile('download'))['href']
            entry['url'] = "{base}{link}?torrent_pass={key}".format(
                base=BASE_URL, link=link, key=config.get('rss_key'))
            entry['title'] = torrent.find('a', href=re.compile('details')).text
            seeders = torrent.findNext('td', {'class': 'ac t_seeders'}).text
            leechers = torrent.findNext('td', {'class': 'ac t_leechers'}).text
            entry['torrent_seeds'] = int(seeders)
            entry['torrent_leeches'] = int(leechers)
            entry['search_sort'] = torrent_availability(
                entry['torrent_seeds'], entry['torrent_leeches'])
            size = torrent.findNext(text=re.compile('^([\.\d]+) ([GMK]?)B$'))
            size = re.search('^([\.\d]+) ([GMK]?)B$', size)
            entry['content_size'] = parse_filesize(size.group(0))
            log.debug('Found entry %s', entry)
            entries.add(entry)
    return entries
def parse_result_entry(self, entry_page):
    """Parse a single release page into a one-element SearchResultEntry list.

    Extracts the title, the size from the description block, an imdb link
    (if any anchor text mentions imdb), and all hoster download links.
    """
    title = entry_page.find("div", id="title").find("a")["title"]
    beschreibung = entry_page.find("div", {"class": "beschreibung"})  # "description" block
    size = 0
    sizetag = beschreibung.find("strong", text="Größe:")
    if sizetag:
        size = parse_filesize(sizetag.next_sibling)
    #else:
    #    print "no sizetag!!"
    #    pre = beschreibung.find("pre").text
    #    if pre:
    #        se = re.find('(File size)[\s]+:([ \d\.\w]+)',pre)
    imdb_url = ""
    links = entry_page.find("div", id="content").findAll("a")
    for link in links:
        if "imdb" in link.text.lower():
            url = link['href']
            # strip everything up to and including '?' (redirect wrapper)
            imdb_url = url[url.index('?') + 1:].strip()
            if imdb_url and not imdb_url.startswith('http'):
                imdb_url = 'http://' + imdb_url
    links = entry_page.findAll("span", {"style": "display:inline;"})
    dl_links = []
    for link in links:
        # only keep links from hosters the config accepts
        if self.contains_hoster_variant(link.text):
            dl_links.append(link.a["href"])
    return [SearchResultEntry(title, size, dl_links, imdb_url)]
def get_entries(self, search_results):
    """Generator that yields Entry objects from search results"""
    release_keys = ('artist', 'groupName', 'groupYear')
    torrent_keys = ('media', 'encoding', 'format', 'torrentId')
    for result in search_results:
        # shared release-level fields, copied into each torrent variant below
        base_info = {k: result[k] for k in release_keys}
        # Releases can have multiple download options
        for tor in result['torrents']:
            info = dict(base_info)
            info.update({k: tor[k] for k in torrent_keys})
            title = (
                "{artist} - {groupName} - {groupYear} "
                "({media} - {format} - {encoding})-{torrentId}.torrent".format(**info)
            )
            url = (
                "{}/torrents.php?action=download&id={}&authkey={}&torrent_pass={}"
                "".format(self.base_url, info['torrentId'], self.authkey, self.passkey)
            )
            yield Entry(
                title=title,
                url=url,
                torrent_seeds=tor['seeders'],
                torrent_leeches=tor['leechers'],
                # Size is returned in bytes
                content_size=parse_filesize(str(tor['size']) + "b"),
            )
def create_entries(self, soup, imdb_id=None):
    """Build one Entry per download link row in a PirateTheNet result page.

    :param soup: parsed results page
    :param imdb_id: optional imdb id to tag onto every entry
    :return: list of entries
    """
    entries = []
    links = soup.findAll(
        'a', attrs={'href': re.compile(r'download\.php\?torrent=\d+')})
    # each download link sits inside the table row describing the torrent
    rows = [l.find_parent('tr') for l in links]
    for row in rows:
        entry = Entry()
        entry['title'] = row.find('a', attrs={
            'href': re.compile(r'detail\.php\?id')
        }).text
        dl_href = row.find('a', attrs={
            'href': re.compile(r'download\.php\?torrent=\d+')
        }).get('href')
        entry['url'] = 'http://piratethenet.org' + dl_href
        entry['torrent_seeds'] = int(
            row.find(title='Number of Seeders').text)
        entry['torrent_leeches'] = int(
            row.find(title='Number of Leechers').text)
        entry['torrent_availability'] = torrent_availability(
            entry['torrent_seeds'], entry['torrent_leeches'])
        entry['content_size'] = parse_filesize(str(
            row.find(title='Torrent size').text), si=False)
        if imdb_id:
            entry['imdb_id'] = imdb_id
        entries.append(entry)
    return entries
def search(self, task, entry, config=None):
    """
    Search for name from iptorrents

    :param task: flexget Task (unused beyond signature compatibility here)
    :param entry: entry whose 'search_strings' (or 'title') drive the query
    :param config: dict with 'category', 'uid', 'password', optional 'rss_key'
    :return: set of entries found
    :raises plugin.PluginError: when the supplied cookies are not logged in
    """
    categories = config.get('category', 'All')
    # Make sure categories is a list
    if not isinstance(categories, list):
        categories = [categories]
    # If there are any text categories, turn them into their id number
    categories = [c if isinstance(c, int) else CATEGORIES[c] for c in categories]
    filter_url = '&'.join((str(c) + '=') for c in categories)
    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        query = normalize_unicode(search_string)
        query = quote_plus(query.encode('utf8'))
        url = "{base_url}/t?{filter}&q={query}&qf=".format(base_url=BASE_URL, filter=filter_url, query=query)
        log.debug('searching with url: %s' % url)
        req = requests.get(url, cookies={'uid': str(config['uid']), 'pass': config['password']})
        # the logged-in page always links to the user's own profile
        if '/u/' + str(config['uid']) not in req.text:
            raise plugin.PluginError("Invalid cookies (user not logged in)...")
        soup = get_soup(req.content, parser="html.parser")
        torrents = soup.find('table', {'id': 'torrents'})
        results = torrents.findAll('tr')
        for torrent in results:
            if torrent.th and 'ac' in torrent.th.get('class'):
                # Header column
                continue
            # a colspan=99 cell is the "no results" placeholder row
            if torrent.find('td', {'colspan': '99'}):
                log.debug('No results found for search %s', search_string)
                break
            entry = Entry()
            link = torrent.find('a', href=re.compile('download'))['href']
            entry['url'] = "{base}{link}?torrent_pass={key}".format(
                base=BASE_URL, link=link, key=config.get('rss_key'))
            entry['title'] = torrent.find('a', href=re.compile('details')).text
            seeders = torrent.findNext('td', {'class': 'ac t_seeders'}).text
            leechers = torrent.findNext('td', {'class': 'ac t_leechers'}).text
            entry['torrent_seeds'] = int(seeders)
            entry['torrent_leeches'] = int(leechers)
            entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
            size = torrent.findNext(text=re.compile('^([\.\d]+) ([GMK]?)B$'))
            size = re.search('^([\.\d]+) ([GMK]?)B$', size)
            entry['content_size'] = parse_filesize(size.group(0))
            log.debug('Found entry %s', entry)
            entries.add(entry)
    return entries
def parse_result_entry(self, entry_page):
    """Parse a single release page into a one-element SearchResultEntry list.

    Returns [] when the page does not have the expected structure.
    """
    # BUG FIX: the original had `finally: return []`, which unconditionally
    # overrode the `try` block's return value — the function could never
    # return any results. The empty-list fallback now lives in `except` only.
    try:
        beitrag = entry_page.find("div", {"id": "content"})
        title = entry_page.find(
            "div", {"class": self.html_entry_class}
        ).find(self.html_entry_title_element).a.text
        size = 0
        sizetag = beitrag.find("strong", text="Größe: ")
        if sizetag:
            size = parse_filesize(sizetag.next_sibling.replace("|", "").strip())
        links = entry_page.findAll("a")
        dl_links = []
        imdb_url = ""
        for link in links:
            if "imdb" in link.text.lower():
                imdb_url = link["href"]
            # only keep links from hosters the config accepts
            if self.contains_hoster_variant(link.text):
                dl_links.append(link["href"])
        return [SearchResultEntry(title, size, dl_links, imdb_url)]
    except Exception:
        log.error("Got unexpected result page - maybe no valid search results on that page?")
        #self.log_soup_to_file(entry_page)
        return []
def search(self, task, entry, config=None):
    """
    Search for name from torrentleech.

    Logs in with the configured credentials, queries the JSON browse API,
    and returns entries sorted by availability (best first).

    :raises PluginError: if the login POST fails
    """
    # TL rejects default python user agents; mimic curl instead
    request_headers = {'User-Agent': 'curl/7.54.0'}
    rss_key = config['rss_key']
    # build the form request:
    data = {'username': config['username'], 'password': config['password']}
    # POST the login form:
    try:
        login = task.requests.post('https://www.torrentleech.org/user/account/login/', data=data,
                                   headers=request_headers, allow_redirects=True)
    except RequestException as e:
        raise PluginError('Could not connect to torrentleech: %s' % str(e))
    if not isinstance(config, dict):
        config = {}
    # sort = SORT.get(config.get('sort_by', 'seeds'))
    # if config.get('sort_reverse'):
    #     sort += 1
    categories = config.get('category', 'all')
    # Make sure categories is a list
    if not isinstance(categories, list):
        categories = [categories]
    # If there are any text categories, turn them into their id number
    categories = [c if isinstance(c, int) else CATEGORIES[c] for c in categories]
    filter_url = '/categories/{}'.format(','.join(str(c) for c in categories))
    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        query = normalize_unicode(search_string).replace(":", "")
        # urllib.quote will crash if the unicode string has non ascii characters,
        # so encode in utf-8 beforehand
        url = ('https://www.torrentleech.org/torrents/browse/list/query/' +
               quote(query.encode('utf-8')) + filter_url)
        log.debug('Using %s as torrentleech search url', url)
        results = task.requests.get(url, headers=request_headers, cookies=login.cookies).json()
        for torrent in results['torrentList']:
            entry = Entry()
            # downstream downloader must reuse the same user agent
            entry['download_headers'] = request_headers
            entry['title'] = torrent['name']
            # construct download URL
            torrent_url = 'https://www.torrentleech.org/rss/download/{}/{}/{}'.format(
                torrent['fid'], rss_key, torrent['filename'])
            log.debug('RSS-ified download link: %s', torrent_url)
            entry['url'] = torrent_url
            # seeders/leechers
            entry['torrent_seeds'] = torrent['seeders']
            entry['torrent_leeches'] = torrent['leechers']
            entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
            # API reports size in bytes
            entry['content_size'] = parse_filesize(str(torrent['size']) + ' b')
            entries.add(entry)
    return sorted(entries, reverse=True, key=lambda x: x.get('search_sort'))
def search(self, task, entry, config=None):
    """
    Search for name from torrentleech.

    Logs in with the configured credentials, queries the JSON browse API,
    and returns entries sorted by availability (best first).

    :raises PluginError: if the login POST fails
    """
    # TL rejects default python user agents; mimic curl instead
    request_headers = {'User-Agent': 'curl/7.54.0'}
    rss_key = config['rss_key']
    # build the form request:
    data = {'username': config['username'], 'password': config['password']}
    # POST the login form:
    try:
        login = task.requests.post('https://www.torrentleech.org/user/account/login/', data=data,
                                   headers=request_headers, allow_redirects=True)
    except RequestException as e:
        # BUG FIX: PluginError takes a single message (its second positional
        # is the logger), so '%s' was never interpolated; format explicitly.
        raise PluginError('Could not connect to torrentleech: %s' % str(e))
    if not isinstance(config, dict):
        config = {}
    # sort = SORT.get(config.get('sort_by', 'seeds'))
    # if config.get('sort_reverse'):
    #     sort += 1
    categories = config.get('category', 'all')
    # Make sure categories is a list
    if not isinstance(categories, list):
        categories = [categories]
    # If there are any text categories, turn them into their id number
    categories = [c if isinstance(c, int) else CATEGORIES[c] for c in categories]
    filter_url = '/categories/{}'.format(','.join(str(c) for c in categories))
    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        query = normalize_unicode(search_string).replace(":", "")
        # urllib.quote will crash if the unicode string has non ascii characters,
        # so encode in utf-8 beforehand
        url = ('https://www.torrentleech.org/torrents/browse/list/query/' +
               quote(query.encode('utf-8')) + filter_url)
        log.debug('Using %s as torrentleech search url', url)
        results = task.requests.get(url, headers=request_headers, cookies=login.cookies).json()
        for torrent in results['torrentList']:
            entry = Entry()
            # downstream downloader must reuse the same user agent
            entry['download_headers'] = request_headers
            entry['title'] = torrent['name']
            # construct download URL
            torrent_url = 'https://www.torrentleech.org/rss/download/{}/{}/{}'.format(
                torrent['fid'], rss_key, torrent['filename'])
            log.debug('RSS-ified download link: %s', torrent_url)
            entry['url'] = torrent_url
            # seeders/leechers
            entry['torrent_seeds'] = torrent['seeders']
            entry['torrent_leeches'] = torrent['leechers']
            entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
            # API reports size in bytes
            entry['content_size'] = parse_filesize(str(torrent['size']) + ' b')
            entries.add(entry)
    return sorted(entries, reverse=True, key=lambda x: x.get('search_sort'))
def search(self, task, entry, config):
    """
    Search for entries on 1337x

    :param config: optional dict; 'order_by' selects the sort column
        ('leechers' is the site default and needs no URL fragment)
    :return: set of entries found
    """
    if not isinstance(config, dict):
        config = {}
    order_by = ''
    sort_order = ''
    if isinstance(config.get('order_by'), str):
        if config['order_by'] != 'leechers':
            # non-default ordering needs both a sort path prefix and suffix
            order_by = '/{0}/desc'.format(config['order_by'])
            sort_order = 'sort-'
    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        query = '{0}search/{1}{2}/1/'.format(
            sort_order, quote(search_string.encode('utf8')), order_by)
        log.debug('Using search params: %s; ordering by: %s', search_string, order_by or 'default')
        try:
            page = task.requests.get(self.base_url + query)
            log.debug('requesting: %s', page.url)
        except RequestException as e:
            log.error('1337x request failed: %s', e)
            continue
        soup = get_soup(page.content)
        if soup.find('div', attrs={'class': 'tab-detail'}) is not None:
            for link in soup.find('div', attrs={
                'class': 'tab-detail'
            }).findAll('a', href=re.compile('^/torrent/')):
                # surrounding <li> holds seeds/leeches/size cells
                li = link.parent.parent.parent
                title = str(link.text).replace('...', '')
                info_url = self.base_url + str(link.get('href'))[1:]
                seeds = int(li.find('span', class_='green').string)
                leeches = int(li.find('span', class_='red').string)
                size = str(li.find('div', class_='coll-4').string)
                size = parse_filesize(size)
                e = Entry()
                e['url'] = info_url
                e['title'] = title
                e['torrent_seeds'] = seeds
                e['torrent_leeches'] = leeches
                e['search_sort'] = torrent_availability(
                    e['torrent_seeds'], e['torrent_leeches'])
                e['content_size'] = size
                entries.add(e)
    return entries
def search(self, task, entry, config=None):
    """
    Search for entries on SceneAccess

    Logs in once per session, rate-limits requests to the domain, and
    searches each configured category for every search string.

    :param config: dict with 'username', 'password', optional
        'gravity_multiplier' (scales availability-based sorting weight)
    :return: set of entries found
    """
    session = task.requests
    if 'sceneaccess.eu' not in session.domain_limiters:
        # be polite: at most one request per 7 seconds to the site
        session.add_domain_limiter(TimedLimiter('sceneaccess.eu', '7 seconds'))
    if not session.cookies:
        log.debug('Logging in to %s...' % URL)
        params = {'username': config['username'],
                  'password': config['password'],
                  'submit': 'come on in'}
        session.post(URL + 'login', data=params)
    if 'gravity_multiplier' in config:
        multip = config['gravity_multiplier']
    else:
        multip = 1
    # Prepare queries...
    base_urls = list()
    entries = set()
    for category in self.process_categories(config):
        base_urls.append(URL + '%(url_path)s?method=2%(category_url_string)s' % category)
    # Search...
    for search_string in entry.get('search_strings', [entry['title']]):
        search_string_normalized = normalize_unicode(clean_title(search_string))
        search_string_url_fragment = '&search=' + quote(search_string_normalized.encode('utf8'))
        for url in base_urls:
            url += search_string_url_fragment
            log.debug('Search URL for `%s`: %s' % (search_string, url))
            page = session.get(url).content
            soup = get_soup(page)
            for result in soup.findAll('tr', attrs={'class': 'tt_row'}):
                entry = Entry()
                entry['title'] = result.find('a', href=re.compile(r'details\?id=\d+'))['title']
                entry['url'] = URL + result.find('a', href=re.compile(r'.torrent$'))['href']
                # BUG FIX: seed/leech counts were stored as raw strings;
                # every sibling plugin stores ints, and torrent_availability
                # needs numbers to weight results correctly.
                entry['torrent_seeds'] = int(result.find('td', attrs={'class': 'ttr_seeders'}).text)
                entry['torrent_leeches'] = int(result.find('td', attrs={'class': 'ttr_leechers'}).text)
                entry['search_sort'] = torrent_availability(
                    entry['torrent_seeds'], entry['torrent_leeches']) * multip
                size = result.find('td', attrs={'class': 'ttr_size'}).text
                size = re.search('(\d+(?:[.,]\d+)*)\s?([KMG]B)', size)
                entry['content_size'] = parse_filesize(size.group(0))
                entries.add(entry)
    return entries
def search(self, task, entry, config):
    """
    Search for entries on MoreThanTV

    :param config: dict with 'username', 'password', 'all_tags', 'order_by',
        'order_way', optional 'category' and 'tags'
    :return: set of entries found
    """
    params = {}
    if 'category' in config:
        categories = config['category'] if isinstance(config['category'], list) else [config['category']]
        # each selected category becomes its own flag parameter set to 1
        for category in categories:
            params[CATEGORIES[category]] = 1
    if 'tags' in config:
        tags = config['tags'] if isinstance(config['tags'], list) else [config['tags']]
        tags = ', '.join(tags)
        params['taglist'] = tags
    entries = set()
    params.update({'tags_type': int(config['all_tags']),
                   'order_by': config['order_by'],
                   'search_submit': 1,
                   'order_way': config['order_way'],
                   'action': 'basic',
                   'group_results': 0})
    for search_string in entry.get('search_strings', [entry['title']]):
        params['searchstr'] = search_string
        log.debug('Using search params: %s', params)
        try:
            page = self.get(self.base_url + 'torrents.php', params, config['username'], config['password'])
            log.debug('requesting: %s', page.url)
        except RequestException as e:
            log.error('MoreThanTV request failed: %s', e)
            continue
        soup = get_soup(page.content)
        for result in soup.findAll('tr', attrs={'class': 'torrent'}):
            group_info = result.find('td', attrs={'class': 'big_info'}).find('div', attrs={'class': 'group_info'})
            title = group_info.find('a', href=re.compile('torrents.php\?id=\d+')).text
            url = self.base_url + group_info.find('a', href=re.compile('torrents.php\?action=download'))['href']
            # number_column cells: [0]=size, [1]=snatches, [2]=seeders, [3]=leechers
            torrent_info = result.findAll('td', attrs={'class': 'number_column'})
            size = re.search('(\d+(?:[.,]\d+)*)\s?([KMG]B)', torrent_info[0].text)
            torrent_tags = ', '.join([tag.text for tag in group_info.findAll('div', attrs={'class': 'tags'})])
            e = Entry()
            e['title'] = title
            e['url'] = url
            e['torrent_snatches'] = int(torrent_info[1].text)
            e['torrent_seeds'] = int(torrent_info[2].text)
            e['torrent_leeches'] = int(torrent_info[3].text)
            # flags rendered as span icons on the row
            e['torrent_internal'] = True if group_info.find('span', attrs={'class': 'flag_internal'}) else False
            e['torrent_fast_server'] = True if group_info.find('span', attrs={'class': 'flag_fast'}) else False
            e['torrent_sticky'] = True if group_info.find('span', attrs={'class': 'flag_sticky'}) else False
            e['torrent_tags'] = torrent_tags
            e['content_size'] = parse_filesize(size.group(0))
            entries.add(e)
    return entries
def search(self, task, entry, config=None):
    """
    Search for name from torrentday.

    :param config: dict with 'category', 'uid', 'passkey', 'cfduid', 'rss_key'
    :return: list of entries sorted by availability, best first
    :raises PluginError: if the site cannot be reached
    """
    categories = config.get('category', 'all')
    # Make sure categories is a list
    if not isinstance(categories, list):
        categories = [categories]
    # If there are any text categories, turn them into their id number
    categories = [c if isinstance(c, int) else CATEGORIES[c] for c in categories]
    params = {
        'cata': 'yes',
        'c%s' % ','.join(str(c) for c in categories): 1,
        'clear-new': 1,
    }
    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        url = 'https://www.torrentday.com/browse.php'
        params['search'] = normalize_unicode(search_string).replace(':', '')
        cookies = {
            'uid': config['uid'],
            'pass': config['passkey'],
            '__cfduid': config['cfduid'],
        }
        try:
            page = requests.get(url, params=params, cookies=cookies).content
        except RequestException as e:
            raise PluginError('Could not connect to torrentday: %s' % e)
        soup = get_soup(page)
        for tr in soup.find_all('tr', {'class': 'browse'}):
            entry = Entry()
            # find the torrent names
            title = tr.find('a', {'class': 'torrentName'})
            entry['title'] = title.contents[0]
            log.debug('title: %s', title.contents[0])
            # find download link
            torrent_url = tr.find('td', {'class': 'dlLinksInfo'})
            torrent_url = torrent_url.find('a').get('href')
            # construct download URL
            # BUG FIX: this concatenation had been mangled into
            # "'?torrent_pass='******'rss_key']" (a syntax error); restore
            # the torrent_pass query parameter from the configured rss_key.
            torrent_url = (
                'https://www.torrentday.com/' + torrent_url
                + '?torrent_pass=' + config['rss_key']
            )
            log.debug('RSS-ified download link: %s', torrent_url)
            entry['url'] = torrent_url
            # use tr object for seeders/leechers
            seeders, leechers = tr.find_all('td', {'class': ['seedersInfo', 'leechersInfo']})
            entry['torrent_seeds'] = int(seeders.contents[0].replace(',', ''))
            entry['torrent_leeches'] = int(leechers.contents[0].replace(',', ''))
            entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
            # use tr object for size
            size = tr.find('td', text=re.compile('([\.\d]+) ([TGMKk]?)B')).contents[0]
            size = re.search('([\.\d]+) ([TGMKk]?)B', str(size))
            entry['content_size'] = parse_filesize(size.group(0))
            entries.add(entry)
    return sorted(entries, reverse=True, key=lambda x: x.get('search_sort'))
def search(self, task, entry, config):
    """
    Search for entries on AlphaRatio

    :param config: dict with 'username', 'password', 'order_by', 'order_desc',
        'leechstatus', optional 'category' and 'scene'
    :return: set of entries found
    """
    params = {}
    if 'category' in config:
        categories = config['category'] if isinstance(config['category'], list) else [config['category']]
        # each selected category becomes its own flag parameter set to 1
        for category in categories:
            params[CATEGORIES[category]] = 1
    if 'scene' in config:
        params['scene'] = int(config['scene'])
    ordering = 'desc' if config['order_desc'] else 'asc'
    entries = set()
    params.update({'order_by': config['order_by'],
                   'search_submit': 1,
                   'action': 'basic',
                   'order_way': ordering,
                   'freeleech': LEECHSTATUS[config['leechstatus']]})
    for search_string in entry.get('search_strings', [entry['title']]):
        params['searchstr'] = search_string
        log.debug('Using search params: %s', params)
        try:
            page = self.get(self.base_url + 'torrents.php', params, config['username'], config['password'])
            log.debug('requesting: %s', page.url)
        except RequestException as e:
            log.error('AlphaRatio request failed: %s', e)
            continue
        soup = get_soup(page.content)
        for result in soup.findAll('tr', attrs={'class': 'torrent'}):
            group_info = result.find('td', attrs={'class': 'big_info'}).find('div', attrs={'class': 'group_info'})
            title = group_info.find('a', href=re.compile('torrents.php\?id=\d+')).text
            url = self.base_url + \
                group_info.find('a', href=re.compile('torrents.php\?action=download(?!usetoken)'))['href']
            # cells: [4]=size, [5]=snatches, [6]=seeders, [7]=leechers
            torrent_info = result.findAll('td')
            # BUG FIX: the debug log printed torrent_info[5] (the snatch
            # count) while the size is parsed from torrent_info[4]; log the
            # cell actually used.
            log.debug('AlphaRatio size: %s', torrent_info[4].text)
            size = re.search('(\d+(?:[.,]\d+)*)\s?([KMGTP]B)', torrent_info[4].text)
            torrent_tags = ', '.join([tag.text for tag in group_info.findAll('div', attrs={'class': 'tags'})])
            e = Entry()
            e['title'] = title
            e['url'] = url
            e['torrent_tags'] = torrent_tags
            e['content_size'] = parse_filesize(size.group(0))
            e['torrent_snatches'] = int(torrent_info[5].text)
            e['torrent_seeds'] = int(torrent_info[6].text)
            e['torrent_leeches'] = int(torrent_info[7].text)
            entries.add(e)
    return entries
def search(self, task, entry, config):
    """
    Search for entries on 1337x

    :param config: optional dict; 'order_by' selects the sort column
        ('leechers' is the site default and needs no URL fragment)
    :return: set of entries found
    """
    if not isinstance(config, dict):
        config = {}
    order_by = ''
    sort_order = ''
    if isinstance(config.get('order_by'), str):
        if config['order_by'] != 'leechers':
            # non-default ordering needs both a sort path prefix and suffix
            order_by = '/{0}/desc'.format(config['order_by'])
            sort_order = 'sort-'
    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        query = '{0}search/{1}{2}/1/'.format(sort_order, quote(search_string.encode('utf8')), order_by)
        log.debug('Using search params: %s; ordering by: %s', search_string, order_by or 'default')
        try:
            page = task.requests.get(self.base_url + query)
            log.debug('requesting: %s', page.url)
        except RequestException as e:
            log.error('1337x request failed: %s', e)
            continue
        soup = get_soup(page.content)
        if soup.find('div', attrs={'class': 'tab-detail'}) is not None:
            for link in soup.find('div', attrs={'class': 'tab-detail'}).findAll('a', href=re.compile('^/torrent/')):
                # surrounding <li> holds seeds/leeches/size cells
                li = link.parent.parent.parent
                title = str(link.text).replace('...', '')
                info_url = self.base_url + str(link.get('href'))[1:]
                seeds = int(li.find('span', class_='green').string)
                leeches = int(li.find('span', class_='red').string)
                size = str(li.find('div', class_='coll-4').string)
                size = parse_filesize(size)
                e = Entry()
                e['url'] = info_url
                e['title'] = title
                e['torrent_seeds'] = seeds
                e['torrent_leeches'] = leeches
                e['search_sort'] = torrent_availability(e['torrent_seeds'], e['torrent_leeches'])
                e['content_size'] = size
                entries.add(e)
    return entries
def search(self, task, entry, config=None):
    """Search the tracker for each search string, honoring category/type
    filters from config, and weight results by gravity_multiplier.

    :return: set of entries found
    """
    config = self.prepare_config(config)
    if not session.cookies:
        log.debug('Logging in to %s...' % URL)
        params = {
            'username': config['username'],
            'password': config['password'],
            'keeplogged': '1',
            # NOTE(review): this literal appears redacted ('******'); the
            # original submit-button value cannot be recovered from here.
            'login': '******'
        }
        session.post(URL + 'login.php', data=params)
    # one filter_cat[<id>]=1 pair per configured category id
    cat = ''.join([
        '&' + ('filter_cat[%s]' % id) + '=1' for id in config['category']
    ])
    rls = 'release_type=' + config['type']
    url_params = rls + cat
    multip = config['gravity_multiplier']
    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        srch = normalize_unicode(clean_title(search_string))
        srch = '&searchstr=' + quote(srch.encode('utf8'))
        url = URL + 'torrents.php?' + url_params + srch
        log.debug('Fetching URL for `%s`: %s' % (search_string, url))
        page = session.get(url).content
        soup = get_soup(page)
        for result in soup.findAll('tr', attrs={'class': 'torrent'}):
            entry = Entry()
            entry['title'] = result.find('span', attrs={
                'class': 'torrent_name_link'
            }).text
            entry['url'] = URL + result.find(
                'a', href=re.compile(
                    'torrents\.php\?action=download')).get('href')
            # last two cells of the row are seeders and leechers
            entry['torrent_seeds'], entry['torrent_leeches'] = [
                r.text for r in result.findAll('td')[-2:]
            ]
            entry['search_sort'] = torrent_availability(
                entry['torrent_seeds'], entry['torrent_leeches']) * multip
            # fourth-from-last cell carries the size text
            size = result.findAll('td')[-4].text
            size = re.search('(\d+(?:[.,]\d+)*)\s?([KMG]B)', size)
            entry['content_size'] = parse_filesize(size.group(0))
            entries.add(entry)
    return entries
def search(self, task, entry, config=None):
    """
    Search the YTS JSON API for each search string and return a set of
    Entry results (one per torrent variant of each movie).

    :raises plugin.PluginError: on network failure, undecodable JSON, or
        a non-'ok' API status.
    """
    entries = set()
    search_strings = [
        normalize_unicode(s) for s in entry.get('search_strings', [entry['title']])
    ]
    for search_string in search_strings:
        url = 'https://yts.am/api/v2/list_movies.json?query_term=%s' % (
            quote(search_string.encode('utf-8')))
        log.debug('requesting: %s' % url)
        try:
            result = requests.get(url)
            try:
                data = result.json()
            except ValueError:
                log.debug('Could not decode json from response: %s', result.text)
                raise plugin.PluginError('Error getting result from yts.')
        except requests.RequestException as e:
            raise plugin.PluginError(
                'Could not retrieve query from yts (%s)' % e.args[0])
        if not data['status'] == 'ok':
            raise plugin.PluginError('failed to query YTS')
        # guard against unexpected response shapes from the API; any
        # KeyError/TypeError inside just skips this search string
        try:
            if data['data']['movie_count'] > 0:
                # each movie carries multiple torrents (one per quality)
                for item in data['data']['movies']:
                    for torrent in item['torrents']:
                        entry = Entry()
                        entry['title'] = item['title']
                        entry['year'] = item['year']
                        entry['url'] = torrent['url']
                        # API reports bytes; suffix 'b' so parse_filesize
                        # treats the number as bytes
                        entry['content_size'] = parse_filesize(
                            str(torrent['size_bytes']) + "b")
                        entry['torrent_seeds'] = torrent['seeds']
                        entry['torrent_leeches'] = torrent['peers']
                        entry['torrent_info_hash'] = torrent['hash']
                        entry[
                            'torrent_availability'] = torrent_availability(
                                entry['torrent_seeds'],
                                entry['torrent_leeches'])
                        entry['quality'] = torrent['quality']
                        entry['imdb_id'] = item['imdb_code']
                        if entry.isvalid():
                            entries.add(entry)
        except Exception:
            log.debug('invalid return structure from YTS')

    log.debug('Search got %d results' % len(entries))
    return entries
def parse_page(self, scraper, url: str):
    """Fetch *url* with *scraper* and yield one Entry per result row.

    :param scraper: requests-like session used for the GET
    :param url: fully-built search URL
    :raises Page404Error: when the site answers 404
    :raises plugin.PluginError: on network errors, other non-200 statuses,
        or unexpectedly structured markup
    Yields nothing when the page has no results table.
    """
    try:
        logger.debug('page url: {}', url)
        page = scraper.get(url)
    except RequestException as e:
        raise plugin.PluginError(str(e))

    if page.status_code == 404:
        raise Page404Error()
    if page.status_code != 200:
        raise plugin.PluginError(
            f'HTTP Request failed {page.status_code}. Url: {url}')

    soup = get_soup(page.text)
    soup_table = soup.find('table', class_='download')
    if not soup_table:
        # very likely no result
        return
    table_tbody = soup_table.find('tbody')
    if not table_tbody:
        raise plugin.PluginError(
            'Parsing crashed, no tbody, please report the issue')
    trs = table_tbody.find_all('tr')
    if not trs:
        logger.critical('Nothing to parse')
        return
    for tr in trs:
        try:
            magnet_td = tr.find('td', class_='m')
            if not magnet_td:
                # skip empty trs
                continue
            magnet_a = magnet_td.find('a')
            magnet = magnet_a['href']
            title_td = tr.find('td', class_='n')
            title_a = title_td.find('a')
            title = title_a['title']
            seed_td = tr.find('td', class_='s')
            seed = int(seed_td.text)
            leech = int(tr.find('td', class_='l').text)
            # the size cell sits immediately before the seeders cell
            content_size = parse_filesize(seed_td.previous_sibling.text)
            yield Entry(
                url=magnet,
                title=title,
                torrent_seeds=seed,
                # fixed key: was 'torrent_leech', which no consumer reads --
                # every other plugin in this file emits 'torrent_leeches'
                torrent_leeches=leech,
                content_size=content_size,
            )
        except AttributeError as e:
            raise plugin.PluginError(
                'Parsing crashed, please report the issue') from e
def search(self, task, entry, config=None):
    """
    Search the YTS JSON API for each search string and return a set of
    Entry results (one per torrent variant of each movie).

    :raises plugin.PluginError: on network failure, undecodable JSON, or
        a non-'ok' API status.
    """
    entries = set()
    search_strings = [
        normalize_unicode(s) for s in entry.get('search_strings', [entry['title']])
    ]
    for search_string in search_strings:
        # NOTE(review): urllib.quote is the Python 2 location of this
        # function (urllib.parse.quote on py3) — presumably the module
        # imports provide it; confirm against file header
        url = 'https://yts.am/api/v2/list_movies.json?query_term=%s' % (
            urllib.quote(search_string.encode('utf-8'))
        )
        log.debug('requesting: %s' % url)
        try:
            result = requests.get(url)
            try:
                data = result.json()
            except ValueError:
                log.debug('Could not decode json from response: %s', result.text)
                raise plugin.PluginError('Error getting result from yts.')
        except requests.RequestException as e:
            raise plugin.PluginError('Could not retrieve query from yts (%s)' % e.args[0])
        if not data['status'] == 'ok':
            raise plugin.PluginError('failed to query YTS')
        # guard against unexpected response shapes; failures merely skip
        # this search string
        try:
            if data['data']['movie_count'] > 0:
                # each movie carries multiple torrents (one per quality)
                for item in data['data']['movies']:
                    for torrent in item['torrents']:
                        entry = Entry()
                        entry['title'] = item['title']
                        entry['year'] = item['year']
                        entry['url'] = torrent['url']
                        # API reports bytes; suffix 'b' for parse_filesize
                        entry['content_size'] = parse_filesize(
                            str(torrent['size_bytes']) + "b"
                        )
                        entry['torrent_seeds'] = torrent['seeds']
                        entry['torrent_leeches'] = torrent['peers']
                        entry['torrent_info_hash'] = torrent['hash']
                        entry['torrent_availability'] = torrent_availability(
                            entry['torrent_seeds'], entry['torrent_leeches']
                        )
                        entry['quality'] = torrent['quality']
                        entry['imdb_id'] = item['imdb_code']
                        if entry.isvalid():
                            entries.add(entry)
        except Exception:
            log.debug('invalid return structure from YTS')

    log.debug('Search got %d results' % len(entries))
    return entries
def extract_entry_from_soup(self, soup):
    """
    Parse a Fuzer search-results page into a list of Entry objects.

    :param soup: BeautifulSoup of the results page
    :return: list of Entry (empty list when the results table is empty)
    :raises PluginError: when the results table cannot be located
    """
    table = soup.find('div', {'id': 'main_table'})
    if table is None:
        raise PluginError(
            'Could not fetch results table from Fuzer, aborting')
    log.trace('fuzer results table: %s', table)
    table = table.find('table', {'class': 'table_info'})
    # a single row means header only, i.e. no results
    if len(table.find_all('tr')) == 1:
        log.debug('No search results were returned from Fuzer, continuing')
        return []

    entries = []
    for tr in table.find_all("tr"):
        # skip header rows (class 'colhead_dark') and rows with no class
        if not tr.get('class') or 'colhead_dark' in tr.get('class'):
            continue
        name = tr.find('div', {'class': 'main_title'}).find('a').text
        # the attachment tooltip holds the torrent file name on its 2nd line
        torrent_name = re.search(
            '\\n(.*)', tr.find('div', {
                'style': 'float: right;'
            }).find('a')['title']).group(1)
        attachment_link = tr.find('div', {
            'style': 'float: right;'
        }).find('a')['href']
        attachment_id = re.search('attachmentid=(\d+)',
                                  attachment_link).group(1)
        raw_size = tr.find_all('td', {'class': 'inline_info'})[0].text.strip()
        seeders = int(tr.find_all('td', {'class': 'inline_info'})[2].text)
        leechers = int(tr.find_all('td', {'class': 'inline_info'})[3].text)

        e = Entry()
        e['title'] = name
        # build an RSS-style download URL carrying the user's credentials
        final_url = 'https://www.fuzer.me/rss/torrent.php/{}/{}/{}/{}'.format(
            attachment_id, self.user_id, self.rss_key, torrent_name)
        log.debug('RSS-ified download link: %s', final_url)
        e['url'] = final_url
        e['torrent_seeds'] = seeders
        e['torrent_leeches'] = leechers
        e['search_sort'] = torrent_availability(e['torrent_seeds'],
                                                e['torrent_leeches'])
        size = re.search('(\d+(?:[.,]\d+)*)\s?([KMGTP]B)', raw_size)
        e['content_size'] = parse_filesize(size.group(0))
        entries.append(e)
    return entries
def search(self, task, entry, config):
    """
    Search nyaa.si via its RSS feed.

    :param config: dict or bare category string; defaults to category
        'anime eng' and filter 'all'
    :return: set of Entry results
    """
    # a bare (non-dict) config is shorthand for just the category
    if not isinstance(config, dict):
        config = {'category': config}
    config.setdefault('category', 'anime eng')
    config.setdefault('filter', 'all')

    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        name = normalize_unicode(search_string)
        url = 'https://www.nyaa.si/?page=rss&q=%s&c=%s&f=%s' % (
            quote(name.encode('utf-8')),
            CATEGORIES[config['category']],
            FILTERS.index(config['filter']),
        )

        log.debug('requesting: %s' % url)
        rss = feedparser.parse(url)

        status = rss.get('status', False)
        if status != 200:
            log.debug('Search result not 200 (OK), received %s' % status)
            # only hard errors abort this search string; redirects etc. parse on
            if status >= 400:
                continue

        ex = rss.get('bozo_exception', False)
        if ex:
            log.error('Got bozo_exception (bad feed) on %s' % url)
            continue

        for item in rss.entries:
            entry = Entry()
            entry['title'] = item.title
            entry['url'] = item.link
            # nyaa extends RSS items with custom seeders/leechers/infohash fields
            entry['torrent_seeds'] = int(item.nyaa_seeders)
            entry['torrent_leeches'] = int(item.nyaa_leechers)
            entry['torrent_info_hash'] = item.nyaa_infohash
            entry['torrent_availability'] = torrent_availability(
                entry['torrent_seeds'], entry['torrent_leeches']
            )
            if item.nyaa_size:
                entry['content_size'] = parse_filesize(item.nyaa_size)

            entries.add(entry)

    return entries
def search(self, task, entry, config):
    """
    Query the nyaa.si RSS interface for each of the entry's search strings.

    :param config: dict or bare category string; category defaults to
        'anime eng' and filter to 'all'
    :return: set of Entry results
    """
    # a bare string config is shorthand for the category
    if not isinstance(config, dict):
        config = {'category': config}
    config.setdefault('category', 'anime eng')
    config.setdefault('filter', 'all')

    results = set()
    for term in entry.get('search_strings', [entry['title']]):
        normalized = normalize_unicode(term)
        url = 'https://www.nyaa.si/?page=rss&q=%s&c=%s&f=%s' % (
            quote(normalized.encode('utf-8')),
            CATEGORIES[config['category']],
            FILTERS.index(config['filter']),
        )

        log.debug('requesting: %s' % url)
        rss = feedparser.parse(url)

        status = rss.get('status', False)
        if status != 200:
            log.debug('Search result not 200 (OK), received %s' % status)
            # only hard HTTP errors abort this search string
            if status >= 400:
                continue

        if rss.get('bozo_exception', False):
            log.error('Got bozo_exception (bad feed) on %s' % url)
            continue

        for item in rss.entries:
            found = Entry()
            found['title'] = item.title
            found['url'] = item.link
            # nyaa extends each RSS item with seeder/leecher/infohash fields
            found['torrent_seeds'] = int(item.nyaa_seeders)
            found['torrent_leeches'] = int(item.nyaa_leechers)
            found['torrent_info_hash'] = item.nyaa_infohash
            found['torrent_availability'] = torrent_availability(
                found['torrent_seeds'], found['torrent_leeches'])
            if item.nyaa_size:
                found['content_size'] = parse_filesize(item.nyaa_size)
            results.add(found)

    return results
def search(self, task, entry, config=None):
    """
    Search the tracker for each of the entry's search strings.

    Logs in (once per session) with the configured credentials, builds the
    category/release-type query string, scrapes torrents.php and returns a
    set of Entry results weighted by 'gravity_multiplier'.
    """
    config = self.prepare_config(config)

    # only authenticate when the shared session has no cookies yet
    if not session.cookies:
        log.debug('Logging in to %s...' % URL)
        params = {
            'username': config['username'],
            'password': config['password'],
            'keeplogged': '1',
            'login': '******'
        }
        session.post(URL + 'login.php', data=params)

    # one 'filter_cat[<id>]=1' pair per configured category id
    cat = ''.join(['&' + ('filter_cat[%s]' % id) + '=1' for id in config['category']])
    rls = 'release_type=' + config['type']
    url_params = rls + cat
    # scales search_sort so this tracker can out/under-rank others
    multip = config['gravity_multiplier']

    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        srch = normalize_unicode(clean_title(search_string))
        srch = '&searchstr=' + quote(srch.encode('utf8'))
        url = URL + 'torrents.php?' + url_params + srch
        log.debug('Fetching URL for `%s`: %s' % (search_string, url))

        page = session.get(url).content
        soup = get_soup(page)

        for result in soup.findAll('tr', attrs={'class': 'torrent'}):
            entry = Entry()
            entry['title'] = result.find('span', attrs={'class': 'torrent_name_link'}).text
            entry['url'] = URL + result.find('a', href=re.compile('torrents\.php\?action=download')).get('href')
            # last two cells of the row are seeders and leechers
            # NOTE(review): these stay as strings — presumably
            # torrent_availability copes; confirm before changing
            entry['torrent_seeds'], entry['torrent_leeches'] = [r.text for r in result.findAll('td')[-2:]]
            entry['search_sort'] = torrent_availability(entry['torrent_seeds'],
                                                        entry['torrent_leeches']) * multip
            # fourth-from-last cell holds the human-readable size
            size = result.findAll('td')[-4].text
            size = re.search('(\d+(?:[.,]\d+)*)\s?([KMG]B)', size)
            entry['content_size'] = parse_filesize(size.group(0))
            entries.add(entry)
    return entries
def parse_result_entry(self, entry_page): se = '\.' + self.se + '\.' ## if se = S01 or 01 dont match with Staffelpack Demo.S01E99.German.Dubstepped.DL.EbayHD.x264-VHS english = self.config['language'] == 'english' entries = [] search_result_entries = [] filesize = 0 for p in entry_page.find_all('p'): if p.strong is not None and p.strong.text not in self.EXCLUDES: if english: if p.strong.find( text=re.compile(se, flags=re.IGNORECASE) ) and not p.strong.find( text=re.compile("german", flags=re.IGNORECASE)): search_result_entries.append( self.parse_entry(p, filesize)) else: if p.strong.find( text=re.compile(se, flags=re.IGNORECASE) ) and p.strong.find( text=re.compile("german", flags=re.IGNORECASE)): search_result_entries.append( self.parse_entry(p, filesize)) elif (p.find("strong", text="Größe:")): size = p.find("strong", text="Größe:").next_sibling ## experimental size = re.sub(' +', ' ', size) # remove multiple whitespaces size = size.replace( "|", "").strip() # remove | and strip whitespaces size = re.findall('([\d]+ [\w]+)', size) if len(size) > 0: filesize = parse_filesize(size[0]) ## check for more result pages next_link = entry_page.find("a", text="»") if next_link: next_page = self.get_url_content(next_link['href']) search_result_entries.extend(self.parse_result_entry(next_page)) return [x for x in search_result_entries if x is not None]
def get_entries(self, search_results):
    """Generator that yields one Entry per download option in the results.

    Each search result describes a release group; every torrent under it
    becomes its own Entry with an authenticated download URL.
    """
    for group in search_results:
        group_id = group['groupId']
        group_name = group['groupName']
        # Releases can have multiple download options
        for torrent in group['torrents']:
            torrent_id = torrent['torrentId']
            title = "{} ({} - {}).torrent".format(group_name, group_id, torrent_id)
            url = "{}/torrents.php?action=download&id={}&authkey={}&torrent_pass={}".format(
                self.base_url, torrent_id, self.authkey, self.passkey)
            yield Entry(
                title=title,
                url=url,
                torrent_seeds=torrent['seeders'],
                torrent_leeches=torrent['leechers'],
                # Size is returned in bytes
                content_size=parse_filesize(str(torrent['size']) + "b"),
            )
def search(self, task, entry, config=None):
    """
    Search for name from torrent411.

    Builds a category/sub-category/season/episode filter URL, scrapes the
    results table and returns entries sorted by availability. Download
    URLs require t411 authentication, attached via 'download_auth'.
    """
    url_base = 'http://www.t411.li'

    if not isinstance(config, dict):
        config = {}

    category = config.get('category')
    if category in list(CATEGORIES):
        category = CATEGORIES[category]

    sub_categories = config.get('sub_category')
    if not isinstance(sub_categories, list):
        sub_categories = [sub_categories]

    filter_url = ''
    if isinstance(category, int):
        filter_url = '&cat=%s' % str(category)

        if sub_categories[0] is not None:
            # each sub-category becomes a (term-type, term-id) pair in the URL
            sub_categories = [SUB_CATEGORIES[c] for c in sub_categories]
            filter_url = filter_url + '&' + '&'.join([urllib.parse.quote_plus('term[%s][]' % c[0]).
                                                      encode('utf-8') + '=' + str(c[1])
                                                      for c in sub_categories])

    if 'series_season' in entry and 'series_episode' in entry:
        # map flexget's season/episode numbers onto t411 term ids
        season = entry['series_season']
        if season in list(SEASONS):
            filter_url = filter_url + '&term[%d][]' % SEASONS[season][0] + '=' + str(SEASONS[season][1])

        episode = entry['series_episode']
        if episode in list(EPISODES):
            filter_url = filter_url + '&term[%d][]' % EPISODES[episode][0] + '=' + str(EPISODES[episode][1])

    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        query = normalize_unicode(search_string)
        # %40name => @name (search by release name)
        url_search = ('/torrents/search/?search=%40name+' +
                      urllib.parse.quote_plus(query.encode('utf-8')) +
                      filter_url)

        opener = urllib.request.build_opener()
        opener.addheaders = [('User-agent', 'Mozilla/5.0')]
        response = opener.open(url_base + url_search)

        data = response.read()
        soup = get_soup(data)
        tb = soup.find("table", class_="results")
        if not tb:
            continue
        # skip the header row and the trailing pagination row
        for tr in tb.findAll('tr')[1:][:-1]:
            entry = Entry()
            nfo_link_res = re.search('torrents/nfo/\?id=(\d+)', str(tr))
            # NOTE(review): when no nfo link matches, tid keeps the value
            # from a previous row (or is undefined on the first) — confirm
            # whether every result row is guaranteed to carry an nfo link
            if nfo_link_res is not None:
                tid = nfo_link_res.group(1)
            title_res = re.search(
                '<a href=\"//www.t411.li/torrents/([-A-Za-z0-9+&@#/%|?=~_|!:,.;]+)\" title="([^"]*)">',
                str(tr))
            if title_res is not None:
                entry['title'] = native_str_to_text(title_res.group(2), encoding='utf-8')
            size = tr('td')[5].contents[0]
            entry['url'] = 'http://www.t411.li/torrents/download/?id=%s' % tid
            entry['torrent_seeds'] = tr('td')[7].contents[0]
            entry['torrent_leeches'] = tr('td')[8].contents[0]
            entry['search_sort'] = torrent_availability(entry['torrent_seeds'],
                                                        entry['torrent_leeches'])
            size = re.search('([\.\d]+) ([GMK]?)B', size)
            entry['content_size'] = parse_filesize(size.group(0))
            # downloads need authenticated requests
            auth_handler = t411Auth(config['username'], config['password'])

            entry['download_auth'] = auth_handler
            entries.add(entry)

    return sorted(entries, reverse=True, key=lambda x: x.get('search_sort'))
def test_parse_filesize_separators(self):
    """Thousand separators (comma and space) are accepted and stripped."""
    assert parse_filesize('1,234 GiB') == 1263616
    assert parse_filesize('1 234 567 MiB') == 1234567
def search(self, task, entry, config):
    """
    Search for entries on FileList.ro

    Issues an authenticated browse.php query per search string and scrapes
    the result rows into Entry objects (including genres, freeleech and
    internal flags).
    """
    entries = set()

    params = {
        'cat': CATEGORIES[config['category']],
        'incldead': int(config['include_dead']),
        'order_by': SORTING[config['order_by']],
        'searchin': SEARCH_IN[config['search_in']],
        'asc': int(config['order_ascending'])
    }

    for search_string in entry.get('search_strings', [entry['title']]):
        params['search'] = search_string
        log.debug('Using search params: %s', params)
        try:
            page = self.get(BASE_URL + 'browse.php', params, config['username'], config['password'])
            log.debug('requesting: %s', page.url)
        except RequestException as e:
            # a failed search string is skipped, not fatal
            log.error('FileList.ro request failed: %s', e)
            continue

        soup = get_soup(page.content)

        for result in soup.findAll('div', attrs={'class': 'torrentrow'}):
            e = Entry()
            # each row is a fixed sequence of 'torrenttable' cells,
            # addressed by position below
            torrent_info = result.findAll('div', attrs={'class': 'torrenttable'})

            # genres
            genres = torrent_info[1].find('font')
            if genres:
                genres = genres.text.lstrip('[').rstrip(']').replace(' ', '')
                genres = genres.split('|')

            tags = torrent_info[1].findAll('img')
            freeleech = False
            internal = False
            for tag in tags:
                if tag.get('alt', '').lower() == 'freeleech':
                    freeleech = True
                if tag.get('alt', '').lower() == 'internal':
                    internal = True

            title = torrent_info[1].find('a').get('title')
            # this is a dirty fix to get the full title since their developer is a moron
            if re.match("\<img src=\'.*\'\>", title):
                title = torrent_info[1].find('b').text
            # if the title is shortened, then do a request to get the full one :(
            if title.endswith('...'):
                url = BASE_URL + torrent_info[1].find('a')['href']
                try:
                    request = self.get(url, {}, config['username'], config['password'])
                except RequestException as e:
                    log.error('FileList.ro request failed: %s', e)
                    continue
                title_soup = get_soup(request.content)
                title = title_soup.find('div', attrs={'class': 'cblock-header'}).text

            e['title'] = title
            e['url'] = BASE_URL + torrent_info[3].find('a')['href'] + '&passkey=' + config['passkey']
            e['content_size'] = parse_filesize(torrent_info[6].find('font').text)

            # snatch counts come formatted like '1,234 times'
            e['torrent_snatches'] = int(torrent_info[7].find('font').text.replace(' ', '').replace('times', '')
                                        .replace(',', ''))
            e['torrent_seeds'] = int(torrent_info[8].find('span').text)
            e['torrent_leeches'] = int(torrent_info[9].find('span').text)
            e['torrent_internal'] = internal
            e['torrent_freeleech'] = freeleech
            if genres:
                e['torrent_genres'] = genres

            entries.add(e)

    return entries
def search(self, task, entry, config=None):
    """
    Search for name from torrentday.

    Requires 'uid', 'passkey', 'cfduid' and 'rss_key' in config for
    authentication. Returns entries sorted by torrent availability.
    """
    categories = config.get('category', 'all')
    # Make sure categories is a list
    if not isinstance(categories, list):
        categories = [categories]

    # If there are any text categories, turn them into their id number
    categories = [
        c if isinstance(c, int) else CATEGORIES[c] for c in categories
    ]
    params = {
        'cata': 'yes',
        'c{}'.format(','.join(str(c) for c in categories)): 1,
        'clear-new': 1,
    }
    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):

        url = 'https://www.torrentday.com/t'
        params['q'] = normalize_unicode(search_string).replace(':', '')
        cookies = {
            'uid': config['uid'],
            'pass': config['passkey'],
            '__cfduid': config['cfduid'],
        }

        try:
            page = requests.get(url, params=params, cookies=cookies).content
        except RequestException as e:
            raise PluginError(
                'Could not connect to torrentday: {}'.format(e))

        # the following should avoid table being None due to a malformed
        # html in td search results
        soup = get_soup(page).contents[1].contents[1].next.next.nextSibling
        table = soup.find('table', {'id': 'torrentTable'})
        if table is None:
            raise PluginError(
                'Search returned by torrentday appears to be empty or malformed.'
            )

        # the first row is the header so skip it
        for tr in table.find_all('tr')[1:]:
            entry = Entry()
            # find the torrent names
            td = tr.find('td', {'class': 'torrentNameInfo'})
            if not td:
                log.warning('Could not find entry torrentNameInfo for %s.',
                            search_string)
                continue
            title = td.find('a')
            if not title:
                log.warning('Could not determine title for %s.',
                            search_string)
                continue
            entry['title'] = title.contents[0]
            log.debug('title: %s', title.contents[0])

            # find download link
            torrent_url = tr.find('td', {'class': 'ac'})
            if not torrent_url:
                log.warning('Could not determine download link for %s.',
                            search_string)
                continue
            torrent_url = torrent_url.find('a').get('href')

            # construct download URL; repaired: the rss_key concatenation
            # had been corrupted into an invalid token sequence
            torrent_url = ('https://www.torrentday.com/' + torrent_url +
                           '?torrent_pass=' + config['rss_key'])
            log.debug('RSS-ified download link: %s', torrent_url)
            entry['url'] = torrent_url

            # use tr object for seeders/leechers
            seeders = tr.find('td', {'class': 'ac seedersInfo'})
            leechers = tr.find('td', {'class': 'ac leechersInfo'})
            entry['torrent_seeds'] = int(seeders.contents[0].replace(
                ',', ''))
            entry['torrent_leeches'] = int(leechers.contents[0].replace(
                ',', ''))
            entry['torrent_availability'] = torrent_availability(
                entry['torrent_seeds'], entry['torrent_leeches'])

            # use tr object for size
            size = tr.find(
                'td', text=re.compile(r'([\.\d]+) ([TGMKk]?)B')).contents[0]
            size = re.search(r'([\.\d]+) ([TGMKk]?)B', str(size))

            entry['content_size'] = parse_filesize(size.group(0))

            entries.add(entry)

    return sorted(entries, reverse=True,
                  key=lambda x: x.get('torrent_availability'))
def search(self, task, entry, config=None):
    """
    Search for name from torrentday.

    Requires 'uid', 'passkey', 'cfduid' and 'rss_key' in config for
    authentication. Returns entries sorted by search_sort (availability).
    """
    categories = config.get('category', 'all')
    # Make sure categories is a list
    if not isinstance(categories, list):
        categories = [categories]

    # If there are any text categories, turn them into their id number
    categories = [c if isinstance(c, int) else CATEGORIES[c] for c in categories]
    params = {
        'cata': 'yes',
        'c{}'.format(','.join(str(c) for c in categories)): 1,
        'clear-new': 1}
    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):

        url = 'https://www.torrentday.com/t'
        params['q'] = normalize_unicode(search_string).replace(':', '')
        cookies = {
            'uid': config['uid'],
            'pass': config['passkey'],
            '__cfduid': config['cfduid']
        }

        try:
            page = requests.get(url, params=params, cookies=cookies).content
        except RequestException as e:
            raise PluginError('Could not connect to torrentday: {}'.format(e))

        # the following should avoid table being None due to a malformed
        # html in td search results
        soup = get_soup(page).contents[1].contents[1].next.next.nextSibling
        table = soup.find('table', {'id': 'torrentTable'})
        if (table is None):
            raise PluginError('Search returned by torrentday appears to be empty or malformed.')

        # the first row is the header so skip it
        for tr in table.find_all('tr')[1:]:
            entry = Entry()
            # find the torrent names
            td = tr.find('td', {'class': 'torrentNameInfo'})
            if not td:
                log.warning('Could not find entry torrentNameInfo for %s.', search_string)
                continue
            title = td.find('a')
            if not title:
                log.warning('Could not determine title for %s.', search_string)
                continue
            entry['title'] = title.contents[0]
            log.debug('title: %s', title.contents[0])

            # find download link
            torrent_url = tr.find('td', {'class': 'ac'})
            if not torrent_url:
                log.warning('Could not determine download link for %s.', search_string)
                continue
            torrent_url = torrent_url.find('a').get('href')

            # construct download URL; repaired: the rss_key concatenation
            # had been corrupted into an invalid token sequence
            torrent_url = (
                'https://www.torrentday.com/' + torrent_url +
                '?torrent_pass=' + config['rss_key']
            )
            log.debug('RSS-ified download link: %s', torrent_url)
            entry['url'] = torrent_url

            # use tr object for seeders/leechers
            seeders = tr.find('td', {'class': 'ac seedersInfo'})
            leechers = tr.find('td', {'class': 'ac leechersInfo'})
            entry['torrent_seeds'] = int(seeders.contents[0].replace(',', ''))
            entry['torrent_leeches'] = int(leechers.contents[0].replace(',', ''))
            entry['search_sort'] = torrent_availability(entry['torrent_seeds'],
                                                        entry['torrent_leeches'])

            # use tr object for size
            size = tr.find('td', text=re.compile('([\.\d]+) ([TGMKk]?)B')).contents[0]
            size = re.search('([\.\d]+) ([TGMKk]?)B', str(size))

            entry['content_size'] = parse_filesize(size.group(0))

            entries.add(entry)

    return sorted(entries, reverse=True, key=lambda x: x.get('search_sort'))
def test_parse_filesize_auto_mib(self):
    """A MiB value converts one-to-one to mebibytes."""
    assert compare_floats(parse_filesize('1234 MiB'), 1234)
def search(self, task, entry, config=None):
    """
    Search for name from torrent411.

    Builds a category/sub-category/season/episode filter URL, scrapes the
    results table and returns entries sorted by availability. Download
    URLs require t411 authentication, attached via 'download_auth'.
    """
    url_base = 'https://www.t411.al'

    if not isinstance(config, dict):
        config = {}

    category = config.get('category')
    if category in list(CATEGORIES):
        category = CATEGORIES[category]

    sub_categories = config.get('sub_category')
    if not isinstance(sub_categories, list):
        sub_categories = [sub_categories]

    filter_url = ''
    if isinstance(category, int):
        filter_url = '&cat=%s' % str(category)

        if sub_categories[0] is not None:
            # each sub-category becomes a (term-type, term-id) pair in the URL
            sub_categories = [SUB_CATEGORIES[c] for c in sub_categories]
            filter_url = filter_url + '&' + '&'.join([
                urllib.parse.quote_plus(
                    'term[%s][]' % c[0]).encode('utf-8') + '=' + str(c[1])
                for c in sub_categories
            ])

    if 'series_season' in entry and 'series_episode' in entry:
        # map flexget's season/episode numbers onto t411 term ids
        season = entry['series_season']
        if season in list(SEASONS):
            filter_url = filter_url + '&term[%d][]' % SEASONS[season][
                0] + '=' + str(SEASONS[season][1])

        episode = entry['series_episode']
        if episode in list(EPISODES):
            filter_url = filter_url + '&term[%d][]' % EPISODES[episode][
                0] + '=' + str(EPISODES[episode][1])

    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        query = normalize_unicode(search_string)
        # %40name => @name (search by release name)
        url_search = ('/torrents/search/?search=%40name+' +
                      urllib.parse.quote_plus(query.encode('utf-8')) +
                      filter_url)

        opener = urllib.request.build_opener()
        opener.addheaders = [('User-agent', 'Mozilla/5.0')]
        response = opener.open(url_base + url_search)

        data = response.read()
        soup = get_soup(data)
        tb = soup.find("table", class_="results")
        if not tb:
            continue
        # skip the header row and the trailing pagination row
        for tr in tb.findAll('tr')[1:][:-1]:
            entry = Entry()
            nfo_link_res = re.search('torrents/nfo/\?id=(\d+)', str(tr))
            # NOTE(review): when no nfo link matches, tid keeps the value
            # from a previous row (or is undefined on the first) — confirm
            # whether every result row is guaranteed to carry an nfo link
            if nfo_link_res is not None:
                tid = nfo_link_res.group(1)
            title_res = re.search(
                '<a href=\"//www.t411.al/torrents/([-A-Za-z0-9+&@#/%|?=~_|!:,.;]+)\" title="([^"]*)">',
                str(tr))
            if title_res is not None:
                entry['title'] = native_str_to_text(title_res.group(2),
                                                    encoding='utf-8')
            size = tr('td')[5].contents[0]
            entry['url'] = 'https://www.t411.al/torrents/download/?id=%s' % tid
            entry['torrent_seeds'] = tr('td')[7].contents[0]
            entry['torrent_leeches'] = tr('td')[8].contents[0]
            entry['search_sort'] = torrent_availability(
                entry['torrent_seeds'], entry['torrent_leeches'])
            size = re.search('([\.\d]+) ([GMK]?)B', size)
            entry['content_size'] = parse_filesize(size.group(0))
            # downloads need authenticated requests
            auth_handler = t411Auth(config['username'], config['password'])

            entry['download_auth'] = auth_handler
            entries.add(entry)

    return sorted(entries, reverse=True, key=lambda x: x.get('search_sort'))
def test_parse_filesize_single_digit(self):
    """1 GiB parses to 1024 (MiB)."""
    assert compare_floats(parse_filesize('1 GiB'), 1024)
def search(self, task, entry, config):
    """
    Search for entries on PassThePopcorn

    Queries the JSON API of torrents.php, preferring the entry's imdb_id
    over text search strings, filters out mismatched movies, and yields
    one Entry per torrent variant with an authenticated download URL.
    """
    params = {}

    if 'tags' in config:
        tags = config['tags'] if isinstance(config['tags'], list) else [config['tags']]
        params['taglist'] = ',+'.join(tags)

    release_type = config.get('release_type')
    if release_type:
        params['scene'] = RELEASE_TYPES[release_type]

    if config.get('freeleech'):
        params['freetorrent'] = int(config['freeleech'])

    ordering = 'desc' if config['order_desc'] else 'asc'

    entries = set()

    params.update({
        'order_by': ORDERING[config['order_by']],
        'order_way': ordering,
        'action': 'advanced',
        'json': 'noredirect'
    })

    search_strings = entry.get('search_strings', [entry['title']])

    # searching with imdb id is much more precise
    if entry.get('imdb_id'):
        search_strings = [entry['imdb_id']]

    for search_string in search_strings:
        params['searchstr'] = search_string
        log.debug('Using search params: %s', params)
        try:
            result = self.get(self.base_url + 'torrents.php', params, config['username'],
                              config['password'], config['passkey']).json()
        except RequestException as e:
            # a failed search string is skipped, not fatal
            log.error('PassThePopcorn request failed: %s', e)
            continue

        total_results = result['TotalResults']
        log.debug('Total results: %s', total_results)
        # per-response keys needed to build authenticated download links
        authkey = result['AuthKey']
        passkey = result['PassKey']

        for movie in result['Movies']:
            # skip movies that are irrelevant
            if entry.get('movie_year') and int(movie['Year']) != int(entry['movie_year']):
                log.debug('Movie year %s does not match %s', movie['Year'], entry['movie_year'])
                continue
            # imdb id in the json result is without 'tt'
            if entry.get('imdb_id') and movie['ImdbId'] not in entry['imdb_id']:
                log.debug('imdb id %s does not match %s', movie['ImdbId'], entry['imdb_id'])
                continue

            for torrent in movie['Torrents']:
                e = Entry()

                e['title'] = torrent['ReleaseName']

                e['torrent_tags'] = movie['Tags']
                # API reports size in bytes
                e['content_size'] = parse_filesize(torrent['Size'] + ' b')
                e['torrent_snatches'] = int(torrent['Snatched'])
                e['torrent_seeds'] = int(torrent['Seeders'])
                e['torrent_leeches'] = int(torrent['Leechers'])
                e['torrent_id'] = int(torrent['Id'])
                e['golden_popcorn'] = torrent['GoldenPopcorn']
                e['checked'] = torrent['Checked']
                e['uploaded_at'] = dateutil_parse(torrent['UploadTime'])

                e['url'] = self.base_url + 'torrents.php?action=download&id={}&authkey={}&torrent_pass={}'.format(
                    e['torrent_id'], authkey, passkey
                )

                entries.add(e)

    return entries
def search(self, task, entry, config):
    """
    Search for entries on PassThePopcorn

    Queries the JSON API of torrents.php, preferring the entry's imdb_id
    over text search strings, filters out mismatched movies, and yields
    one Entry per torrent variant with an authenticated download URL.
    """
    params = {}

    if 'tags' in config:
        tags = config['tags'] if isinstance(config['tags'], list) else [config['tags']]
        params['taglist'] = ',+'.join(tags)

    release_type = config.get('release_type')
    if release_type:
        params['scene'] = RELEASE_TYPES[release_type]

    if config.get('freeleech'):
        params['freetorrent'] = int(config['freeleech'])

    ordering = 'desc' if config['order_desc'] else 'asc'

    entries = set()

    params.update(
        {
            'order_by': ORDERING[config['order_by']],
            'order_way': ordering,
            'action': 'advanced',
            'json': 'noredirect',
        }
    )

    search_strings = entry.get('search_strings', [entry['title']])

    # searching with imdb id is much more precise
    if entry.get('imdb_id'):
        search_strings = [entry['imdb_id']]

    for search_string in search_strings:
        params['searchstr'] = search_string
        log.debug('Using search params: %s', params)
        try:
            result = self.get(
                self.base_url + 'torrents.php',
                params,
                config['username'],
                config['password'],
                config['passkey'],
            ).json()
        except RequestException as e:
            # a failed search string is skipped, not fatal
            log.error('PassThePopcorn request failed: %s', e)
            continue

        total_results = result['TotalResults']
        log.debug('Total results: %s', total_results)
        # per-response keys needed to build authenticated download links
        authkey = result['AuthKey']
        passkey = result['PassKey']

        for movie in result['Movies']:
            # skip movies with wrong year
            # don't consider the year if we have an imdb_id (account for year
            # discrepancies when we already know we have the right movie).
            # Fixed: the checks must look at `entry` — the movie dicts from
            # the API carry 'Year'/'ImdbId', never 'imdb_id'/'movie_year',
            # so testing `movie` made both filters dead code.
            if (
                not entry.get('imdb_id')
                and entry.get('movie_year')
                and int(movie['Year']) != int(entry['movie_year'])
            ):
                log.debug(
                    'Movie year %s does not match default %s',
                    movie['Year'],
                    entry['movie_year'],
                )
                continue
            # imdb id in the json result is without 'tt'
            if entry.get('imdb_id') and movie['ImdbId'] not in entry['imdb_id']:
                log.debug('imdb id %s does not match %s', movie['ImdbId'], entry['imdb_id'])
                continue

            for torrent in movie['Torrents']:
                e = Entry()

                e['title'] = torrent['ReleaseName']
                e['imdb_id'] = entry.get('imdb_id')
                e['torrent_tags'] = movie['Tags']
                # API reports size in bytes
                e['content_size'] = parse_filesize(torrent['Size'] + ' b')
                e['torrent_snatches'] = int(torrent['Snatched'])
                e['torrent_seeds'] = int(torrent['Seeders'])
                e['torrent_leeches'] = int(torrent['Leechers'])
                e['torrent_id'] = int(torrent['Id'])
                e['golden_popcorn'] = torrent['GoldenPopcorn']
                e['checked'] = torrent['Checked']
                e['scene'] = torrent['Scene']
                e['uploaded_at'] = dateutil_parse(torrent['UploadTime'])
                e['ptp_remaster_title'] = torrent.get(
                    'RemasterTitle'
                )  # tags such as remux, 4k remaster, etc.
                e['ptp_quality'] = torrent.get(
                    'Quality'
                )  # high, ultra high, or standard definition
                e['ptp_resolution'] = torrent.get('Resolution')  # 1080p, 720p, etc.
                e['ptp_source'] = torrent.get('Source')  # blu-ray, dvd, etc.
                e['ptp_container'] = torrent.get('Container')  # mkv, vob ifo, etc.
                e['ptp_codec'] = torrent.get('Codec')  # x264, XviD, etc.
                e['url'] = (
                    self.base_url
                    + 'torrents.php?action=download&id={}&authkey={}&torrent_pass={}'.format(
                        e['torrent_id'], authkey, passkey
                    )
                )

                entries.add(e)

    return entries
def test_parse_filesize_ib_not_valid(self):
    """A bare 'ib' unit (no size prefix) must raise ValueError."""
    with pytest.raises(ValueError):
        parse_filesize('100 ib')
def search(self, task, entry, config):
    """CPASBIEN search plugin

    Config example:

    tv_search_cpasbien:
        discover:
          what:
             - trakt_list:
                username: xxxxxxx
                api_key: xxxxxxx
                series: watchlist
          from:
            - cpasbien:
                category: "series-vostfr"
          interval: 1 day
          ignore_estimations: yes

    Category is ONE of:
        all
        films
        series
        musique
        films-french
        1080p
        720p
        series-francaise
        films-dvdrip
        films-vostfr
        series-vostfr
        ebook
    """
    base_url = 'http://www.cpasbien.io'
    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        # site expects lowercase, dash-separated queries without parentheses
        search_string = search_string.replace(' ', '-').lower()
        search_string = search_string.replace('(', '')
        search_string = search_string.replace(')', '')
        query = normalize_unicode(search_string)
        query_url_fragment = quote_plus(query.encode('utf-8'))
        # http://www.cpasbien.pe/recherche/ncis.html
        if config['category'] == 'all':
            # no category path segment for 'all'
            str_url = (base_url, 'recherche', query_url_fragment)
            url = '/'.join(str_url)
        else:
            category_url_fragment = '%s' % config['category']
            str_url = (base_url, 'recherche', category_url_fragment, query_url_fragment)
            url = '/'.join(str_url)
        log.debug('search url: %s' % url + '.html')
        # GET URL
        f = task.requests.get(url + '.html').content
        soup = get_soup(f)
        # the result page prints ' 0 torrents' when nothing matched
        if soup.findAll(text=re.compile(' 0 torrents')):
            log.debug('search returned no results')
        else:
            # nextpage: 0 = first page (already fetched), >0 = follow-up pages,
            # -1 = stop (no 'Suiv' (next) link found on the current page)
            nextpage = 0
            while (nextpage >= 0):
                if (nextpage > 0):
                    newurl = url + '/page-' + str(nextpage)
                    log.debug('-----> NEXT PAGE : %s' % newurl)
                    f1 = task.requests.get(newurl).content
                    soup = get_soup(f1)
                # each result row is a div whose class contains 'ligne'
                for result in soup.findAll('div', attrs={'class': re.compile('ligne')}):
                    entry = Entry()
                    link = result.find('a', attrs={'href': re.compile('dl-torrent')})
                    entry['title'] = link.contents[0]
                    # REWRITE URL
                    page_link = link.get('href')
                    link_rewrite = page_link.split('/')
                    # get last value in array remove .html and replace by .torrent
                    endlink = link_rewrite[-1]
                    str_url = (base_url, '/telechargement/', endlink[:-5], '.torrent')
                    entry['url'] = ''.join(str_url)
                    log.debug('Title: %s | DL LINK: %s' % (entry['title'], entry['url']))
                    entry['torrent_seeds'] = (int(result.find('span', attrs={'class': re.compile('seed')}).text))
                    entry['torrent_leeches'] = (int(result.find('div', attrs={'class': re.compile('down')}).text))
                    size = result.find('div', attrs={'class': re.compile('poid')}).text
                    # site reports sizes in binary units despite SI labels
                    entry['content_size'] = parse_filesize(size, si=False)
                    # dead torrents (0 seeders) are dropped
                    if (entry['torrent_seeds'] > 0):
                        entries.add(entry)
                    else:
                        log.debug('0 SEED, not adding entry')
                if soup.find(text=re.compile('Suiv')):
                    nextpage += 1
                else:
                    nextpage = -1
    return entries
def search(self, task, entry, config): """ Search for entries on Limetorrents """ if not isinstance(config, dict): config = {'category': config} order_by = '' if isinstance(config.get('order_by'), str): if config['order_by'] != 'date': order_by = '{0}/1'.format(config['order_by']) category = 'all' if isinstance(config.get('category'), str): category = '{0}'.format(config['category']) entries = set() for search_string in entry.get('search_strings', [entry['title']]): query = 'search/{0}/{1}/{2}'.format(category, quote(search_string.encode('utf8')), order_by) log.debug('Using search: %s; category: %s; ordering: %s', search_string, category, order_by or 'default') try: page = task.requests.get(self.base_url + query) log.debug('requesting: %s', page.url) except RequestException as e: log.error('Limetorrents request failed: %s', e) continue soup = get_soup(page.content) if soup.find('a', attrs={'class': 'csprite_dl14'}) is not None: for link in soup.findAll('a', attrs={'class': 'csprite_dl14'}): row = link.find_parent('tr') info_url = str(link.get('href')) # Get the title from the URL as it's complete versus the actual Title text which gets cut off title = str(link.next_sibling.get('href')) title = title[:title.rfind('-torrent')].replace('-', ' ') title = title[1:] data = row.findAll('td', attrs={'class': 'tdnormal'}) size = str(data[1].text).replace(',', '') seeds = int(row.find('td', attrs={'class': 'tdseed'}).text.replace(',', '')) leeches = int(row.find('td', attrs={'class': 'tdleech'}).text.replace(',', '')) size = parse_filesize(size) e = Entry() e['url'] = info_url e['title'] = title e['torrent_seeds'] = seeds e['torrent_leeches'] = leeches e['search_sort'] = torrent_availability(e['torrent_seeds'], e['torrent_leeches']) e['content_size'] = size entries.add(e) return entries
def search(self, task, entry, config):
    """
    Search for entries on PassThePopcorn.

    Uses ``entry['imdb_id']`` as the search string when available (much more
    precise than a title search); otherwise falls back to the entry's search
    strings. Returns a set of Entry objects.
    """
    params = {}

    if 'tags' in config:
        tags = config['tags'] if isinstance(config['tags'], list) else [config['tags']]
        params['taglist'] = ',+'.join(tags)

    release_type = config.get('release_type')
    if release_type:
        params['scene'] = RELEASE_TYPES[release_type]

    if config.get('freeleech'):
        params['freetorrent'] = int(config['freeleech'])

    ordering = 'desc' if config['order_desc'] else 'asc'

    entries = set()

    params.update({
        'order_by': ORDERING[config['order_by']],
        'order_way': ordering,
        'action': 'advanced',
        'json': 'noredirect'
    })

    search_strings = entry.get('search_strings', [entry['title']])
    # searching with imdb id is much more precise
    if entry.get('imdb_id'):
        search_strings = [entry['imdb_id']]

    for search_string in search_strings:
        params['searchstr'] = search_string
        log.debug('Using search params: %s', params)
        try:
            result = self.get(self.base_url + 'torrents.php', params, config['username'],
                              config['password'], config['passkey']).json()
        except RequestException as e:
            log.error('PassThePopcorn request failed: %s', e)
            continue

        total_results = result['TotalResults']
        log.debug('Total results: %s', total_results)
        # authkey/passkey from the response are required to build download URLs
        authkey = result['AuthKey']
        passkey = result['PassKey']

        for movie in result['Movies']:
            # skip movies that are irrelevant
            # NOTE(review): this skips on year mismatch even when the imdb id
            # matched -- other variants of this plugin only apply the year
            # filter when no imdb id is known; confirm which is intended.
            if entry.get('movie_year') and int(movie['Year']) != int(entry['movie_year']):
                log.debug('Movie year %s does not match %s', movie['Year'], entry['movie_year'])
                continue
            # imdb id in the json result is without 'tt'
            if entry.get('imdb_id') and movie['ImdbId'] not in entry['imdb_id']:
                log.debug('imdb id %s does not match %s', movie['ImdbId'], entry['imdb_id'])
                continue

            for torrent in movie['Torrents']:
                e = Entry()

                e['title'] = torrent['ReleaseName']
                if entry.get('imdb_id'):
                    e['imdb_id'] = entry.get('imdb_id')
                e['torrent_tags'] = movie['Tags']
                # API reports size in bytes; parse_filesize expects a unit suffix
                e['content_size'] = parse_filesize(torrent['Size'] + ' b')
                e['torrent_snatches'] = int(torrent['Snatched'])
                e['torrent_seeds'] = int(torrent['Seeders'])
                e['torrent_leeches'] = int(torrent['Leechers'])
                e['torrent_id'] = int(torrent['Id'])
                e['golden_popcorn'] = torrent['GoldenPopcorn']
                e['checked'] = torrent['Checked']
                e['uploaded_at'] = dateutil_parse(torrent['UploadTime'])

                e['url'] = self.base_url + 'torrents.php?action=download&id={}&authkey={}&torrent_pass={}'.format(
                    e['torrent_id'], authkey, passkey
                )

                entries.add(e)
    return entries
def search(self, task, entry, config):
    """
    Search for entries on PassThePopcorn.

    :param task: task providing the requests session
    :param entry: entry to search for; ``imdb_id`` is preferred over title
        strings since an imdb search is much more precise
    :param config: plugin config (credentials, ordering, tag/freeleech filters)
    :return: set of Entry objects, one per matching torrent
    """
    params = {}

    if 'tags' in config:
        tags = config['tags'] if isinstance(config['tags'], list) else [config['tags']]
        params['taglist'] = ',+'.join(tags)

    release_type = config.get('release_type')
    if release_type:
        params['scene'] = RELEASE_TYPES[release_type]

    if config.get('freeleech'):
        params['freetorrent'] = int(config['freeleech'])

    ordering = 'desc' if config['order_desc'] else 'asc'

    entries = set()

    params.update({
        'order_by': ORDERING[config['order_by']],
        'order_way': ordering,
        'action': 'advanced',
        'json': 'noredirect'
    })

    search_strings = entry.get('search_strings', [entry['title']])
    # searching with imdb id is much more precise
    if entry.get('imdb_id'):
        search_strings = [entry['imdb_id']]

    for search_string in search_strings:
        params['searchstr'] = search_string
        log.debug('Using search params: %s', params)
        try:
            result = self.get(self.base_url + 'torrents.php', params, config['username'],
                              config['password'], config['passkey']).json()
        except RequestException as e:
            log.error('PassThePopcorn request failed: %s', e)
            continue

        total_results = result['TotalResults']
        log.debug('Total results: %s', total_results)

        authkey = result['AuthKey']
        passkey = result['PassKey']

        for movie in result['Movies']:
            # skip movies with wrong year
            # don't consider if we have imdb_id (account for year discrepancies if we know we have
            # the right movie)
            # FIX: membership must be tested on `entry`, not `movie`. The API
            # movie dict uses the keys 'ImdbId'/'Year', never
            # 'imdb_id'/'movie_year', so testing `movie` made both the year
            # filter and the imdb filter below unconditionally inert.
            if ('imdb_id' not in entry and 'movie_year' in entry
                    and int(movie['Year']) != int(entry['movie_year'])):
                log.debug('Movie year %s does not match default %s',
                          movie['Year'], entry['movie_year'])
                continue
            # imdb id in the json result is without 'tt'
            if 'imdb_id' in entry and movie['ImdbId'] not in entry['imdb_id']:
                log.debug('imdb id %s does not match %s', movie['ImdbId'], entry['imdb_id'])
                continue

            for torrent in movie['Torrents']:
                e = Entry()

                e['title'] = torrent['ReleaseName']
                e['imdb_id'] = entry.get('imdb_id')
                e['torrent_tags'] = movie['Tags']
                # API reports size in bytes; parse_filesize expects a unit suffix
                e['content_size'] = parse_filesize(torrent['Size'] + ' b')
                e['torrent_snatches'] = int(torrent['Snatched'])
                e['torrent_seeds'] = int(torrent['Seeders'])
                e['torrent_leeches'] = int(torrent['Leechers'])
                e['torrent_id'] = int(torrent['Id'])
                e['golden_popcorn'] = torrent['GoldenPopcorn']
                e['checked'] = torrent['Checked']
                e['scene'] = torrent['Scene']
                e['uploaded_at'] = dateutil_parse(torrent['UploadTime'])
                e['ptp_remaster_title'] = torrent.get(
                    'RemasterTitle')  # tags such as remux, 4k remaster, etc.
                e['ptp_quality'] = torrent.get(
                    'Quality')  # high, ultra high, or standard definition
                e['ptp_resolution'] = torrent.get(
                    'Resolution')  # 1080p, 720p, etc.
                e['ptp_source'] = torrent.get(
                    'Source')  # blu-ray, dvd, etc.
                e['ptp_container'] = torrent.get(
                    'Container')  # mkv, vob ifo, etc.
                e['ptp_codec'] = torrent.get('Codec')  # x264, XviD, etc.

                e['url'] = self.base_url + 'torrents.php?action=download&id={}&authkey={}&torrent_pass={}'.format(
                    e['torrent_id'], authkey, passkey)

                entries.add(e)
    return entries
def search(self, task, entry, config):
    """
    Search for entries on AwesomeHD.

    Only imdb-id based searches are supported; entries without ``imdb_id``
    yield no results. Returns a set of Entry objects with a synthesized
    release title.
    """
    # need lxml to parse xml
    try:
        import lxml  # noqa
    except ImportError as e:
        log.debug('Error importing lxml: %s', e)
        raise plugin.DependencyError(
            'awesomehd', 'lxml', 'lxml module required. ImportError: %s' % e)

    config = self.prepare_config(config)

    # set a domain limit, but allow the user to overwrite it
    if 'awesome-hd.me' not in task.requests.domain_limiters:
        task.requests.add_domain_limiter(
            TimedLimiter('awesome-hd.me', '5 seconds'))

    entries = set()

    # Can only search for imdb
    if not entry.get('imdb_id'):
        log.debug('Skipping entry %s because of missing imdb id', entry['title'])
        return entries

    # Standard search params
    params = {
        'passkey': config['passkey'],
        'internal': int(config['only_internal']),
        'action': 'imdbsearch',
        'imdb': entry['imdb_id'],
    }

    try:
        response = task.requests.get(self.base_url + 'searchapi.php', params=params).content
    except RequestException as e:
        log.error('Failed to search for imdb id %s: %s', entry['imdb_id'], e)
        return entries

    try:
        soup = get_soup(response, 'xml')
        # API signals failures inside the XML body rather than via HTTP status
        if soup.find('error'):
            log.error(soup.find('error').get_text())
            return entries
    except Exception as e:
        log.error('Failed to parse xml result for imdb id %s: %s', entry['imdb_id'], e)
        return entries

    authkey = soup.find('authkey').get_text()

    for result in soup.find_all('torrent'):
        # skip audio releases for now
        if not result.find('resolution').get_text():
            log.debug('Skipping audio release')
            continue

        e = Entry()

        e['imdb_id'] = result.find('imdb').get_text()
        e['torrent_id'] = int(result.find('id').get_text())
        e['uploaded_at'] = dateutil_parse(result.find('time').get_text())
        # API reports size in bytes; parse_filesize expects a unit suffix
        e['content_size'] = parse_filesize('{} b'.format(
            result.find('size').get_text()))
        e['torrent_snatches'] = int(result.find('snatched').get_text())
        e['torrent_seeds'] = int(result.find('seeders').get_text())
        e['torrent_leeches'] = int(result.find('leechers').get_text())
        e['release_group'] = result.find('releasegroup').get_text()
        # API field is the leech cost factor (0 = freeleech); convert to percent free
        e['freeleech_percent'] = int(
            (1 - float(result.find('freeleech').get_text())) * 100)
        e['encode_status'] = result.find('encodestatus').get_text()
        e['subtitles'] = result.find('subtitles').get_text().split(', ')
        e['url'] = (
            self.base_url +
            'torrents.php?action=download&id={}&authkey={}&torrent_pass={}'
            .format(e['torrent_id'], authkey, config['passkey']))

        # Generate a somewhat sensible title
        audio = result.find('audioformat').get_text().replace(
            'AC-3', 'AC3')  # normalize a bit
        source = result.find('media').get_text()
        encoder = result.find('encoding').get_text()
        # calling a WEB-DL a remux is pretty redundant
        if 'WEB' in source.upper():
            encoder = re.sub('REMUX', '', encoder, flags=re.IGNORECASE).strip()

        e['title'] = '{movie_name} {year} {resolution} {source} {audio} {encoder}-{release_group}'.format(
            movie_name=result.find('name').get_text(),
            year=result.find('year').get_text(),
            resolution=result.find('resolution').get_text(),
            source=source,
            audio=audio,
            encoder=encoder,
            release_group=e['release_group'],
        )

        entries.add(e)

    return entries
def search(self, task, entry, config=None):
    """
    Search for name from torrent411.

    :param task: task instance (unused beyond interface conformance here;
        requests are issued through a dedicated urllib opener)
    :param entry: entry to search for; season/episode fields add term filters
    :param config: optional dict with category/sub_category filters and
        username/password for download authentication
    :return: list of Entry objects sorted by descending search_sort
    """
    url_base = "http://www.t411.ch"

    if not isinstance(config, dict):
        config = {}

    category = config.get("category")
    if category in list(CATEGORIES):
        category = CATEGORIES[category]

    sub_categories = config.get("sub_category")
    if not isinstance(sub_categories, list):
        sub_categories = [sub_categories]

    filter_url = ""
    if isinstance(category, int):
        filter_url = "&cat=%s" % str(category)

        if sub_categories[0] is not None:
            sub_categories = [SUB_CATEGORIES[c] for c in sub_categories]
            # FIX: quote_plus() returns str on Python 3; the previous
            # .encode("utf-8") turned it into bytes and `bytes + "="`
            # raised TypeError whenever a sub_category filter was used.
            filter_url = (
                filter_url
                + "&"
                + "&".join(
                    urllib.parse.quote_plus("term[%s][]" % c[0]) + "=" + str(c[1])
                    for c in sub_categories
                )
            )

    if "series_season" in entry and "series_episode" in entry:
        season = entry["series_season"]
        if season in list(SEASONS):
            filter_url = filter_url + "&term[%d][]" % SEASONS[season][0] + "=" + str(SEASONS[season][1])

        episode = entry["series_episode"]
        if episode in list(EPISODES):
            filter_url = filter_url + "&term[%d][]" % EPISODES[episode][0] + "=" + str(EPISODES[episode][1])

    entries = set()
    for search_string in entry.get("search_strings", [entry["title"]]):
        query = normalize_unicode(search_string)
        # %40name == @name (search in torrent names)
        url_search = (
            "/torrents/search/?search=%40name+"
            + urllib.parse.quote_plus(query.encode("utf-8"))
            + filter_url
        )

        opener = urllib.request.build_opener()
        opener.addheaders = [("User-agent", "Mozilla/5.0")]
        response = opener.open(url_base + url_search)

        data = response.read()
        soup = get_soup(data)
        tb = soup.find("table", class_="results")
        if not tb:
            continue

        # first row is the header, last row is the pager
        for tr in tb.findAll("tr")[1:][:-1]:
            entry = Entry()
            # raw strings: '\?' and '\d' are invalid escapes in plain strings
            nfo_link_res = re.search(r"torrents/nfo/\?id=(\d+)", str(tr))
            if nfo_link_res is not None:
                tid = nfo_link_res.group(1)
            title_res = re.search(
                r'<a href="//www.t411.ch/torrents/([-A-Za-z0-9+&@#/%|?=~_|!:,.;]+)" title="([^"]*)">',
                str(tr),
            )
            if title_res is not None:
                entry["title"] = native_str_to_text(title_res.group(2), encoding="utf-8")
            size = tr("td")[5].contents[0]
            entry["url"] = "http://www.t411.ch/torrents/download/?id=%s" % tid
            # FIX: convert to int (as parse_entry does) -- torrent_availability
            # on the NavigableString cell contents computed nonsense
            # (string repetition/concatenation) instead of 2*seeds+leeches.
            entry["torrent_seeds"] = int(tr("td")[7].contents[0])
            entry["torrent_leeches"] = int(tr("td")[8].contents[0])
            entry["search_sort"] = torrent_availability(entry["torrent_seeds"], entry["torrent_leeches"])
            size = re.search(r"([\.\d]+) ([GMK]?)B", size)
            entry["content_size"] = parse_filesize(size.group(0))

            auth_handler = t411Auth(config["username"], config["password"])

            entry["download_auth"] = auth_handler
            entries.add(entry)

    return sorted(entries, reverse=True, key=lambda x: x.get("search_sort"))
def search(self, task, entry, config):
    """
    Search for entries on FileList.ro.

    Scrapes the browse page using the configured category/ordering filters
    and returns a set of Entry objects carrying seed/leech/snatch counts,
    freeleech/internal flags and (when present) genre tags.
    """
    entries = set()

    params = {
        'cat': CATEGORIES[config['category']],
        'incldead': int(config['include_dead']),
        'order_by': SORTING[config['order_by']],
        'searchin': SEARCH_IN[config['search_in']],
        'asc': int(config['order_ascending']),
    }

    for search_string in entry.get('search_strings', [entry['title']]):
        params['search'] = search_string
        log.debug('Using search params: %s', params)
        try:
            page = self.get(BASE_URL + 'browse.php', params, config['username'], config['password'])
            log.debug('requesting: %s', page.url)
        except RequestException as e:
            log.error('FileList.ro request failed: %s', e)
            continue

        soup = get_soup(page.content)
        for result in soup.findAll('div', attrs={'class': 'torrentrow'}):
            e = Entry()

            # each row holds a fixed sequence of 'torrenttable' cells,
            # addressed positionally below (1=title, 3=download link,
            # 6=size, 7=snatches, 8=seeders, 9=leechers)
            torrent_info = result.findAll('div', attrs={'class': 'torrenttable'})
            # genres
            genres = torrent_info[1].find('font')
            if genres:
                # rendered as '[genre1 | genre2 | ...]'
                genres = genres.text.lstrip('[').rstrip(']').replace(
                    ' ', '')
                genres = genres.split('|')

            tags = torrent_info[1].findAll('img')
            freeleech = False
            internal = False
            for tag in tags:
                if tag.get('alt', '').lower() == 'freeleech':
                    freeleech = True
                if tag.get('alt', '').lower() == 'internal':
                    internal = True

            title = torrent_info[1].find('a').get('title')
            # this is a dirty fix to get the full title since their developer is a moron
            if re.match(r"\<img src=\'.*\'\>", title):
                title = torrent_info[1].find('b').text
            # if the title is shortened, then do a request to get the full one :(
            if title.endswith('...'):
                url = BASE_URL + torrent_info[1].find('a')['href']
                try:
                    request = self.get(url, {}, config['username'], config['password'])
                except RequestException as e:
                    log.error('FileList.ro request failed: %s', e)
                    continue
                title_soup = get_soup(request.content)
                title = title_soup.find('div', attrs={
                    'class': 'cblock-header'
                }).text

            e['title'] = title
            e['url'] = (BASE_URL + torrent_info[3].find('a')['href'] +
                        '&passkey=' + config['passkey'])
            e['content_size'] = parse_filesize(
                torrent_info[6].find('font').text)

            # snatch count is rendered like '1,234 times'
            e['torrent_snatches'] = int(
                torrent_info[7].find('font').text.replace(' ', '').replace(
                    'times', '').replace(',', ''))
            e['torrent_seeds'] = int(torrent_info[8].find('span').text)
            e['torrent_leeches'] = int(torrent_info[9].find('span').text)
            e['torrent_internal'] = internal
            e['torrent_freeleech'] = freeleech
            if genres:
                e['torrent_genres'] = genres

            entries.add(e)

    return entries
def search(self, task, entry, config):
    """CPASBIEN search plugin

    Config example:

    tv_search_cpasbien:
        discover:
          what:
             - trakt_list:
                username: xxxxxxx
                api_key: xxxxxxx
                series: watchlist
          from:
            - cpasbien:
                category: "series-vostfr"
          interval: 1 day
          ignore_estimations: yes

    Category is ONE of:
        all
        films
        series
        musique
        films-french
        1080p
        720p
        series-francaise
        films-dvdrip
        films-vostfr
        series-vostfr
        ebook
    """
    base_url = 'http://www.cpasbien.io'
    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        # normalize query to the site's format: lowercase, dashes, no parens
        search_string = search_string.replace(' ', '-').lower()
        search_string = search_string.replace('(', '')
        search_string = search_string.replace(')', '')
        query = normalize_unicode(search_string)
        query_url_fragment = quote_plus(query.encode('utf-8'))
        # http://www.cpasbien.pe/recherche/ncis.html
        if config['category'] == 'all':
            str_url = (base_url, 'recherche', query_url_fragment)
            url = '/'.join(str_url)
        else:
            category_url_fragment = '%s' % config['category']
            str_url = (base_url, 'recherche', category_url_fragment, query_url_fragment)
            url = '/'.join(str_url)
        log.debug('search url: %s' % url + '.html')
        # GET URL
        f = task.requests.get(url + '.html').content
        soup = get_soup(f)
        # a literal ' 0 torrents' in the page means the search matched nothing
        if soup.findAll(text=re.compile(' 0 torrents')):
            log.debug('search returned no results')
        else:
            # paginate while a 'Suiv' (next) link exists; -1 terminates the loop
            nextpage = 0
            while (nextpage >= 0):
                if (nextpage > 0):
                    newurl = url + '/page-' + str(nextpage)
                    log.debug('-----> NEXT PAGE : %s' % newurl)
                    f1 = task.requests.get(newurl).content
                    soup = get_soup(f1)
                for result in soup.findAll(
                        'div', attrs={'class': re.compile('ligne')}):
                    entry = Entry()
                    link = result.find(
                        'a', attrs={'href': re.compile('dl-torrent')})
                    entry['title'] = link.contents[0]
                    # REWRITE URL
                    page_link = link.get('href')
                    link_rewrite = page_link.split('/')
                    # get last value in array remove .html and replace by .torrent
                    endlink = link_rewrite[-1]
                    str_url = (base_url, '/telechargement/', endlink[:-5],
                               '.torrent')
                    entry['url'] = ''.join(str_url)

                    log.debug('Title: %s | DL LINK: %s' %
                              (entry['title'], entry['url']))

                    entry['torrent_seeds'] = (int(
                        result.find('span', attrs={
                            'class': re.compile('seed')
                        }).text))
                    entry['torrent_leeches'] = (int(
                        result.find('div', attrs={
                            'class': re.compile('down')
                        }).text))
                    size = result.find('div', attrs={
                        'class': re.compile('poid')
                    }).text

                    # site sizes are binary-based despite SI-looking labels
                    entry['content_size'] = parse_filesize(size, si=False)

                    # only keep torrents that actually have seeders
                    if (entry['torrent_seeds'] > 0):
                        entries.add(entry)
                    else:
                        log.debug('0 SEED, not adding entry')
                if soup.find(text=re.compile('Suiv')):
                    nextpage += 1
                else:
                    nextpage = -1
    return entries
def test_parse_filesize_non_si(self): size = '1234 GB' expected = 1234 * 1000**3 / 1024**2 assert compare_floats(parse_filesize(size), expected)
def search(self, task, entry, config):
    """
    Search for entries on AwesomeHD.

    Only imdb-id based searches are supported; entries without ``imdb_id``
    yield no results. Returns a set of Entry objects with a synthesized
    release title.
    """
    # need lxml to parse xml
    try:
        import lxml  # noqa
    except ImportError as e:
        log.debug('Error importing lxml: %s', e)
        raise plugin.DependencyError(
            'awesomehd', 'lxml', 'lxml module required. ImportError: %s' % e
        )

    config = self.prepare_config(config)

    # set a domain limit, but allow the user to overwrite it
    if 'awesome-hd.me' not in task.requests.domain_limiters:
        task.requests.add_domain_limiter(TimedLimiter('awesome-hd.me', '5 seconds'))

    entries = set()

    # Can only search for imdb
    if not entry.get('imdb_id'):
        log.debug('Skipping entry %s because of missing imdb id', entry['title'])
        return entries

    # Standard search params
    params = {
        'passkey': config['passkey'],
        'internal': int(config['only_internal']),
        'action': 'imdbsearch',
        'imdb': entry['imdb_id'],
    }

    try:
        response = task.requests.get(self.base_url + 'searchapi.php', params=params).content

    except RequestException as e:
        log.error('Failed to search for imdb id %s: %s', entry['imdb_id'], e)
        return entries

    try:
        soup = get_soup(response, 'xml')
        # the API reports failures inside the XML body, not via HTTP status
        if soup.find('error'):
            log.error(soup.find('error').get_text())
            return entries
    except Exception as e:
        log.error('Failed to parse xml result for imdb id %s: %s', entry['imdb_id'], e)
        return entries

    authkey = soup.find('authkey').get_text()

    for result in soup.find_all('torrent'):
        # skip audio releases for now
        if not result.find('resolution').get_text():
            log.debug('Skipping audio release')
            continue

        e = Entry()

        e['imdb_id'] = result.find('imdb').get_text()
        e['torrent_id'] = int(result.find('id').get_text())
        e['uploaded_at'] = dateutil_parse(result.find('time').get_text())
        # API reports size in bytes; parse_filesize expects a unit suffix
        e['content_size'] = parse_filesize('{} b'.format(result.find('size').get_text()))
        e['torrent_snatches'] = int(result.find('snatched').get_text())
        e['torrent_seeds'] = int(result.find('seeders').get_text())
        e['torrent_leeches'] = int(result.find('leechers').get_text())
        e['release_group'] = result.find('releasegroup').get_text()
        # API field is the leech cost factor (0 = freeleech); convert to percent free
        e['freeleech_percent'] = int((1 - float(result.find('freeleech').get_text())) * 100)
        e['encode_status'] = result.find('encodestatus').get_text()
        e['subtitles'] = result.find('subtitles').get_text().split(', ')
        e['url'] = (
            self.base_url
            + 'torrents.php?action=download&id={}&authkey={}&torrent_pass={}'.format(
                e['torrent_id'], authkey, config['passkey']
            )
        )

        # Generate a somewhat sensible title
        audio = result.find('audioformat').get_text().replace('AC-3', 'AC3')  # normalize a bit
        source = result.find('media').get_text()
        encoder = result.find('encoding').get_text()
        # calling a WEB-DL a remux is pretty redundant
        if 'WEB' in source.upper():
            encoder = re.sub('REMUX', '', encoder, flags=re.IGNORECASE).strip()

        e[
            'title'
        ] = '{movie_name} {year} {resolution} {source} {audio} {encoder}-{release_group}'.format(
            movie_name=result.find('name').get_text(),
            year=result.find('year').get_text(),
            resolution=result.find('resolution').get_text(),
            source=source,
            audio=audio,
            encoder=encoder,
            release_group=e['release_group'],
        )

        entries.add(e)

    return entries
def test_parse_filesize_auto(self): size = '1234 GiB' expected = 1234 * 1024**3 / 1024**2 assert compare_floats(parse_filesize(size), expected)