def url_rewrite(self, task, entry):
    soup = self._get_soup(task, entry['url'])
    # raw string so the backslash escapes reach the regex engine intact
    link_re = re.compile(r'rarefile\.net.*\.rar$')
    # grab links from the main entry: keep the paragraph with the most matching links
    blog_entry = soup.find('div', class_="entry")
    num_links = 0
    link_list = None
    for paragraph in blog_entry.find_all('p'):
        links = paragraph.find_all('a', href=link_re)
        if len(links) > num_links:
            link_list = links
            num_links = len(links)
    if 'urls' in entry:
        urls = list(entry['urls'])
    else:
        urls = []
    if link_list is not None:
        for link in link_list:
            urls.append(normalize_unicode(link['href']))
    else:
        raise UrlRewritingError('No useable links found at %s' % entry['url'])
    num_links = len(urls)
    log.verbose('Found %d links at %s.', num_links, entry['url'])
    if num_links:
        entry['urls'] = urls
        entry['url'] = urls[0]
    else:
        raise UrlRewritingError('No useable links found at %s' % entry['url'])
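# The rewriter above relies on a _get_soup helper that is not shown here. A minimal
# sketch of what it plausibly looks like, mirroring the fetch-and-parse pattern used
# by the other url_rewrite further down (error types and parser choice are assumptions):
def _get_soup(self, task, url):
    try:
        page = task.requests.get(url)
    except RequestException as e:
        raise UrlRewritingError(str(e))
    try:
        return get_soup(page.text)
    except Exception as e:
        raise UrlRewritingError(str(e))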
def entries_from_search(self, name, url=None):
    """Parses torrent download url from search results"""
    name = normalize_unicode(name)
    if not url:
        url = 'http://www.newtorrents.info/search/%s' % quote(
            name.encode('utf-8'), safe=b':/~?=&%'
        )
    log.debug('search url: %s' % url)
    html = requests.get(url).text
    # fix </SCR'+'IPT> so that BS does not crash
    # TODO: should use beautifulsoup massage
    html = re.sub(r'(</SCR.*?)...(.*?IPT>)', r'\1\2', html)
    soup = get_soup(html)
    # collect result torrents
    torrents = []
    for link in soup.find_all('a', attrs={'href': re.compile('down.php')}):
        torrent_url = 'http://www.newtorrents.info%s' % link.get('href')
        release_name = link.parent.next.get('title')
        # quick dirty hack; renderContents() returns bytes under bs4, so decode
        # before comparing or converting to int
        seed = link.find_next('td', attrs={'class': re.compile('s')}).renderContents().decode()
        if seed == 'n/a':
            seed = 0
        else:
            try:
                seed = int(seed)
            except ValueError:
                log.warning(
                    'Error converting seed value (%s) from newtorrents to integer.' % seed
                )
                seed = 0
        # TODO: also parse content_size and peers from results
        torrents.append(
            Entry(
                title=release_name,
                url=torrent_url,
                torrent_seeds=seed,
                torrent_availability=torrent_availability(seed, 0),
            )
        )
    # sort by availability, highest first
    torrents.sort(reverse=True, key=lambda x: x.get('torrent_availability', 0))
    # if nothing matched, retry with the last dash-separated token dropped
    if not torrents:
        dashindex = name.rfind('-')
        if dashindex != -1:
            return self.entries_from_search(name[:dashindex])
        else:
            return torrents
    else:
        if len(torrents) == 1:
            log.debug('found only one matching search result.')
        else:
            log.debug(
                'search result contains multiple matches, sorted %s by most seeders' % torrents
            )
        return torrents
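# torrent_availability is used throughout these searches to rank results. A sketch
# of the helper, assuming FlexGet's convention of weighting seeders over leechers
# (the exact weights below are an assumption, not taken from this source):
def torrent_availability(seeds, leeches):
    """Score a torrent so results can be sorted by likely availability."""
    return seeds * 2 + leeches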
def search(self, task, entry, config=None):
    from flexget.utils.template import environment

    search_strings = [
        quote(normalize_unicode(s).encode('utf-8'))
        for s in entry.get('search_strings', [entry['title']])
    ]
    rss_plugin = plugin.get_plugin_by_name('rss')
    entries = set()
    rss_config = rss_plugin.instance.build_config(config)
    try:
        template = environment.from_string(rss_config['url'])
    except TemplateSyntaxError as e:
        raise plugin.PluginError('Invalid jinja template as rss url: %s' % e)
    rss_config['all_entries'] = True
    for search_string in search_strings:
        rss_config['url'] = template.render({'search_term': search_string})
        # TODO: capture some other_fields to try to find seed/peer/content_size numbers?
        try:
            results = rss_plugin.phase_handlers['input'](task, rss_config)
        except plugin.PluginError as e:
            log.error('Error attempting to get rss for %s: %s', rss_config['url'], e)
        else:
            entries.update(results)
    return entries
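# A small standalone sketch of the jinja rendering step above: the rss url in the
# config may contain a {{ search_term }} placeholder that is filled per search
# string. The url below is a made-up example, and a plain jinja2 Environment stands
# in for FlexGet's preconfigured flexget.utils.template.environment.
from jinja2 import Environment

template = Environment().from_string('https://example.org/rss?q={{ search_term }}')
print(template.render({'search_term': 'some%20show'}))
# -> https://example.org/rss?q=some%20show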
def prepare_search_query(self, search_string):
    query = normalize_unicode(search_string)
    # split a trailing season/episode token (e.g. 'S02E05', '3x09' or 'S02') off the
    # title; guard against no match, which made the original [0][0] raise IndexError
    matches = re.findall(r'((S?\d+(E|x)\d+)|S?\d+)$', query)
    se = matches[0][0] if matches else ''
    if se:
        # strip the token positionally instead of re.sub, which would treat the
        # matched text as a pattern and could remove an earlier occurrence
        query = query[: -len(se)].strip()
    self.se = se
    self.query = query
    return query
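# Standalone demo of the season/episode split performed by prepare_search_query;
# split_se is a hypothetical helper written for illustration only.
import re

def split_se(query):
    m = re.search(r'(S?\d+(E|x)\d+|S?\d+)$', query)
    se = m.group(0) if m else ''
    return query[: len(query) - len(se)].strip(), se

print(split_se('Some Show S02E05'))  # -> ('Some Show', 'S02E05')
print(split_se('Some Show 3x09'))    # -> ('Some Show', '3x09')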
def search(self, task, entry, config=None):
    config = self.process_config(config)
    feed = REPUTATIONS[config['reputation']]
    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        query = normalize_unicode(search_string + config.get('extra_terms', ''))
        for domain in ['eu', 'is']:
            # urllib.quote will crash if the unicode string has non ascii characters,
            # so encode in utf-8 beforehand
            url = 'http://torrentz2.%s/%s?f=%s' % (domain, feed, quote(query.encode('utf-8')))
            log.debug('requesting: %s' % url)
            try:
                r = task.requests.get(url)
                break
            except requests.ConnectionError as err:
                # The different domains all resolve to the same ip, so only try
                # more if it was a dns error
                log.warning('torrentz.%s connection failed. Error: %s' % (domain, err))
                continue
            except requests.RequestException as err:
                raise plugin.PluginError('Error getting torrentz search results: %s' % err)
        else:
            raise plugin.PluginError('Error getting torrentz search results')

        if not r.content.strip():
            raise plugin.PluginError(
                'No data from %s. Maybe torrentz is blocking the FlexGet User-Agent' % url
            )
        rss = feedparser.parse(r.content)
        if rss.get('bozo_exception'):
            raise plugin.PluginError('Got bozo_exception (bad rss feed)')
        for item in rss.entries:
            m = re.search(
                r'Size: ([\d]+) Mb Seeds: ([,\d]+) Peers: ([,\d]+) Hash: ([a-f0-9]+)',
                item.description,
                re.IGNORECASE,
            )
            if not m:
                log.debug('regexp did not find seeds / peer data')
                continue
            entry = Entry()
            entry['title'] = item.title
            entry['url'] = item.link
            entry['content_size'] = int(m.group(1))
            entry['torrent_seeds'] = int(m.group(2).replace(',', ''))
            entry['torrent_leeches'] = int(m.group(3).replace(',', ''))
            entry['torrent_info_hash'] = m.group(4).upper()
            entry['torrent_availability'] = torrent_availability(
                entry['torrent_seeds'], entry['torrent_leeches']
            )
            entries.add(entry)
    log.debug('Search got %d results' % len(entries))
    return entries
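# Demo of the description regex above on a representative torrentz2 summary string
# (the sample text is illustrative, not captured from the live feed):
import re

desc = 'Size: 700 Mb Seeds: 1,234 Peers: 56 Hash: 0123456789abcdef0123456789abcdef01234567'
m = re.search(
    r'Size: ([\d]+) Mb Seeds: ([,\d]+) Peers: ([,\d]+) Hash: ([a-f0-9]+)', desc, re.IGNORECASE
)
print(int(m.group(1)), int(m.group(2).replace(',', '')), int(m.group(3).replace(',', '')))
# -> 700 1234 56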
def search(self, task, entry, config=None):
    entries = set()
    search_strings = [
        normalize_unicode(s) for s in entry.get('search_strings', [entry['title']])
    ]
    for search_string in search_strings:
        # the original used the py2-only urllib.quote spelling; quote here is
        # urllib.parse.quote, as in the other searches
        url = 'https://yts.am/api/v2/list_movies.json?query_term=%s' % (
            quote(search_string.encode('utf-8'))
        )
        log.debug('requesting: %s' % url)
        try:
            result = requests.get(url)
            try:
                data = result.json()
            except ValueError:
                log.debug('Could not decode json from response: %s', result.text)
                raise plugin.PluginError('Error getting result from yts.')
        except requests.RequestException as e:
            raise plugin.PluginError('Could not retrieve query from yts (%s)' % e.args[0])
        if data['status'] != 'ok':
            raise plugin.PluginError('failed to query YTS')
        try:
            if data['data']['movie_count'] > 0:
                for item in data['data']['movies']:
                    for torrent in item['torrents']:
                        entry = Entry()
                        entry['title'] = item['title']
                        entry['year'] = item['year']
                        entry['url'] = torrent['url']
                        entry['content_size'] = parse_filesize(
                            str(torrent['size_bytes']) + "b"
                        )
                        entry['torrent_seeds'] = torrent['seeds']
                        entry['torrent_leeches'] = torrent['peers']
                        entry['torrent_info_hash'] = torrent['hash']
                        entry['torrent_availability'] = torrent_availability(
                            entry['torrent_seeds'], entry['torrent_leeches']
                        )
                        entry['quality'] = torrent['quality']
                        entry['imdb_id'] = item['imdb_code']
                        if entry.isvalid():
                            entries.add(entry)
        except Exception:
            log.debug('invalid return structure from YTS')
    log.debug('Search got %d results' % len(entries))
    return entries
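# Note on the size handling above: parse_filesize (a FlexGet util) parses strings
# like '734003200 b' or '1.4 GB'. Feeding it the raw byte count suffixed with 'b',
# as done here, presumably yields content_size in FlexGet's usual megabyte unit
# (that unit is an assumption, not stated in this code).
# e.g. parse_filesize('734003200 b') -> roughly 700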
def search(self, task, entry, config):
    if not isinstance(config, dict):
        config = {'category': config}
    config.setdefault('category', 'anime eng')
    config.setdefault('filter', 'all')
    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        name = normalize_unicode(search_string)
        url = 'https://www.nyaa.si/?page=rss&q=%s&c=%s&f=%s' % (
            quote(name.encode('utf-8')),
            CATEGORIES[config['category']],
            FILTERS.index(config['filter']),
        )
        log.debug('requesting: %s' % url)
        rss = feedparser.parse(url)
        status = rss.get('status', False)
        if status != 200:
            log.debug('Search result not 200 (OK), received %s' % status)
            if status >= 400:
                continue
        ex = rss.get('bozo_exception', False)
        if ex:
            log.error('Got bozo_exception (bad feed) on %s' % url)
            continue
        for item in rss.entries:
            entry = Entry()
            entry['title'] = item.title
            entry['url'] = item.link
            entry['torrent_seeds'] = int(item.nyaa_seeders)
            entry['torrent_leeches'] = int(item.nyaa_leechers)
            entry['torrent_info_hash'] = item.nyaa_infohash
            entry['torrent_availability'] = torrent_availability(
                entry['torrent_seeds'], entry['torrent_leeches']
            )
            if item.nyaa_size:
                entry['content_size'] = parse_filesize(item.nyaa_size)
            entries.add(entry)
    return entries
def url_rewrite(self, task, entry):
    try:
        page = task.requests.get(entry['url'])
    except RequestException as e:
        raise UrlRewritingError(str(e))
    try:
        soup = get_soup(page.text)
    except Exception as e:
        raise UrlRewritingError(str(e))
    link_elements = soup.find_all('pre', class_='links')
    if 'urls' in entry:
        urls = list(entry['urls'])
    else:
        urls = []
    for element in link_elements:
        urls.extend(element.text.splitlines())
    regexps = self.config.get('filehosters_re', [])
    filtered_urls = []
    for i, url in enumerate(urls):
        urls[i] = normalize_unicode(url)
        for regexp in regexps:
            if re.search(regexp, urls[i]):
                filtered_urls.append(urls[i])
                log.debug('Url: "%s" matched filehoster filter: %s', urls[i], regexp)
                break
        else:
            if regexps:
                log.debug(
                    'Url: "%s" does not match any of the given filehoster filters: %s',
                    urls[i],
                    str(regexps),
                )
    if regexps:
        log.debug('Using filehosters_re filters: %s', str(regexps))
        urls = filtered_urls
    else:
        log.debug('No filehoster filters configured, using all found links.')
    num_links = len(urls)
    log.verbose('Found %d links at %s.', num_links, entry['url'])
    if num_links:
        entry['urls'] = urls
        entry['url'] = urls[0]
    else:
        raise UrlRewritingError('No useable links found at %s' % entry['url'])
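# Standalone demo of the filehosters_re filtering logic above; the urls and the
# patterns are illustrative examples, not values shipped with the plugin.
import re

urls = [
    'https://rapidgator.net/file/abc123',
    'https://example.com/ad/click?i=1',
    'https://nitroflare.com/view/def456',
]
regexps = [r'rapidgator\.net', r'nitroflare\.com']
print([u for u in urls if any(re.search(rx, u) for rx in regexps)])
# -> ['https://rapidgator.net/file/abc123', 'https://nitroflare.com/view/def456']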
def search(self, task, entry, config=None):
    if not config:
        log.debug('Descargas2020 disabled')
        return set()
    log.debug('Search Descargas2020')
    url_search = 'http://descargas2020.com/buscar'
    results = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        query = normalize_unicode(search_string)
        query = re.sub(r' \(\d\d\d\d\)$', '', query)
        log.debug('Searching Descargas2020 %s', query)
        query = unicodedata.normalize('NFD', query).encode('ascii', 'ignore')
        data = {'q': query}
        try:
            response = task.requests.post(url_search, data=data)
        except requests.RequestException as e:
            log.error('Error searching Descargas2020: %s', e)
            return results
        content = response.content
        soup = get_soup(content)
        soup2 = soup.find('ul', attrs={'class': 'buscar-list'})
        children = soup2.findAll('a', href=True)
        for child in children:
            entry = Entry()
            entry['url'] = child['href']
            entry_title = child.find('h2')
            if entry_title is None:
                log.debug('Ignore empty entry')
                continue
            entry_title = entry_title.text
            if not entry_title:
                continue
            try:
                entry_quality_lan = re.search(
                    r'.+ \[([^\]]+)\](\[[^\]]+\])+$', entry_title
                ).group(1)
            except AttributeError:
                log.debug('Quality not found')
                continue
            entry_title = re.sub(r' \[.+]$', '', entry_title)
            entry['title'] = entry_title + ' ' + entry_quality_lan
            results.add(entry)
    log.debug('Finish search Descargas2020 with %d entries', len(results))
    return results
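# Demo of the title/quality split above on an illustrative Descargas2020-style title:
import re

title = 'Some Show - Temporada 1 [HDTV 720p][Cap.101][AC3 5.1 Castellano]'
quality = re.search(r'.+ \[([^\]]+)\](\[[^\]]+\])+$', title).group(1)
clean = re.sub(r' \[.+]$', '', title)
print(clean + ' ' + quality)
# -> Some Show - Temporada 1 HDTV 720p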
def search(self, task, entry, config=None):
    if not isinstance(config, dict):
        config = {}
    category = CATEGORIES.get(config.get('category', 'all'), None)
    category_query = '&cid=%d' % category if category else ''
    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        query = normalize_unicode(search_string)
        search_query = '&search=%s' % quote(query.encode('utf-8'))
        # append category_query; the original computed it but never used it
        url = 'http://extratorrent.cc/rss.xml?type=search%s%s' % (search_query, category_query)
        log.debug('Using %s as extratorrent search url' % url)
        rss = feedparser.parse(url)
        status = rss.get('status', False)
        if status != 200:
            log.debug('Search result not 200 (OK), received %s' % status)
            if not status or status >= 400:
                continue
        for item in rss.entries:
            entry = Entry()
            entry['title'] = item.title
            entry['url'] = item.link
            entry['content_size'] = int(item.size) / 1024 / 1024
            entry['torrent_info_hash'] = item.info_hash
            # feedparser exposes feed fields as strings, so coerce rather than
            # isinstance-check (the original isinstance(..., int) test never matched)
            try:
                entry['torrent_seeds'] = int(item.seeders)
            except (TypeError, ValueError):
                pass
            try:
                entry['torrent_leeches'] = int(item.leechers)
            except (TypeError, ValueError):
                pass
            entries.add(entry)
    return entries
def prepare_search_query(self, search_string):
    # replace_sepcial_chars (sic) is defined elsewhere on this class
    return self.replace_sepcial_chars(normalize_unicode(search_string))
def search(self, task, entry, config=None):
    """
    Search for name from piratebay.
    """
    if not isinstance(config, dict):
        config = {}
    self.set_urls(config.get('url', URL))
    sort = SORT.get(config.get('sort_by', 'seeds'))
    if config.get('sort_reverse'):
        sort += 1
    if isinstance(config.get('category'), int):
        category = config['category']
    else:
        category = CATEGORIES.get(config.get('category', 'all'))
    filter_url = '/0/%d/%d' % (sort, category)
    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        query = normalize_unicode(search_string)
        # TPB search doesn't like dashes or quotes
        query = query.replace('-', ' ').replace("'", " ")
        # urllib.quote will crash if the unicode string has non ascii characters,
        # so encode in utf-8 beforehand
        url = '%s/search/%s%s' % (self.url, quote(query.encode('utf-8')), filter_url)
        logger.debug('Using {} as piratebay search url', url)
        page = task.requests.get(url).content
        soup = get_soup(page)
        for link in soup.find_all('a', attrs={'class': 'detLink'}):
            entry = Entry()
            entry['title'] = self.extract_title(link)
            if not entry['title']:
                logger.error('Malformed search result. No title or url found. Skipping.')
                continue
            href = link.get('href')
            if href.startswith('/'):  # relative link?
                href = self.url + href
            entry['url'] = href
            row = link.parent.parent.parent
            description = row.find_all('a', attrs={'class': 'detDesc'})
            if description and description[0].contents[0] == "piratebay ":
                logger.debug('Advertisement entry. Skipping.')
                continue
            tds = row.find_all('td')
            entry['torrent_seeds'] = int(tds[-2].contents[0])
            entry['torrent_leeches'] = int(tds[-1].contents[0])
            entry['torrent_availability'] = torrent_availability(
                entry['torrent_seeds'], entry['torrent_leeches']
            )
            # Parse content_size
            size_text = link.find_next(attrs={'class': 'detDesc'}).get_text()
            if size_text:
                size = re.search(r'Size (\d+(\.\d+)?\xa0(?:[PTGMK])?i?B)', size_text)
                if size:
                    entry['content_size'] = parse_filesize(size.group(1))
                else:
                    logger.error(
                        'Malformed search result? Title: "{}", No size? {}',
                        entry['title'],
                        size_text,
                    )
            entries.add(entry)
    return sorted(entries, reverse=True, key=lambda x: x.get('torrent_availability'))
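# Demo of the detDesc size regex above; \xa0 is the non-breaking space piratebay
# puts between the number and the unit (the detDesc sample line is illustrative):
import re

size_text = 'Uploaded 03-14 2019, Size 1.4\xa0GiB, ULed by someone'
m = re.search(r'Size (\d+(\.\d+)?\xa0(?:[PTGMK])?i?B)', size_text)
print(m.group(1))  # -> '1.4\xa0GiB'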
def search(self, task, entry, config=None):
    """
    Search for name from iptorrents
    """
    categories = config.get('category', 'All')
    # Make sure categories is a list
    if not isinstance(categories, list):
        categories = [categories]
    # If there are any text categories, turn them into their id number
    categories = [c if isinstance(c, int) else CATEGORIES[c] for c in categories]
    category_params = {str(c): '' for c in categories if str(c)}
    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        search_params = dict(category_params)
        query = normalize_unicode(search_string)
        search_params.update({'q': query, 'qf': ''})
        logger.debug('searching with params: {}', search_params)
        # the free search endpoint only differs in url; fold both calls into one
        search_url = FREE_SEARCH_URL if config.get('free') else SEARCH_URL
        req = requests.get(
            search_url,
            params=search_params,
            cookies={'uid': str(config['uid']), 'pass': config['password']},
        )
        logger.debug('full search URL: {}', req.url)
        if '/u/' + str(config['uid']) not in req.text:
            raise plugin.PluginError("Invalid cookies (user not logged in)...")
        soup = get_soup(req.content, parser="html.parser")
        torrents = soup.find('table', {'id': 'torrents'})
        results = torrents.findAll('tr')
        for torrent in results:
            if torrent.th and 'ac' in torrent.th.get('class'):
                # Header column
                continue
            if torrent.find('td', {'colspan': '99'}):
                logger.debug('No results found for search {}', search_string)
                break
            entry = Entry()
            link = torrent.find('a', href=re.compile('download'))['href']
            entry['url'] = f"{BASE_URL}{link}?torrent_pass={config.get('rss_key')}"
            entry['title'] = torrent.find('a', href=re.compile('details')).text
            seeders = torrent.findNext('td', {'class': 'ac t_seeders'}).text
            leechers = torrent.findNext('td', {'class': 'ac t_leechers'}).text
            entry['torrent_seeds'] = int(seeders)
            entry['torrent_leeches'] = int(leechers)
            entry['torrent_availability'] = torrent_availability(
                entry['torrent_seeds'], entry['torrent_leeches']
            )
            size = torrent.findNext(text=re.compile(r'^([\.\d]+) ([GMK]?)B$'))
            size = re.search(r'^([\.\d]+) ([GMK]?)B$', size)
            entry['content_size'] = parse_filesize(size.group(0))
            logger.debug('Found entry {}', entry)
            entries.add(entry)
    return entries
def search(self, task, entry, config=None):
    """
    Search for name from torrentleech.
    """
    request_headers = {'User-Agent': 'curl/7.54.0'}
    rss_key = config['rss_key']

    # build the form request:
    data = {'username': config['username'], 'password': config['password']}
    # POST the login form:
    try:
        login = task.requests.post(
            'https://www.torrentleech.org/user/account/login/',
            data=data,
            headers=request_headers,
            allow_redirects=True,
        )
    except RequestException as e:
        raise PluginError('Could not connect to torrentleech: %s' % str(e))

    if login.url.endswith('/user/account/login/'):
        raise PluginError('Could not login to torrentleech, faulty credentials?')

    if not isinstance(config, dict):
        config = {}
    # sort = SORT.get(config.get('sort_by', 'seeds'))
    # if config.get('sort_reverse'):
    #     sort += 1
    categories = config.get('category', 'all')
    # Make sure categories is a list
    if not isinstance(categories, list):
        categories = [categories]
    # If there are any text categories, turn them into their id number
    categories = [c if isinstance(c, int) else CATEGORIES[c] for c in categories]
    filter_url = '/categories/{}'.format(','.join(str(c) for c in categories))
    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        query = normalize_unicode(search_string).replace(":", "")
        # urllib.quote will crash if the unicode string has non ascii characters,
        # so encode in utf-8 beforehand
        url = (
            'https://www.torrentleech.org/torrents/browse/list/query/'
            + quote(query.encode('utf-8'))
            + filter_url
        )
        logger.debug('Using {} as torrentleech search url', url)
        results = task.requests.get(url, headers=request_headers, cookies=login.cookies).json()
        for torrent in results['torrentList']:
            entry = Entry()
            entry['download_headers'] = request_headers
            entry['title'] = torrent['name']
            # construct download URL
            torrent_url = 'https://www.torrentleech.org/rss/download/{}/{}/{}'.format(
                torrent['fid'], rss_key, torrent['filename']
            )
            logger.debug('RSS-ified download link: {}', torrent_url)
            entry['url'] = torrent_url
            # seeders/leechers
            entry['torrent_seeds'] = torrent['seeders']
            entry['torrent_leeches'] = torrent['leechers']
            entry['torrent_availability'] = torrent_availability(
                entry['torrent_seeds'], entry['torrent_leeches']
            )
            entry['content_size'] = parse_filesize(str(torrent['size']) + ' b')
            entries.add(entry)
    return sorted(entries, reverse=True, key=lambda x: x.get('torrent_availability'))
def url_rewrite(self, task, entry):
    soup = self._get_soup(task, entry['url'])

    # grab links from the main post:
    link_elements = []
    log.debug(
        'Searching %s for a tags where the text matches one of: %s',
        entry['url'],
        str(self.config.get('link_text_re')),
    )
    for regexp in self.config.get('link_text_re'):
        link_elements.extend(soup.find_all('a', string=re.compile(regexp)))
    if 'urls' in entry:
        urls = list(entry['urls'])
        log.debug('Original urls: %s', str(entry['urls']))
    else:
        urls = []
    log.debug('Found link elements: %s', str(link_elements))
    for element in link_elements:
        # raw, escaped pattern; the original's bare dots matched any character
        if re.search(r'nfo1\.rlsbb\.(ru|com)', element['href']):
            # grab multipart links
            urls.extend(self.grab_multilinks(task, element['href']))
        else:
            urls.append(element['href'])

    # grab links from comments
    regexps = self.config.get('filehosters_re', [])
    if self.config.get('parse_comments'):
        comments = soup.find_all('div', id=re.compile("commentbody"))
        log.debug('Comment parsing enabled: found %d comments.', len(comments))
        if comments and not regexps:
            log.warning(
                'You have enabled comment parsing but you did not define any '
                'filehoster_re filter. You may get a lot of unwanted and potentially '
                'dangerous links from the comments.'
            )
        for comment in comments:
            links = comment.find_all('a')
            for link in links:
                urls.append(link['href'])

    # filter urls:
    filtered_urls = []
    for i, url in enumerate(urls):
        urls[i] = normalize_unicode(url)
        for regexp in regexps:
            if re.search(regexp, urls[i]):
                filtered_urls.append(urls[i])
                log.debug('Url: "%s" matched filehoster filter: %s', urls[i], regexp)
                break
        else:
            if regexps:
                log.debug(
                    'Url: "%s" was discarded because it does not match any of the '
                    'given filehoster filters: %s',
                    urls[i],
                    str(regexps),
                )
    if regexps:
        log.debug('Using filehosters_re filters: %s', str(regexps))
        urls = filtered_urls
    else:
        log.debug('No filehoster filters configured, using all found links.')
    num_links = len(urls)
    log.verbose('Found %d links at %s.', num_links, entry['url'])
    if num_links:
        entry['urls'] = urls
        entry['url'] = urls[0]
    else:
        raise UrlRewritingError('No useable links found at %s' % entry['url'])
def search(self, task, entry, config=None):
    """
    Search for name from iptorrents
    """
    categories = config.get('category', 'All')
    # Make sure categories is a list
    if not isinstance(categories, list):
        categories = [categories]
    # If there are any text categories, turn them into their id number
    categories = [c if isinstance(c, int) else CATEGORIES[c] for c in categories]
    filter_url = '&'.join((str(c) + '=') for c in categories)
    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        query = normalize_unicode(search_string)
        query = quote_plus(query.encode('utf8'))
        url = "{base_url}/t?{filter}&q={query}&qf=".format(
            base_url=BASE_URL, filter=filter_url, query=query
        )
        logger.debug('searching with url: {}', url)
        req = requests.get(
            url, cookies={'uid': str(config['uid']), 'pass': config['password']}
        )
        if '/u/' + str(config['uid']) not in req.text:
            raise plugin.PluginError("Invalid cookies (user not logged in)...")
        soup = get_soup(req.content, parser="html.parser")
        torrents = soup.find('table', {'id': 'torrents'})
        results = torrents.findAll('tr')
        for torrent in results:
            if torrent.th and 'ac' in torrent.th.get('class'):
                # Header column
                continue
            if torrent.find('td', {'colspan': '99'}):
                logger.debug('No results found for search {}', search_string)
                break
            entry = Entry()
            link = torrent.find('a', href=re.compile('download'))['href']
            entry['url'] = "{base}{link}?torrent_pass={key}".format(
                base=BASE_URL, link=link, key=config.get('rss_key')
            )
            entry['title'] = torrent.find('a', href=re.compile('details')).text
            seeders = torrent.findNext('td', {'class': 'ac t_seeders'}).text
            leechers = torrent.findNext('td', {'class': 'ac t_leechers'}).text
            entry['torrent_seeds'] = int(seeders)
            entry['torrent_leeches'] = int(leechers)
            entry['torrent_availability'] = torrent_availability(
                entry['torrent_seeds'], entry['torrent_leeches']
            )
            size = torrent.findNext(text=re.compile(r'^([\.\d]+) ([GMK]?)B$'))
            size = re.search(r'^([\.\d]+) ([GMK]?)B$', size)
            entry['content_size'] = parse_filesize(size.group(0))
            logger.debug('Found entry {}', entry)
            entries.add(entry)
    return entries
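# Demo of the size-cell regex shared by the iptorrents searches above; the cell
# text is an illustrative example:
import re

cell = '1.4 GB'
m = re.search(r'^([\.\d]+) ([GMK]?)B$', cell)
print(m.group(0), '->', m.group(1), m.group(2))  # 1.4 GB -> 1.4 G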
def search(self, task, entry, config=None):
    """
    Search for name from torrentday.
    """
    categories = config.get('category', 'all')
    # Make sure categories is a list
    if not isinstance(categories, list):
        categories = [categories]
    # If there are any text categories, turn them into their id number
    categories = [c if isinstance(c, int) else CATEGORIES[c] for c in categories]
    params = {
        'cata': 'yes',
        'c{}'.format(','.join(str(c) for c in categories)): 1,
        'clear-new': 1,
    }
    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        url = 'https://www.torrentday.com/t'
        params['q'] = normalize_unicode(search_string).replace(':', '')
        cookies = {
            'uid': config['uid'],
            'pass': config['passkey'],
            '__cfduid': config['cfduid'],
        }
        try:
            page = requests.get(url, params=params, cookies=cookies).content
        except RequestException as e:
            raise PluginError('Could not connect to torrentday: {}'.format(e))

        # the following should avoid table being None due to a malformed
        # html in td search results
        soup = get_soup(page).contents[1].contents[1].next.next.nextSibling
        table = soup.find('table', {'id': 'torrentTable'})
        if table is None:
            raise PluginError(
                'Search returned by torrentday appears to be empty or malformed.'
            )

        # the first row is the header so skip it
        for tr in table.find_all('tr')[1:]:
            entry = Entry()
            # find the torrent names
            td = tr.find('td', {'class': 'torrentNameInfo'})
            if not td:
                log.warning('Could not find entry torrentNameInfo for %s.', search_string)
                continue
            title = td.find('a')
            if not title:
                log.warning('Could not determine title for %s.', search_string)
                continue
            entry['title'] = title.contents[0]
            log.debug('title: %s', title.contents[0])

            # find download link
            torrent_url = tr.find('td', {'class': 'ac'})
            if not torrent_url:
                log.warning('Could not determine download link for %s.', search_string)
                continue
            torrent_url = torrent_url.find('a').get('href')

            # construct download URL; the key expression was scrubbed ('******')
            # in the source, appending config['rss_key'] here is an assumption
            # consistent with the plugin's config keys
            torrent_url = (
                'https://www.torrentday.com/' + torrent_url + '?torrent_pass=' + config['rss_key']
            )
            log.debug('RSS-ified download link: %s', torrent_url)
            entry['url'] = torrent_url

            # use tr object for seeders/leechers
            seeders = tr.find('td', {'class': 'ac seedersInfo'})
            leechers = tr.find('td', {'class': 'ac leechersInfo'})
            entry['torrent_seeds'] = int(seeders.contents[0].replace(',', ''))
            entry['torrent_leeches'] = int(leechers.contents[0].replace(',', ''))
            entry['torrent_availability'] = torrent_availability(
                entry['torrent_seeds'], entry['torrent_leeches']
            )

            # use tr object for size
            size = tr.find('td', text=re.compile(r'([\.\d]+) ([TGMKk]?)B')).contents[0]
            size = re.search(r'([\.\d]+) ([TGMKk]?)B', str(size))
            entry['content_size'] = parse_filesize(size.group(0))
            entries.add(entry)
    return sorted(entries, reverse=True, key=lambda x: x.get('torrent_availability'))
def search(self, task, entry, config):
    """CPASBIEN search plugin

    Config example:

    tv_search_cpasbien:
        discover:
            what:
                - trakt_list:
                    username: xxxxxxx
                    api_key: xxxxxxx
                    series: watchlist
            from:
                - cpasbien:
                    category: "series-vostfr"
            interval: 1 day
            ignore_estimations: yes

    Category is ONE of: all, films, series, musique, films-french, 1080p, 720p,
    series-francaise, films-dvdrip, films-vostfr, series-vostfr, ebook
    """
    base_url = 'http://www.cpasbien.io'
    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        search_string = search_string.replace(' ', '-').lower()
        search_string = search_string.replace('(', '')
        search_string = search_string.replace(')', '')
        query = normalize_unicode(search_string)
        query_url_fragment = quote_plus(query.encode('utf-8'))
        # e.g. http://www.cpasbien.io/recherche/ncis.html
        if config['category'] == 'all':
            str_url = (base_url, 'recherche', query_url_fragment)
            url = '/'.join(str_url)
        else:
            category_url_fragment = '%s' % config['category']
            str_url = (base_url, 'recherche', category_url_fragment, query_url_fragment)
            url = '/'.join(str_url)
        log.debug('search url: %s' % (url + '.html'))
        # GET URL
        f = task.requests.get(url + '.html').content
        soup = get_soup(f)
        if soup.findAll(text=re.compile(' 0 torrents')):
            log.debug('search returned no results')
        else:
            nextpage = 0
            while nextpage >= 0:
                if nextpage > 0:
                    newurl = url + '/page-' + str(nextpage)
                    log.debug('-----> NEXT PAGE : %s' % newurl)
                    f1 = task.requests.get(newurl).content
                    soup = get_soup(f1)
                for result in soup.findAll('div', attrs={'class': re.compile('ligne')}):
                    entry = Entry()
                    link = result.find('a', attrs={'href': re.compile('dl-torrent')})
                    entry['title'] = link.contents[0]
                    # rewrite the page link into a direct download link: take the
                    # last path component, drop '.html' and append '.torrent'
                    page_link = link.get('href')
                    link_rewrite = page_link.split('/')
                    endlink = link_rewrite[-1]
                    str_url = (base_url, '/telechargement/', endlink[:-5], '.torrent')
                    entry['url'] = ''.join(str_url)
                    log.debug('Title: %s | DL LINK: %s' % (entry['title'], entry['url']))
                    entry['torrent_seeds'] = int(
                        result.find('span', attrs={'class': re.compile('seed')}).text
                    )
                    entry['torrent_leeches'] = int(
                        result.find('div', attrs={'class': re.compile('down')}).text
                    )
                    size = result.find('div', attrs={'class': re.compile('poid')}).text
                    entry['content_size'] = parse_filesize(size, si=False)
                    if entry['torrent_seeds'] > 0:
                        entries.add(entry)
                    else:
                        log.debug('0 SEED, not adding entry')
                if soup.find(text=re.compile('Suiv')):
                    nextpage += 1
                else:
                    nextpage = -1
    return entries