def execute_searches(self, config, entries):
    """Run every configured search plugin against the given entries.

    :param config: Discover plugin config
    :param entries: List of pseudo entries to search
    :return: List of entries found from search engines listed under `from` configuration
    """
    # Pick the title comparator based on the configured match type.
    search_type = config.get('type', 'normal')
    if search_type == 'normal':
        comparator = StringComparator(cutoff=0.7, cleaner=clean_title)
    elif search_type == 'exact':
        comparator = StringComparator(cutoff=0.9)
    elif search_type == 'any':
        comparator = AnyComparator()
    else:
        comparator = MovieComparator()
    result = []
    for item in config['from']:
        if isinstance(item, dict):
            # A `from` item may be a single-key {plugin_name: plugin_config} dict.
            # list() keeps this working on both Python 2 and 3.
            plugin_name, plugin_config = list(item.items())[0]
        else:
            plugin_name, plugin_config = item, None
        search = get_plugin_by_name(plugin_name).instance
        if not callable(getattr(search, 'search', None)):
            # Default of None makes a missing attribute log instead of raising
            # AttributeError; skip the plugin instead of calling the missing
            # method below (the original fell through and crashed anyway).
            log.critical('Search plugin %s does not implement search method' % plugin_name)
            continue
        for entry in entries:
            try:
                search_results = search.search(entry['title'], comparator, plugin_config)
                log.debug('Discovered %s entries from %s' % (len(search_results), plugin_name))
                # `limit` may be absent; slicing with None keeps all results.
                result.extend(search_results[:config.get('limit')])
            except (PluginError, PluginWarning):
                log.debug('No results from %s' % plugin_name)
    return sorted(result, reverse=True, key=lambda x: x.get('search_sort'))
def search(self, query, comparator=None, config=None):
    """Search the torrentz.eu RSS feed for `query`.

    :param query: Title to search for
    :param comparator: Title comparator; a fresh StringComparator is created
        when None (a default-argument instance would be shared and mutated
        via set_seq1/set_seq2 across calls)
    :param config: Reputation feed name (a REPUTATIONS key), defaults to 'good'
    :return: List of matching Entry objects, best availability first
    :raises PluginWarning: on non-200 response, bad feed, or no close matches
    """
    if comparator is None:
        comparator = StringComparator()
    if config:
        feed = REPUTATIONS[config]
    else:
        feed = REPUTATIONS['good']
    comparator.set_seq1(query)
    query = comparator.search_string()
    # urllib.quote will crash if the unicode string has non ascii characters, so encode in utf-8 beforehand
    url = 'http://torrentz.eu/%s?q=%s' % (feed, urllib.quote(query.encode('utf-8')))
    log.debug('requesting: %s' % url)
    rss = feedparser.parse(url)
    status = rss.get('status', False)
    if status != 200:
        raise PluginWarning('Search result not 200 (OK), received %s' % status)
    if rss.get('bozo_exception', False):
        raise PluginWarning('Got bozo_exception (bad feed)')
    entries = []
    for item in rss.entries:
        # assign confidence score of how close this link is to the name you're looking for. .6 and above is "close"
        comparator.set_seq2(item.title)
        log.debug('name: %s' % comparator.a)
        log.debug('found name: %s' % comparator.b)
        log.debug('confidence: %s' % comparator.ratio())
        if not comparator.matches():
            continue
        m = re.search(r'Size: ([\d]+) Mb Seeds: ([,\d]+) Peers: ([,\d]+) Hash: ([a-f0-9]+)',
                      item.description, re.IGNORECASE)
        if not m:
            log.debug('regexp did not find seeds / peer data')
            continue
        entry = Entry()
        entry['title'] = item.title
        entry['url'] = item.link
        entry['content_size'] = int(m.group(1))
        # Seed/peer counts may contain thousands separators.
        entry['torrent_seeds'] = int(m.group(2).replace(',', ''))
        entry['torrent_leeches'] = int(m.group(3).replace(',', ''))
        entry['torrent_info_hash'] = m.group(4).upper()
        entry['search_ratio'] = comparator.ratio()
        entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
        entries.append(entry)
    # choose torrent
    if not entries:
        raise PluginWarning('No close matches for %s' % query, log, log_once=True)
    entries.sort(reverse=True, key=lambda x: x.get('search_sort'))
    log.debug('Search got %d results' % len(entries))
    return entries
def on_task_urlrewrite(self, task, config):
    """Resolve download URLs for accepted entries via configured search plugins.

    Tries each configured search in order and stops at the first that yields
    a result; entries with no result are rejected and made mortal.
    """
    # no searches in unit test mode
    if task.manager.unit_test:
        return
    plugins = {}
    for plugin in get_plugins_by_group('search'):
        plugins[plugin.name] = plugin.instance
    # search accepted
    for entry in task.accepted:
        found = False
        # loop through configured searches
        for name in config:
            search_config = None
            if isinstance(name, dict):
                # assume the name is the first/only key in the dict.
                # list() keeps this working on both Python 2 and 3.
                name, search_config = list(name.items())[0]
            log.verbose('Searching `%s` from %s' % (entry['title'], name))
            try:
                results = plugins[name].search(entry['title'], StringComparator(cutoff=0.9), search_config)
                if results:
                    url = results[0]['url']
                    log.debug('Found url: %s' % url)
                    entry['url'] = url
                    found = True
                    break
            # `except X as e` works on Python 2.6+ and 3.x, unlike the comma form.
            except (PluginError, PluginWarning) as pw:
                log.verbose('Failed: %s' % pw.value)
                continue
        # Search failed
        if not found:
            # If I don't have a URL, doesn't matter if I'm immortal...
            entry['immortal'] = False
            task.reject(entry, 'search failed')
def search(self, query, comparator=None, config=None):
    """
    Search for name from piratebay.

    :param query: Title to search for
    :param comparator: Title comparator; a fresh StringComparator is created
        when None (a default-argument instance would be shared and mutated
        via set_seq1/set_seq2 across calls)
    :param config: Optional dict with 'sort_by', 'sort_reverse' and 'category'
    :return: List of matching Entry objects, best availability first
    :raises PluginWarning: when no close matches are found
    """
    if comparator is None:
        comparator = StringComparator()
    if not isinstance(config, dict):
        config = {}
    sort = SORT.get(config.get('sort_by', 'seeds'))
    if config.get('sort_reverse'):
        # Reverse ordering is encoded as sort id + 1 in the site URL scheme.
        sort += 1
    if isinstance(config.get('category'), int):
        category = config['category']
    else:
        category = CATEGORIES.get(config.get('category', 'all'))
    filter_url = '/0/%d/%d' % (sort, category)
    comparator.set_seq1(query)
    query = comparator.search_string()
    # urllib.quote will crash if the unicode string has non ascii characters, so encode in utf-8 beforehand
    url = 'http://thepiratebay.se/search/' + urllib.quote(query.encode('utf-8')) + filter_url
    log.debug('Using %s as piratebay search url' % url)
    page = requests.get(url).content
    soup = get_soup(page)
    entries = []
    for link in soup.find_all('a', attrs={'class': 'detLink'}):
        comparator.set_seq2(link.contents[0])
        log.debug('name: %s' % comparator.a)
        log.debug('found name: %s' % comparator.b)
        log.debug('confidence: %s' % comparator.ratio())
        if not comparator.matches():
            continue
        entry = Entry()
        entry['title'] = link.contents[0]
        entry['url'] = 'http://thepiratebay.se' + link.get('href')
        tds = link.parent.parent.parent.find_all('td')
        entry['torrent_seeds'] = int(tds[-2].contents[0])
        entry['torrent_leeches'] = int(tds[-1].contents[0])
        entry['search_ratio'] = comparator.ratio()
        entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
        # Parse content_size; \xa0 is the non-breaking space used in the page
        # markup (deliberately NOT a raw string so \xa0 stays a real character).
        size = link.find_next(attrs={'class': 'detDesc'}).contents[0]
        size = re.search('Size ([\.\d]+)\xa0([GMK])iB', size)
        if size:
            # Site reports decimal units; convert to MiB.
            if size.group(2) == 'G':
                entry['content_size'] = int(float(size.group(1)) * 1000 ** 3 / 1024 ** 2)
            elif size.group(2) == 'M':
                entry['content_size'] = int(float(size.group(1)) * 1000 ** 2 / 1024 ** 2)
            else:
                entry['content_size'] = int(float(size.group(1)) * 1000 / 1024 ** 2)
        entries.append(entry)
    if not entries:
        dashindex = query.rfind('-')
        if dashindex != -1:
            # Retry with the trailing '-suffix' stripped; pass config through
            # so the retry keeps the same sort/category (the original dropped it).
            return self.search(query[:dashindex], comparator=comparator, config=config)
        else:
            raise PluginWarning('No close matches for %s' % query, log, log_once=True)
    entries.sort(reverse=True, key=lambda x: x.get('search_sort'))
    return entries
def entries_from_search(self, name, url=None, comparator=None):
    """Parses torrent download url from newtorrents.info search results.

    :param name: Name to search for
    :param url: Optional explicit search url; built from `name` when None
    :param comparator: Title comparator; a fresh StringComparator(cutoff=0.9)
        is created when None (a default-argument instance would be shared
        and mutated via set_seq1 across calls)
    :return: List of matching Entry objects, most seeders first
    :raises PluginWarning: when nothing matches
    """
    if comparator is None:
        comparator = StringComparator(cutoff=0.9)
    comparator.set_seq1(name)
    name = comparator.search_string()
    if not url:
        url = 'http://www.newtorrents.info/search/%s' % urllib.quote(
            name.encode('utf-8'), safe=':/~?=&%')
    log.debug('search url: %s' % url)
    html = urlopener(url, log).read()
    # fix </SCR'+'IPT> so that BS does not crash
    # TODO: should use beautifulsoup massage
    html = re.sub(r'(</SCR.*?)...(.*?IPT>)', r'\1\2', html)
    soup = get_soup(html)
    # saving torrents in dict
    torrents = []
    for link in soup.find_all('a', attrs={'href': re.compile('down.php')}):
        torrent_url = 'http://www.newtorrents.info%s' % link.get('href')
        release_name = link.parent.next.get('title')
        # quick dirty hack
        seed = link.find_next('td', attrs={'class': re.compile('s')}).renderContents()
        if seed == 'n/a':
            seed = 0
        else:
            try:
                seed = int(seed)
            except ValueError:
                log.warning('Error converting seed value (%s) from newtorrents to integer.' % seed)
                seed = 0
        #TODO: also parse content_size and peers from results
        if comparator.matches(release_name):
            torrents.append(Entry(title=release_name,
                                  url=torrent_url,
                                  torrent_seeds=seed,
                                  search_ratio=comparator.ratio(),
                                  search_sort=torrent_availability(seed, 0)))
        else:
            log.debug('rejecting search result: %s !~ %s' % (release_name, name))
    # sort with seed number Reverse order
    torrents.sort(reverse=True, key=lambda x: x.get('search_sort', 0))
    # choose the torrent
    if not torrents:
        dashindex = name.rfind('-')
        if dashindex != -1:
            # Retry with the trailing '-suffix' stripped.
            return self.entries_from_search(name[:dashindex], comparator=comparator)
        else:
            raise PluginWarning('No matches for %s' % name, log, log_once=True)
    else:
        if len(torrents) == 1:
            log.debug('found only one matching search result.')
        else:
            log.debug('search result contains multiple matches, sorted %s by most seeders' % torrents)
        return torrents
def search(self, query, comparator=None, config=None):
    """
    Search for name from torrentleech.

    :param query: Title to search for
    :param comparator: Title comparator; a fresh StringComparator is created
        when None (a default-argument instance would be shared and mutated
        via set_seq1/set_seq2 across calls)
    :param config: Dict with required 'rss_key', 'username' and 'password',
        plus optional 'category' (CATEGORIES name or raw int id)
    :return: List of matching Entry objects, best availability first
    :raises PluginWarning: when no close matches are found
    """
    if comparator is None:
        comparator = StringComparator()
    # Normalize config before reading any keys; the original performed this
    # check only after already subscripting config, making it dead code.
    if not isinstance(config, dict):
        config = {}
    rss_key = config['rss_key']
    # build the form request:
    data = {'username': config['username'],
            'password': config['password'],
            'remember_me': 'on',
            'submit': 'submit'}
    # POST the login form to obtain session cookies:
    login = requests.post('http://torrentleech.org/', data=data)
    if isinstance(config.get('category'), int):
        category = config['category']
    else:
        category = CATEGORIES.get(config.get('category', 'all'))
    filter_url = '/categories/%d' % category
    comparator.set_seq1(query)
    query = comparator.search_string()
    # urllib.quote will crash if the unicode string has non ascii characters, so encode in utf-8 beforehand
    url = ('http://torrentleech.org/torrents/browse/index/query/' +
           urllib.quote(query.encode('utf-8')) + filter_url)
    log.debug('Using %s as torrentleech search url' % url)
    page = requests.get(url, cookies=login.cookies).content
    soup = get_soup(page)
    entries = []
    for tr in soup.find_all("tr", ["even", "odd"]):
        # within each even or odd row, find the torrent names
        link = tr.find("a", attrs={'href': re.compile(r'/torrent/\d+')})
        log.debug('link phase: %s' % link.contents[0])
        # extracts the contents of the <a>titlename/<a> tag
        comparator.set_seq2(link.contents[0])
        log.debug('name: %s' % comparator.a)
        log.debug('found name: %s' % comparator.b)
        log.debug('confidence: %s' % comparator.ratio())
        if not comparator.matches():
            continue
        entry = Entry()
        entry['title'] = link.contents[0]
        # find download link
        torrent_url = tr.find("a", attrs={'href': re.compile(r'/download/\d+/.*')}).get('href')
        # parse link and split along /download/12345 and /name.torrent
        download_url = re.search(r'(/download/\d+)/(.+\.torrent)', torrent_url)
        if not download_url:
            # Unexpected link format; skip the row instead of crashing on .group().
            log.debug('could not parse download url: %s' % torrent_url)
            continue
        # change link to rss and splice in rss_key
        torrent_url = ('http://torrentleech.org/rss' + download_url.group(1) +
                       '/' + rss_key + '/' + download_url.group(2))
        log.debug('RSS-ified download link: %s' % torrent_url)
        entry['url'] = torrent_url
        # us tr object for seeders/leechers
        seeders, leechers = tr.find_all('td', ["seeders", "leechers"])
        entry['torrent_seeds'] = int(seeders.contents[0])
        entry['torrent_leeches'] = int(leechers.contents[0])
        entry['search_ratio'] = comparator.ratio()
        entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
        # use tr object for size
        size = tr.find("td", text=re.compile(r'([\.\d]+) ([GMK])B')).contents[0]
        size = re.search(r'([\.\d]+) ([GMK])B', size)
        if size:
            # Site reports decimal units; convert to MiB.
            if size.group(2) == 'G':
                entry['content_size'] = int(float(size.group(1)) * 1000 ** 3 / 1024 ** 2)
            elif size.group(2) == 'M':
                entry['content_size'] = int(float(size.group(1)) * 1000 ** 2 / 1024 ** 2)
            else:
                entry['content_size'] = int(float(size.group(1)) * 1000 / 1024 ** 2)
        entries.append(entry)
    if not entries:
        dashindex = query.rfind('-')
        if dashindex != -1:
            # Retry with the trailing '-suffix' stripped; pass config through —
            # the original dropped it, so the retry crashed on config['rss_key'].
            return self.search(query[:dashindex], comparator=comparator, config=config)
        else:
            raise PluginWarning('No close matches for %s' % query, log, log_once=True)
    entries.sort(reverse=True, key=lambda x: x.get('search_sort'))
    return entries