def parse_download_page(self, url):
    txheaders = {'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}
    req = urllib2.Request(url, None, txheaders)
    page = urlopener(req, log)
    try:
        soup = get_soup(page)
    except Exception as e:
        raise UrlRewritingError(e)
    config = self.config or {}
    config.setdefault('quality', 'hd')
    links = soup.find_all('a', text="Descargar", href=re.compile("/subtitles"))
    if not links:
        raise UrlRewritingError('Unable to locate subtitle download link from url %s' % url)
    subtitle_url = ''
    for link in links:
        sub_url = link['href']
        log.verbose('Found url %s', sub_url)
        if config['quality'] == 'hd' and re.search("720p|1080p", sub_url):
            subtitle_url = 'http://www.argenteam.net' + sub_url
            log.verbose('is a match')
            break
        if config['quality'] == 'sd' and re.search("720p|1080p", sub_url) is None:
            subtitle_url = 'http://www.argenteam.net' + sub_url
            log.verbose('is a match')
            break
    if subtitle_url == '':
        raise UrlRewritingError('Unable to locate download link %s from url %s' % (config['quality'], url))
    return subtitle_url

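# A minimal standalone sketch of the quality filter used above (the helper name
# and sample hrefs are illustrative, not from the plugin): 'hd' requires a
# 720p/1080p marker in the candidate href, 'sd' requires its absence.
import re

def matches_quality(href, quality):
    is_hd = re.search('720p|1080p', href) is not None
    return is_hd if quality == 'hd' else not is_hd

assert matches_quality('/subtitles/12345/foo-720p', 'hd')
assert matches_quality('/subtitles/12345/foo-dvdrip', 'sd')
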
def get_json(url):
    try:
        log.debug('fetching json at %s' % url)
        data = urlopener(url, log)
    except URLError as e:
        log.warning('Request failed %s' % url)
        return

def nzbid_from_search(self, url, name, query):
    """Parses nzb download url from api results"""
    import time
    import difflib
    matched_results = []
    log.debug("Sleeping to respect nzbmatrix rules about hammering the API")
    time.sleep(10)
    apireturn = self.parse_nzb_matrix_api(urlopener(url, log).read(), query)
    if not apireturn:
        return None
    else:
        names = []
        for result in apireturn:
            names.append(result["NZBNAME"])
        matches = difflib.get_close_matches(name, names, 1, 0.3)
        if len(matches) == 0:
            return None
        else:
            for result in apireturn:
                if result["NZBNAME"] == matches[0]:
                    break
            for match in matches:  # Already sorted
                for result in apireturn:
                    if result.get(match, False):
                        matched_results.append(result)
            return matched_results

def get_http_seeds(url, info_hash):
    url = get_scrape_url(url, info_hash)
    if not url:
        log.debug('if not url is true returning 0')
        return 0
    log.debug('Checking for seeds from %s' % url)
    data = None
    try:
        data = bdecode(urlopener(url, log, retries=1, timeout=10).read()).get('files')
    except URLError as e:
        log.debug('Error scraping: %s' % e)
        return 0
    except SyntaxError as e:
        log.warning('Error decoding tracker response: %s' % e)
        return 0
    except BadStatusLine as e:
        log.warning('Error BadStatusLine: %s' % e)
        return 0
    except IOError as e:
        log.warning('Server error: %s' % e)
        return 0
    if not data:
        log.debug('No data received from tracker scrape.')
        return 0
    log.debug('get_http_seeds is returning: %s' % data.values()[0]['complete'])
    return data.values()[0]['complete']

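# Assumed data shape for the scrape response handled above (fabricated sample):
# bdecode yields {'files': {<info_hash>: {'complete': seeds, ...}}}, so the
# seed count is the 'complete' field of the single scraped torrent.
scrape_response = {'files': {'\x12' * 20: {'complete': 42, 'downloaded': 300, 'incomplete': 7}}}
seeds = list(scrape_response['files'].values())[0]['complete']
assert seeds == 42
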
def get_file(self, only_cached=False):
    """Makes sure the poster is downloaded to the local cache (in userstatic folder)
    and returns the path split into a list of directory and file components"""
    from flexget.manager import manager
    base_dir = os.path.join(manager.config_base, 'userstatic')
    if self.file and os.path.isfile(os.path.join(base_dir, self.file)):
        return self.file.split(os.sep)
    elif only_cached:
        return
    # If we don't already have a local copy, download one.
    log.debug('Downloading poster %s' % self.url)
    dirname = os.path.join('tmdb', 'posters', str(self.movie_id))
    # Create folders if they don't exist
    fullpath = os.path.join(base_dir, dirname)
    if not os.path.isdir(fullpath):
        os.makedirs(fullpath)
    filename = os.path.join(dirname, posixpath.basename(self.url))
    thefile = file(os.path.join(base_dir, filename), 'wb')
    thefile.write(urlopener(self.url, log).read())
    thefile.close()
    self.file = filename
    # If we are detached from a session, update the db
    if not Session.object_session(self):
        session = Session()
        poster = session.query(TMDBPoster).filter(TMDBPoster.db_id == self.db_id).first()
        if poster:
            poster.file = filename
            session.commit()
        session.close()
    return filename.split(os.sep)

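# Standalone sketch of the return convention used by get_file (path fabricated):
# the cached relative path is split into directory and file components with os.sep.
import os
filename = os.path.join('tmdb', 'posters', '550', 'poster.jpg')
print(filename.split(os.sep))  # ['tmdb', 'posters', '550', 'poster.jpg']
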
def on_task_output(self, task, config):
    for entry in task.accepted:
        if task.options.test:
            log.info('Would add into sabnzbd: %s' % entry['title'])
            continue
        params = self.get_params(config)
        # allow overriding the category
        if 'category' in entry:
            # Dirty hack over the next few lines to strip out non-ascii
            # chars. We're going to urlencode this, which causes
            # serious issues in python2.x if it's not ascii input.
            params['cat'] = ''.join([x for x in entry['category'] if ord(x) < 128])
        params['name'] = ''.join([x for x in entry['url'] if ord(x) < 128])
        # add cleaner nzb name (undocumented api feature)
        params['nzbname'] = ''.join([x for x in entry['title'] if ord(x) < 128])
        request_url = config['url'] + urllib.urlencode(params)
        log.debug('request_url: %s' % request_url)
        try:
            response = urlopener(request_url, log).read()
        except Exception as e:
            log.critical('Failed to use sabnzbd. Requested %s' % request_url)
            log.critical('Result was: %s' % e)
            entry.fail('sabnzbd unreachable')
            if task.options.debug:
                log.exception(e)
            continue
        if 'error' in response.lower():
            entry.fail(response.replace('\n', ''))
        else:
            log.info('Added `%s` to SABnzbd' % (entry['title']))

def on_feed_output(self, feed, config):
    for entry in feed.accepted:
        if feed.manager.options.test:
            log.info('Would add into sabnzbd: %s' % entry['title'])
            continue
        params = self.get_params(config)
        # allow overriding the category
        if 'category' in entry:
            # Dirty hack over the next few lines to strip out non-ascii
            # chars. We're going to urlencode this, which causes
            # serious issues in python2.x if it's not ascii input.
            params['cat'] = ''.join([x for x in entry['category'] if ord(x) < 128])
        params['name'] = ''.join([x for x in entry['url'] if ord(x) < 128])
        # add cleaner nzb name (undocumented api feature)
        params['nzbname'] = ''.join([x for x in entry['title'] if ord(x) < 128])
        request_url = config['url'] + urllib.urlencode(params)
        log.debug('request_url: %s' % request_url)
        try:
            response = urlopener(request_url, log).read()
        except Exception as e:
            log.critical('Failed to use sabnzbd. Requested %s' % request_url)
            log.critical('Result was: %s' % e)
            feed.fail(entry, 'sabnzbd unreachable')
            if feed.manager.options.debug:
                log.exception(e)
            continue
        if 'error' in response.lower():
            feed.fail(entry, response.replace('\n', ''))
        else:
            log.info('Added `%s` to SABnzbd' % (entry['title']))

def search(self, task, entry, config=None):
    txheaders = {
        'User-Agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'en-us,en;q=0.5',
        'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
        'Keep-Alive': '300',
        'Connection': 'keep-alive',
    }
    nzbs = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        query = search_string
        url = u'http://newzleech.com/?%s' % str(urllib.urlencode({'q': query.encode('latin1'), 'm': 'search',
                                                                 'group': '', 'min': 'min', 'max': 'max',
                                                                 'age': '', 'minage': '', 'adv': ''}))
        # log.debug('Search url: %s' % url)
        req = urllib2.Request(url, headers=txheaders)
        page = urlopener(req, log)
        soup = get_soup(page)
        for item in soup.find_all('table', attrs={'class': 'contentt'}):
            subject_tag = item.find('td', attrs={'class': 'subject'}).next
            subject = ''.join(subject_tag.find_all(text=True))
            complete = item.find('td', attrs={'class': 'complete'}).contents[0]
            size = item.find('td', attrs={'class': 'size'}).contents[0]
            nzb_url = 'http://newzleech.com/' + item.find('td', attrs={'class': 'get'}).next.get('href')
            # generate regexp from entry title and see if it matches subject
            regexp = query
            wildcardize = [' ', '-']
            for wild in wildcardize:
                regexp = regexp.replace(wild, '.')
            regexp = '.*' + regexp + '.*'
            # log.debug('Title regexp: %s' % regexp)
            if re.match(regexp, subject):
                log.debug('%s matches to regexp' % subject)
                if complete != u'100':
                    log.debug('Match is incomplete %s from newzleech, skipping ..' % query)
                    continue
                log.info('Found \'%s\'' % query)
                try:
                    size_num = float(size[:-3])
                except (ValueError, TypeError):
                    log.error('Failed to parse_size %s' % size)
                    size_num = 0
                # convert into megabytes
                if 'GB' in size:
                    size_num *= 1024
                if 'KB' in size:
                    size_num /= 1024
                # choose largest file
                nzbs.add(Entry(title=subject, url=nzb_url, content_size=size_num, search_sort=size_num))
    return nzbs

def on_feed_input(self, feed, config):
    config = self.build_config(config)
    log.debug('InputPlugin html requesting url %s' % config['url'])
    if config.get('username') and config.get('password'):
        log.debug('Basic auth enabled. User: %s Password: %s' % (config['username'], config['password']))
        passman = urllib2.HTTPPasswordMgrWithDefaultRealm()
        passman.add_password(None, config['url'], config['username'], config['password'])
        handlers = [urllib2.HTTPBasicAuthHandler(passman)]
    else:
        handlers = None
    page = urlopener(config['url'], log, handlers=handlers)
    soup = get_soup(page)
    log.debug('Detected encoding %s' % soup.originalEncoding)
    # dump received content into a file
    if 'dump' in config:
        name = config['dump']
        log.info('Dumping %s into %s' % (config['url'], name))
        data = soup.prettify()
        f = open(name, 'w')
        f.write(data)
        f.close()
    return self.create_entries(config['url'], soup, config)

def on_task_input(self, task, config):
    config = self.build_config(config)
    log.debug('InputPlugin html requesting url %s' % config['url'])
    if config.get('username') and config.get('password'):
        log.debug('Basic auth enabled. User: %s Password: %s' % (config['username'], config['password']))
        passman = urllib2.HTTPPasswordMgrWithDefaultRealm()
        passman.add_password(None, config['url'], config['username'], config['password'])
        handlers = [urllib2.HTTPBasicAuthHandler(passman)]
    else:
        handlers = None
    page = urlopener(config['url'], log, handlers=handlers)
    soup = get_soup(page)
    log.debug('Detected encoding %s' % soup.originalEncoding)
    # dump received content into a file
    if 'dump' in config:
        name = config['dump']
        log.info('Dumping %s into %s' % (config['url'], name))
        data = soup.prettify()
        f = open(name, 'w')
        f.write(data)
        f.close()
    return self.create_entries(config['url'], soup, config)

def search(self, entry, config):
    url = "https://tehconnection.eu/torrents.php?searchstr=%s" % entry.get("imdb_id")
    page = urlopener(url, log)
    soup = get_soup(page)
    results = set()
    for row in soup.find_all("tr", class_="group_torrent"):
        link = row.find(title="Download")
        info = row.find(colspan="1").contents[3].contents[0].strip()
        seeders = int(row.find_all("td")[6].contents[0].strip())
        leechers = int(row.find_all("td")[7].contents[0].strip())
        result = Entry()
        result["title"] = entry.get("title") + " / " + info
        result["imdb_id"] = entry.get("imdb_id")
        result["url"] = "https://tehconnection.eu" + link.get("href")
        result["torrent_seeds"] = seeders
        result["torrent_leeches"] = leechers
        result["search_sort"] = torrent_availability(result['torrent_seeds'], result['torrent_leeches'])
        results.add(result)
    return results

def search(self, entry, config=None):
    txheaders = {
        'User-Agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'en-us,en;q=0.5',
        'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
        'Keep-Alive': '300',
        'Connection': 'keep-alive',
    }
    nzbs = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        query = search_string
        url = u'http://newzleech.com/?%s' % str(urllib.urlencode({'q': query.encode('latin1'), 'm': 'search',
                                                                 'group': '', 'min': 'min', 'max': 'max',
                                                                 'age': '', 'minage': '', 'adv': ''}))
        # log.debug('Search url: %s' % url)
        req = urllib2.Request(url, headers=txheaders)
        page = urlopener(req, log)
        soup = get_soup(page)
        for item in soup.find_all('table', attrs={'class': 'contentt'}):
            subject_tag = item.find('td', attrs={'class': 'subject'}).next
            subject = ''.join(subject_tag.find_all(text=True))
            complete = item.find('td', attrs={'class': 'complete'}).contents[0]
            size = item.find('td', attrs={'class': 'size'}).contents[0]
            nzb_url = 'http://newzleech.com/' + item.find('td', attrs={'class': 'get'}).next.get('href')
            # generate regexp from entry title and see if it matches subject
            regexp = query
            wildcardize = [' ', '-']
            for wild in wildcardize:
                regexp = regexp.replace(wild, '.')
            regexp = '.*' + regexp + '.*'
            # log.debug('Title regexp: %s' % regexp)
            if re.match(regexp, subject):
                log.debug('%s matches to regexp' % subject)
                if complete != u'100':
                    log.debug('Match is incomplete %s from newzleech, skipping ..' % query)
                    continue
                log.info('Found \'%s\'' % query)
                try:
                    size_num = float(size[:-3])
                except (ValueError, TypeError):
                    log.error('Failed to parse_size %s' % size)
                    size_num = 0
                # convert into megabytes
                if 'GB' in size:
                    size_num *= 1024
                if 'KB' in size:
                    size_num /= 1024
                # choose largest file
                nzbs.add(Entry(title=subject, url=nzb_url, content_size=size_num, search_sort=size_num))
    return nzbs

def process_invalid_content(self, feed, url):
    """If feedparser reports error, save the received data and log error."""
    log.critical('Invalid XML received from feed %s' % feed.name)
    try:
        req = urlopener(url, log)
    except ValueError as exc:
        log.debug('invalid url `%s` due to %s (ok for a file)' % (url, exc))
        return

def search_title(self, name, comparator=StringComparator(), url=None):
    """
    Search for name from piratebay.
    If optional search :url: is passed it will be used instead of internal search.
    """
    comparator.set_seq1(name)
    name = comparator.search_string()
    if not url:
        # urllib.quote will crash if the unicode string has non ascii characters, so encode in utf-8 beforehand
        url = 'http://thepiratebay.org/search/' + urllib.quote(name.encode('utf-8'))
        log.debug('Using %s as piratebay search url' % url)
    page = urlopener(url, log)
    soup = get_soup(page)
    entries = []
    for link in soup.findAll('a', attrs={'class': 'detLink'}):
        comparator.set_seq2(link.contents[0])
        log.debug('name: %s' % comparator.a)
        log.debug('found name: %s' % comparator.b)
        log.debug('confidence: %s' % comparator.ratio())
        if not comparator.matches():
            continue
        entry = Entry()
        entry['title'] = link.contents[0]
        entry['url'] = 'http://thepiratebay.org' + link.get('href')
        tds = link.parent.parent.parent.findAll('td')
        entry['torrent_seeds'] = int(tds[-2].contents[0])
        entry['torrent_leeches'] = int(tds[-1].contents[0])
        entry['search_ratio'] = comparator.ratio()
        entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
        # Parse content_size
        size = link.findNext(attrs={'class': 'detDesc'}).contents[0]
        size = re.search('Size ([\.\d]+)\xa0([GMK])iB', size)
        if size:
            if size.group(2) == 'G':
                entry['content_size'] = int(float(size.group(1)) * 1000 ** 3 / 1024 ** 2)
            elif size.group(2) == 'M':
                entry['content_size'] = int(float(size.group(1)) * 1000 ** 2 / 1024 ** 2)
            else:
                entry['content_size'] = int(float(size.group(1)) * 1000 / 1024 ** 2)
        entries.append(entry)
    if not entries:
        dashindex = name.rfind('-')
        if dashindex != -1:
            return self.search_title(name[:dashindex], comparator=comparator)
        else:
            raise PluginWarning('No close matches for %s' % name, log, log_once=True)

    def score(a):
        return torrent_availability(a['torrent_seeds'], a['torrent_leeches'])

    # note: the sort key was 'search_sorted' in the original, but the field set
    # above is 'search_sort'
    entries.sort(reverse=True, key=lambda x: x.get('search_sort'))
    # for torrent in torrents:
    #     log.debug('%s link: %s' % (torrent, torrent['link']))
    return entries

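# Standalone check of the content_size arithmetic above (sample value
# fabricated): a scraped '1.40 GiB' becomes an integer number of MiB.
print(int(1.40 * 1000 ** 3 / 1024 ** 2))  # 1335
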
def parse_site(self, url, task):
    """Parse configured url and return releases array"""
    page = urlopener(url, log)
    soup = get_soup(page)
    releases = []
    for entry in soup.find_all('div', attrs={'class': 'entry'}):
        release = {}
        title = entry.find('h2')
        if not title:
            log.debug('No h2 entrytitle')
            continue
        release['title'] = title.a.contents[0].strip()
        log.debug('Processing title %s' % (release['title']))
        for link in entry.find_all('a'):
            # no content in the link
            if not link.contents:
                continue
            link_name = link.contents[0]
            if link_name is None:
                continue
            if not isinstance(link_name, NavigableString):
                continue
            link_name = link_name.strip().lower()
            if link.has_attr('href'):
                link_href = link['href']
            else:
                continue
            log.debug('found link %s -> %s' % (link_name, link_href))
            # handle imdb link
            if link_name.lower() == 'imdb':
                log.debug('found imdb link %s' % link_href)
                release['imdb_url'] = link_href
            # test if entry with this url would be rewritable by known plugins (ie. downloadable)
            temp = {}
            temp['title'] = release['title']
            temp['url'] = link_href
            urlrewriting = plugin.get_plugin_by_name('urlrewriting')
            if urlrewriting['instance'].url_rewritable(task, temp):
                release['url'] = link_href
                log.trace('--> accepting %s (resolvable)' % link_href)
            else:
                log.trace('<-- ignoring %s (non-resolvable)' % link_href)
        # reject if no torrent link
        if 'url' not in release:
            from flexget.utils.log import log_once
            log_once('%s skipped due to missing or unsupported (unresolvable) download link' % (release['title']),
                     log)
        else:
            releases.append(release)
    return releases

def entries_from_search(self, name, url=None):
    """Parses torrent download url from search results"""
    name = normalize_unicode(name)
    if not url:
        url = 'http://www.newtorrents.info/search/%s' % urllib.quote(name.encode('utf-8'), safe=b':/~?=&%')
        log.debug('search url: %s' % url)
    html = urlopener(url, log).read()
    # fix </SCR'+'IPT> so that BS does not crash
    # TODO: should use beautifulsoup massage
    html = re.sub(r'(</SCR.*?)...(.*?IPT>)', r'\1\2', html)
    soup = get_soup(html)
    # saving torrents in dict
    torrents = []
    for link in soup.find_all('a', attrs={'href': re.compile('down.php')}):
        torrent_url = 'http://www.newtorrents.info%s' % link.get('href')
        release_name = link.parent.next.get('title')
        # quick dirty hack
        seed = link.find_next('td', attrs={'class': re.compile('s')}).renderContents()
        if seed == 'n/a':
            seed = 0
        else:
            try:
                seed = int(seed)
            except ValueError:
                log.warning('Error converting seed value (%s) from newtorrents to integer.' % seed)
                seed = 0
        # TODO: also parse content_size and peers from results
        torrents.append(Entry(title=release_name, url=torrent_url, torrent_seeds=seed,
                              search_sort=torrent_availability(seed, 0)))
    # sort with seed number, reverse order
    torrents.sort(reverse=True, key=lambda x: x.get('search_sort', 0))
    # choose the torrent
    if not torrents:
        dashindex = name.rfind('-')
        if dashindex != -1:
            return self.entries_from_search(name[:dashindex])
        else:
            return torrents
    else:
        if len(torrents) == 1:
            log.debug('found only one matching search result.')
        else:
            log.debug('search result contains multiple matches, sorted %s by most seeders' % torrents)
        return torrents

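# Standalone sketch of the dash-trimming fallback above (name fabricated):
# when no torrents are found, the search retries with everything after the
# last dash removed.
name = 'Some.Show.S01E01-GROUP'
dashindex = name.rfind('-')
print(name[:dashindex])  # Some.Show.S01E01
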
def get_first_result(tmdb_function, value):
    if isinstance(value, basestring):
        value = value.replace(' ', '+').encode('utf-8')
    url = '%s/2.1/Movie.%s/%s/json/%s/%s' % (server, tmdb_function, lang, api_key, value)
    try:
        data = urlopener(url, log)
    except URLError as e:
        log.warning('Request failed %s' % url)
        return

def get_tracker_seeds(self, url, info_hash):
    url = self.get_scrape_url(url)
    if not url:
        return 0
    log.debug('Checking for seeds from %s' % url)
    url += '?info_hash=%s' % quote(info_hash.decode('hex'))
    data = bdecode(urlopener(url, log, retries=2).read())['files']
    if not data:
        return 0
    return data.values()[0]['complete']

def get_tracker_seeds(self, url, info_hash):
    url = self.get_scrape_url(url)
    if not url:
        return 0
    log.debug('Checking for seeds from %s' % url)
    url += '?info_hash=%s' % quote(info_hash.decode('hex'))
    try:
        data = bdecode(urlopener(url, log, retries=2).read()).get('files')
    except SyntaxError as e:
        log.warning('Error bdecoding tracker response: %s' % e)
        return 0
    # snippet was cut short here; the final lines mirror the other variants of
    # this helper: no scrape data means zero seeds
    if not data:
        return 0
    return data.values()[0]['complete']

def get_tracker_seeds(url, info_hash):
    url = get_scrape_url(url, info_hash)
    if not url:
        log.debug('if not url is true returning 0')
        return 0
    log.debug('Checking for seeds from %s' % url)
    try:
        data = bdecode(urlopener(url, log, retries=1, timeout=10).read()).get('files')
    except SyntaxError as e:
        log.warning('Error decoding tracker response: %s' % e)
        return 0
    # snippet was cut short here; the final lines mirror the fuller
    # get_http_seeds variant above: no scrape data means zero seeds
    if not data:
        return 0
    return data.values()[0]['complete']

def on_task_input(self, task, config, session=None):
    account_id = str(config['account_id'])
    # Get the cache for this user
    user_favorites = session.query(ThetvdbFavorites).filter(ThetvdbFavorites.account_id == account_id).first()
    if user_favorites and user_favorites.updated > datetime.now() - timedelta(minutes=10):
        log.debug('Using cached thetvdb favorite series information for account ID %s' % account_id)
    else:
        try:
            url = 'http://thetvdb.com/api/User_Favorites.php?accountid=%s' % account_id
            log.debug('requesting %s' % url)
            data = ElementTree.fromstring(urlopener(url, log).read())
            favorite_ids = []
            for i in data.findall('Series'):
                if i.text:
                    favorite_ids.append(i.text)
        except (urllib2.URLError, IOError, AttributeError):
            import traceback
            # If there are errors getting the favorites or parsing the xml, fall back on cache
            log.error('Error retrieving favorites from thetvdb, using cache.')
            log.debug(traceback.format_exc())
        else:
            # Successfully updated from tvdb, update the database
            log.debug('Successfully updated favorites from thetvdb.com')
            if not user_favorites:
                user_favorites = ThetvdbFavorites(account_id, favorite_ids)
            else:
                user_favorites.series_ids = favorite_ids
                user_favorites.updated = datetime.now()
            session.merge(user_favorites)
    if not user_favorites.series_ids:
        log.warning('Didn\'t find any thetvdb.com favorites.')
        return
    # Construct list of entries with our series names
    entries = []
    for series_id in user_favorites.series_ids:
        # Lookup the series name from the id
        try:
            series = lookup_series(tvdb_id=series_id)
        except LookupError as e:
            log.error('Error looking up %s from thetvdb: %s' % (series_id, e.message))
        else:
            series_name = series.seriesname
            if config.get('strip_dates'):
                # Remove year from end of series name if present
                series_name = re.sub(r'\s+\(\d{4}\)$', '', series_name)
            entries.append(Entry(series_name, '', tvdb_id=series.id))
    return entries

def parse_download_page(self, url):
    page = urlopener(url, log)
    try:
        soup = get_soup(page)
        tag_div = soup.find('div', attrs={'class': 'download'})
        if not tag_div:
            raise UrlRewritingError('Unable to locate download link from url %s' % url)
        tag_a = tag_div.find('a')
        torrent_url = tag_a.get('href')
        return torrent_url
    except Exception as e:
        raise UrlRewritingError(e)

def parse_download_page(self, url):
    txheaders = {'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}
    req = urllib2.Request(url, None, txheaders)
    page = urlopener(req, log)
    try:
        soup = get_soup(page)
    except Exception as e:
        raise UrlRewritingError(e)
    down_link = soup.find('a', attrs={'href': re.compile("download/\d+/.*\.torrent")})
    if not down_link:
        raise UrlRewritingError('Unable to locate download link from url %s' % url)
    return 'http://www.deadfrog.us/' + down_link.get('href')

def on_task_input(self, task, config):
    # use rss plugin
    rss_config = {'url': self.rss_url}
    rss_entries = super(AppleTrailers, self).on_task_input(task, rss_config)
    # Multiple entries can point to the same movie page (trailer 1, clip 1, etc.)
    entries = {}
    for entry in rss_entries:
        url = entry['original_url']
        if url in entries:
            continue
        else:
            title = entry['title']
            entries[url] = title[:title.rfind('-')].rstrip()
    result = []
    for url, title in entries.iteritems():
        inc_url = url + 'includes/playlists/web.inc'
        try:
            page = urlopener(inc_url, log)
        except HTTPError as err:
            log.warning("HTTPError when opening playlist page: %d %s" % (err.code, err.reason))
            continue
        soup = get_soup(page)
        links = soup.find_all('a', attrs={'class': 'target-quicktimeplayer',
                                          'href': re.compile(r'_h?480p\.mov$')})
        for link in links:
            url = link.get('href')
            url = url[:url.rfind('_')]
            quality = self.quality.lower()
            if quality == 'ipod':
                url += '_i320.m4v'
            else:
                url += '_h' + quality + '.mov'
            entry = Entry()
            entry['url'] = url
            entry['title'] = title
            match = re.search(r'.*/([^?#]*)', url)
            entry['filename'] = match.group(1)
            result.append(entry)
            log.debug('found trailer %s', url)
    # the snippet was cut short; returning the collected entries is the
    # evident intent of the result list built above
    return result

def on_task_input(self, task):
    pageurl = "http://tvtorrents.com/loggedin/recently_aired.do"
    log.debug("InputPlugin tvtorrents requesting url %s" % pageurl)
    page = urlopener(pageurl, log)
    soup = get_soup(page)
    hscript = soup.find('script', src=None).contents[0]
    hlines = hscript.splitlines()
    hash = hlines[15].strip().split("'")[1]
    digest = hlines[16].strip().split("'")[1]
    hurl = hlines[17].strip().split("'")
    hashurl = hurl[1] + "%s" + hurl[3] + digest + hurl[5] + hash
    for link in soup.find_all('a'):
        if 'href' not in link:
            continue
        url = link['href']
        title = link.contents[0]
        if link.has_attr('onclick') and link['onclick'].find("loadTorrent") != -1:
            infohash = link['onclick'].split("'")[1]
            td = link.parent.parent.contents[4]
            sname = td.contents[0].strip()
            epi = td.contents[2].contents[0].strip()
            title = "%s - %s" % (sname, epi)
            url = hashurl % (infohash,)
        else:
            continue
        if title is None:
            continue
        title = title.strip()
        if not title:
            continue
        # fix broken urls (original used `or` here, which is always true;
        # the intent is clearly "not an absolute http(s) url")
        if url.startswith('//'):
            url = "http:" + url
        elif not url.startswith('http://') and not url.startswith('https://'):
            url = urlparse.urljoin(pageurl, url)
        # in case the title contains xxxxxxx.torrent - foooo.torrent, clean it a bit (get up to first .torrent)
        if title.lower().find('.torrent') > 0:
            title = title[:title.lower().find(".torrent")]
        entry = Entry()
        entry['url'] = url
        entry['title'] = title
        task.entries.append(entry)

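# Standalone check of the relative-url fixup in the loop above:
import urlparse  # py2 module, matching the code above
print(urlparse.urljoin('http://tvtorrents.com/loggedin/recently_aired.do', 'download.do?id=1'))
# http://tvtorrents.com/loggedin/download.do?id=1
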
def on_task_input(self, task, config):
    account_id = str(config['account_id'])
    favorite_ids = []
    # Get the cache for this user
    with Session() as session:
        user_favorites = session.query(ThetvdbFavorites).filter(ThetvdbFavorites.account_id == account_id).first()
        if user_favorites:
            favorite_ids = user_favorites.series_ids
        if user_favorites and user_favorites.updated > datetime.now() - timedelta(minutes=10):
            log.debug('Using cached thetvdb favorite series information for account ID %s' % account_id)
        else:
            try:
                url = 'http://thetvdb.com/api/User_Favorites.php?accountid=%s' % account_id
                log.debug('requesting %s' % url)
                data = ElementTree.fromstring(urlopener(url, log).read())
                favorite_ids = []
                for i in data.findall('Series'):
                    if i.text:
                        favorite_ids.append(i.text)
            except (urllib2.URLError, IOError, AttributeError):
                import traceback
                # If there are errors getting the favorites or parsing the xml, fall back on cache
                log.error('Error retrieving favorites from thetvdb, using cache.')
                log.debug(traceback.format_exc())
            else:
                # Successfully updated from tvdb, update the database
                log.debug('Successfully updated favorites from thetvdb.com')
                if not user_favorites:
                    user_favorites = ThetvdbFavorites(account_id, favorite_ids)
                else:
                    user_favorites.series_ids = favorite_ids
                    user_favorites.updated = datetime.now()
                session.merge(user_favorites)
    if not favorite_ids:
        log.warning('Didn\'t find any thetvdb.com favorites.')
        return
    # Construct list of entries with our series names
    entries = []
    for series_id in favorite_ids:
        # Lookup the series name from the id
        try:
            series = lookup_series(tvdb_id=series_id)
        except LookupError as e:
            log.error('Error looking up %s from thetvdb: %s' % (series_id, e.args[0]))
        else:
            series_name = series.seriesname
            if config.get('strip_dates'):
                # Remove year from end of series name if present
                series_name = re.sub(r'\s+\(\d{4}\)$', '', series_name)
            entries.append(Entry(series_name, '', tvdb_id=series.id))
    return entries

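# Standalone sketch of the favorites XML parse used above (sample XML fabricated):
from xml.etree import ElementTree
data = ElementTree.fromstring('<Favorites><Series>80348</Series><Series>73739</Series></Favorites>')
print([i.text for i in data.findall('Series')])  # ['80348', '73739']
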
def entries_from_search(self, name, url=None, comparator=StringComparator(cutoff=0.9)):
    """Parses torrent download url from search results"""
    comparator.set_seq1(name)
    name = comparator.search_string()
    if not url:
        url = 'http://www.newtorrents.info/search/%s' % urllib.quote(name, safe=':/~?=&%')
        log.debug('search url: %s' % url)
    html = urlopener(url, log).read()
    # fix </SCR'+'IPT> so that BS does not crash
    # TODO: should use beautifulsoup massage
    html = re.sub(r'(</SCR.*?)...(.*?IPT>)', r'\1\2', html)
    soup = get_soup(html)
    # saving torrents in dict
    torrents = []
    for link in soup.findAll('a', attrs={'href': re.compile('down.php')}):
        torrent_url = 'http://www.newtorrents.info%s' % link.get('href')
        release_name = link.parent.next.get('title')
        # quick dirty hack
        seed = link.findNext('td', attrs={'class': re.compile('s')}).renderContents()
        if seed == 'n/a':
            seed = 0
        else:
            try:
                seed = int(seed)
            except ValueError:
                log.warning('Error converting seed value (%s) from newtorrents to integer.' % seed)
                seed = 0
        # TODO: also parse content_size and peers from results
        if comparator.matches(release_name):
            torrents.append(Entry(title=release_name, url=torrent_url, torrent_seeds=seed,
                                  search_ratio=comparator.ratio(), search_sort=torrent_availability(seed, 0)))
        else:
            log.debug('rejecting search result: %s !~ %s' % (release_name, name))
    # sort with seed number, reverse order
    torrents.sort(reverse=True, key=lambda x: x.get('search_sort', 0))
    # choose the torrent
    if not torrents:
        dashindex = name.rfind('-')
        if dashindex != -1:
            return self.entries_from_search(name[:dashindex], comparator=comparator)
        else:
            raise PluginWarning('No matches for %s' % name, log, log_once=True)
    else:
        if len(torrents) == 1:
            log.debug('found only one matching search result.')
        else:
            log.debug('search result contains multiple matches, sorted %s by most seeders' % torrents)
        return torrents

def get_json(url):
    try:
        log.debug('fetching json at %s' % url)
        data = urlopener(url, log)
    except URLError as e:
        log.warning('Request failed %s' % url)
        return
    try:
        result = json.load(data)
    except ValueError:
        log.warning('Rotten Tomatoes returned invalid json at: %s' % url)
        return
    return result

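# Standalone sketch of the parse step inside get_json: json.load reads from a
# file-like object (as returned by urlopener) and raises ValueError on bad input.
import json
from StringIO import StringIO  # py2 module, matching the urllib2-era code above
print(json.load(StringIO('{"title": "Brazil"}')))
try:
    json.load(StringIO('not json'))
except ValueError:
    print('invalid json')
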
def parse_download_page(self, url):
    txheaders = {'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}
    req = urllib2.Request(url, None, txheaders)
    page = urlopener(req, log)
    try:
        soup = get_soup(page)
    except Exception as e:
        raise UrlRewritingError(e)
    tag_a = soup.find('a', attrs={'class': 'download_link'})
    if not tag_a:
        raise UrlRewritingError('Unable to locate download link from url %s' % url)
    torrent_url = 'http://www.bakabt.com' + tag_a.get('href')
    return torrent_url

def parse_download_page(self, url):
    page = urlopener(url, log)
    log.debug('%s opened', url)
    try:
        soup = get_soup(page)
        torrent_url = 'http://www.t411.me' + soup.find(text='Télécharger').findParent().get('href')
    except Exception as e:
        raise UrlRewritingError(e)
    if not torrent_url:
        raise UrlRewritingError('Unable to locate download link from url %s' % url)
    return torrent_url

def on_feed_input(self, feed):
    pageurl = "http://tvtorrents.com/loggedin/recently_aired.do"
    log.debug("InputPlugin tvtorrents requesting url %s" % pageurl)
    page = urlopener(pageurl, log)
    soup = get_soup(page)
    hscript = soup.find('script', src=None).contents[0]
    hlines = hscript.splitlines()
    hash = hlines[15].strip().split("'")[1]
    digest = hlines[16].strip().split("'")[1]
    hurl = hlines[17].strip().split("'")
    hashurl = hurl[1] + "%s" + hurl[3] + digest + hurl[5] + hash
    for link in soup.findAll('a'):
        if 'href' not in link:
            continue
        url = link['href']
        title = link.contents[0]
        if link.has_key('onclick') and link['onclick'].find("loadTorrent") != -1:
            infohash = link['onclick'].split("'")[1]
            td = link.parent.parent.contents[4]
            sname = td.contents[0].strip()
            epi = td.contents[2].contents[0].strip()
            title = "%s - %s" % (sname, epi)
            url = hashurl % (infohash,)
        else:
            continue
        if title is None:
            continue
        title = title.strip()
        if not title:
            continue
        # fix broken urls (original used `or` here, which is always true;
        # the intent is clearly "not an absolute http(s) url")
        if url.startswith('//'):
            url = "http:" + url
        elif not url.startswith('http://') and not url.startswith('https://'):
            url = urlparse.urljoin(pageurl, url)
        # in case the title contains xxxxxxx.torrent - foooo.torrent, clean it a bit (get up to first .torrent)
        if title.lower().find('.torrent') > 0:
            title = title[:title.lower().find(".torrent")]
        entry = Entry()
        entry['url'] = url
        entry['title'] = title
        feed.entries.append(entry)

def on_task_input(self, task, config):
    entries = []
    page = urlopener(config['url'], log)
    for row in csv.reader(page):
        if not row:
            continue
        entry = Entry()
        for name, index in config.get('values', {}).items():
            try:
                entry[name] = row[index - 1]
            except IndexError:
                raise Exception('Field `%s` index is out of range' % name)
        entries.append(entry)
    return entries

def url_from_page(self, url):
    """Parses torrent url from newtorrents download page"""
    try:
        page = urlopener(url, log)
        data = page.read()
    except urllib2.URLError:
        raise UrlRewritingError('URLError when retrieving page')
    p = re.compile("copy\(\'(.*)\'\)", re.IGNORECASE)
    f = p.search(data)
    if not f:
        # the link on which the plugin relies is missing!
        raise UrlRewritingError('Failed to get url from download page. Plugin may need an update.')
    else:
        return f.group(1)

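# Standalone check of the extraction regex in url_from_page, run against a
# fabricated page fragment (not a real newtorrents response):
import re
data = "onclick=\"copy('http://www.newtorrents.info/down.php?id=1')\""
match = re.compile("copy\(\'(.*)\'\)", re.IGNORECASE).search(data)
print(match.group(1))  # http://www.newtorrents.info/down.php?id=1
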
def update_rated(self, task, config):
    """Update my movies list"""
    # set first last_time into past so we trigger update on first run
    next_time = task.simple_persistence.setdefault('next_time', datetime.datetime.min)
    log.debug('next_time: %s' % next_time)
    if not datetime.datetime.now() > next_time:
        return
    task.simple_persistence['next_time'] = datetime.datetime.now() + datetime.timedelta(hours=4)
    log.debug('updating my movies from %s' % config['url'])
    massage = []
    # fix imdb html, just enough to pass parser
    #
    # <td class=list bgcolor="#CCCCCC"} colspan="4">
    # ^ god damn noobs
    massage.append((re.compile('"}'), lambda match: '"'))
    # onclick="(new Image()).src='/rg/home/navbar/images/b.gif?link=/'"">IMDb</a>
    # ^ are you even trying?
    massage.append((re.compile('/\'""'), lambda match: '/\'"'))
    # <table class="footer" id="amazon-affiliates"">
    # ^ ffs, I don't think they are even trying ...
    massage.append((re.compile('amazon-affiliates""'), lambda match: 'amazon-affiliates"'))
    data = urlopener(config['url'], log)
    soup = BeautifulSoup(data)
    count = 0
    for a_imdb_link in soup.find_all('a', attrs={'href': re.compile(r'/title/tt\d+')}):
        imdb_url = 'http://www.imdb.com%s' % a_imdb_link.get('href')
        if not task.session.query(ImdbRated).filter(ImdbRated.url == config['url']).\
                filter(ImdbRated.imdb_url == imdb_url).first():
            rated = ImdbRated(config['url'], imdb_url)
            task.session.add(rated)
            log.debug('adding %s' % rated)
            count += 1
    if count > 0:
        log.info('Added %s new movies' % count)

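# Standalone check of the first markup fixup above: the stray '"}' from the
# sample imdb markup collapses to a plain '"'.
import re
broken = '<td class=list bgcolor="#CCCCCC"} colspan="4">'
print(re.compile('"}').sub('"', broken))  # <td class=list bgcolor="#CCCCCC" colspan="4">
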
def parse_download_page(self, url):
    txheaders = {'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}
    req = urllib2.Request(url, None, txheaders)
    page = urlopener(req, log)
    try:
        soup = get_soup(page)
    except Exception as e:
        raise UrlRewritingError(e)
    down_link = soup.find('a', attrs={'href': re.compile(".+mp4")})
    if not down_link:
        raise UrlRewritingError('Unable to locate download link from url %s' % url)
    return down_link.get('href')

def parse_download_page(self, page_url):
    page = urlopener(page_url, log)
    try:
        soup = get_soup(page)
    except Exception as e:
        raise UrlRewritingError(e)
    tag_a = soup.find("a", {"class": "dl_link"})
    if not tag_a:
        raise UrlRewritingError('FTDB Unable to locate download link from url %s and tag_a is : %s'
                                % (page_url, tag_a))
    torrent_url = "http://www3.frenchtorrentdb.com" + tag_a.get('href') + "&js=1"
    log.debug('TORRENT URL is : %s' % torrent_url)
    return torrent_url

def get_first_result(tmdb_function, value):
    if isinstance(value, basestring):
        value = quote(value.encode('utf-8'), safe='')
    url = '%s/2.1/Movie.%s/%s/json/%s/%s' % (server, tmdb_function, lang, api_key, value)
    try:
        data = urlopener(url, log)
    except URLError:
        log.warning('Request failed %s' % url)
        return
    try:
        result = json.load(data)
    except ValueError:
        log.warning('TMDb returned invalid json.')
        return
    # Make sure there is a valid result to return
    if isinstance(result, list) and len(result):
        result = result[0]
    if isinstance(result, dict) and result.get('id'):
        return result

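# Standalone sketch of the result-shape check ending get_first_result: the API
# may return a list of matches or a bare dict, and only results carrying an
# 'id' count as valid (sample data fabricated).
result = [{'id': 603, 'name': 'The Matrix'}]
if isinstance(result, list) and len(result):
    result = result[0]
if isinstance(result, dict) and result.get('id'):
    print(result['name'])  # The Matrix
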
def parse_download_page(self, page_url):
    page = urlopener(page_url, log)
    try:
        soup = get_soup(page)
    except Exception as e:
        raise UrlRewritingError(e)
    tag_a = soup.find("a", {"class": "dl_link"})
    if not tag_a:
        if soup.findAll(text="Connexion ?"):
            raise UrlRewritingError('You are not logged in, check if your cookie for authentication is up to date')
        else:
            raise UrlRewritingError('You have reached your download limit per 24hours, so I cannot get the torrent')
    torrent_url = "http://www.frenchtorrentdb.com" + tag_a.get('href') + "&js=1"
    log.debug('TORRENT URL is : %s' % torrent_url)
    return torrent_url

def url_rewrite(self, task, entry):
    try:
        # need to fake user agent
        txheaders = {'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}
        req = urllib2.Request(entry['url'], None, txheaders)
        page = urlopener(req, log)
        soup = get_soup(page)
        results = soup.find_all('a', attrs={'class': 'l'})
        if not results:
            raise UrlRewritingError('No results')
        for res in results:
            url = res.get('href')
            url = url.replace('/interstitial?url=', '')
            # generate match regexp from google search result title
            regexp = '.*'.join([x.contents[0] for x in res.find_all('em')])
            if re.match(regexp, entry['title']):
                log.debug('resolved, found with %s' % regexp)
                entry['url'] = url
                return
        raise UrlRewritingError('Unable to resolve')
    except Exception as e:
        raise UrlRewritingError(e)

def search(self, query, comparator, config=None):
    # TODO: Implement comparator matching
    url = u'http://newzleech.com/?%s' % str(urllib.urlencode({'q': query.encode('latin1'), 'm': 'search',
                                                             'group': '', 'min': 'min', 'max': 'max',
                                                             'age': '', 'minage': '', 'adv': ''}))
    # log.debug('Search url: %s' % url)
    txheaders = {
        'User-Agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'en-us,en;q=0.5',
        'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
        'Keep-Alive': '300',
        'Connection': 'keep-alive',
    }
    req = urllib2.Request(url, headers=txheaders)
    page = urlopener(req, log)
    soup = get_soup(page)
    nzbs = []
    for item in soup.find_all('table', attrs={'class': 'contentt'}):
        subject_tag = item.find('td', attrs={'class': 'subject'}).next
        subject = ''.join(subject_tag.find_all(text=True))
        complete = item.find('td', attrs={'class': 'complete'}).contents[0]
        size = item.find('td', attrs={'class': 'size'}).contents[0]
        nzb_url = 'http://newzleech.com/' + item.find('td', attrs={'class': 'get'}).next.get('href')
        # TODO: confidence match
        # generate regexp from entry title and see if it matches subject
        regexp = query
        wildcardize = [' ', '-']
        for wild in wildcardize:
            regexp = regexp.replace(wild, '.')
        regexp = '.*' + regexp + '.*'
        # log.debug('Title regexp: %s' % regexp)
        if re.match(regexp, subject):
            log.debug('%s matches to regexp' % subject)
            if complete != u'100':
                log.debug('Match is incomplete %s from newzleech, skipping ..' % query)
                continue
            log.info('Found \'%s\'' % query)

            def parse_size(value):
                # original used a bare except; narrowed to the errors float() raises
                try:
                    num = float(value[:-3])
                except (ValueError, TypeError):
                    log.error('Failed to parse_size %s' % value)
                    return 0
                # convert into megabytes
                if 'GB' in value:
                    num *= 1024
                if 'KB' in value:
                    num /= 1024
                return num

            nzb = Entry(title=subject, url=nzb_url, content_size=parse_size(size))
            nzb['url'] = nzb_url
            nzb['size'] = parse_size(size)
            nzbs.append(nzb)
    if not nzbs:
        log.debug('Unable to find %s' % query)
        return
    # choose largest file
    nzbs.sort(reverse=True, key=lambda x: x.get('content_size', 0))
    return nzbs

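# Standalone check of the nested parse_size helper's unit handling (values
# fabricated): reported sizes are normalized to megabytes.
print(float('1.5 GB'[:-3]) * 1024)  # 1536.0
print(float('512 KB'[:-3]) / 1024)  # 0.5
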
def on_task_download(self, task):
    # filter all entries that have IMDB ID set
    try:
        entries = filter(lambda x: x['imdb_url'] is not None, task.accepted)
    except KeyError:
        # No imdb urls on this task, skip it
        # TODO: should do lookup via imdb_lookup plugin?
        return
    try:
        s = ServerProxy("http://api.opensubtitles.org/xml-rpc")
        res = s.LogIn("", "", "en", "FlexGet")
    except Exception:
        log.warning('Error connecting to opensubtitles.org')
        return
    if res['status'] != '200 OK':
        raise Exception("Login to opensubtitles.org XML-RPC interface failed")
    config = self.get_config(task)
    token = res['token']
    # configuration
    languages = config['languages']
    min_sub_rating = config['min_sub_rating']
    match_limit = config['match_limit']  # no need to change this, but it should be configurable
    # loop through the entries
    for entry in entries:
        # dig out the raw imdb id
        m = re.search("tt(\d+)/$", entry['imdb_url'])
        if not m:
            log.debug("no match for %s" % entry['imdb_url'])
            continue
        imdbid = m.group(1)
        query = []
        for language in languages:
            query.append({'sublanguageid': language, 'imdbid': imdbid})
        subtitles = s.SearchSubtitles(token, query)
        subtitles = subtitles['data']
        # nothing found -> continue
        if not subtitles:
            continue
        # filter bad subs
        subtitles = filter(lambda x: x['SubBad'] == '0', subtitles)
        # some quality required (0.0 == not reviewed)
        subtitles = filter(lambda x: float(x['SubRating']) >= min_sub_rating or float(x['SubRating']) == 0.0,
                           subtitles)
        filtered_subs = []
        # find the best rated subs for each language
        for language in languages:
            langsubs = filter(lambda x: x['SubLanguageID'] == language, subtitles)
            # did we find any subs for this language?
            if langsubs:

                def seqmatch(subfile):
                    s = difflib.SequenceMatcher(lambda x: x in " ._", entry['title'], subfile)
                    # print "matching: ", entry['title'], subfile, s.ratio()
                    return s.ratio() > match_limit

                # filter only those that have matching release names
                langsubs = filter(lambda x: seqmatch(x['MovieReleaseName']), subtitles)
                if langsubs:
                    # find the best one by SubRating
                    langsubs.sort(key=lambda x: float(x['SubRating']))
                    langsubs.reverse()
                    filtered_subs.append(langsubs[0])
        # download
        for sub in filtered_subs:
            # original passed extra positional args to log.debug; use % formatting
            log.debug('SUBS FOUND: %s %s %s' % (sub['MovieReleaseName'], sub['SubRating'], sub['SubLanguageID']))
            f = urlopener(sub['ZipDownloadLink'], log)
            subfilename = re.match('^attachment; filename="(.*)"$', f.info()['content-disposition']).group(1)
            outfile = os.path.join(config['output'], subfilename)
            fp = file(outfile, 'w')
            fp.write(f.read())
            fp.close()
            f.close()
    s.LogOut(token)

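# Standalone sketch of the release-name matching inside on_task_download:
# a difflib ratio over the entry title and subtitle release name, compared
# against match_limit (sample strings fabricated).
import difflib
s = difflib.SequenceMatcher(lambda x: x in ' ._',
                            'The.Movie.2010.720p',
                            'The Movie 2010 720p x264')
print(s.ratio())  # roughly 0.74 on these strings; would pass a 0.7 match_limit
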
def post_json_to_trakt(self, url, data):
    """Dumps data as json and POSTs it to the specified url."""
    req = urllib2.Request(url, json.dumps(data), {'content-type': 'application/json'})
    return urlopener(req, log)

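# Hypothetical standalone equivalent of the request built above (endpoint and
# payload are assumptions, not from the source): giving urllib2.Request a data
# argument turns the request into a POST.
import json
import urllib2
payload = {'username': 'user', 'episodes': [{'season': 1, 'episode': 2}]}
req = urllib2.Request('http://api.example.com/endpoint', json.dumps(payload),
                      {'content-type': 'application/json'})
print(req.get_method())  # POST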