def parse_what(self, what):
    """Given an imdb id or movie title, looks up from imdb and returns
    a dict with imdb_id and title keys"""
    imdb_id = extract_id(what)
    title = what
    if imdb_id:
        # Given an imdb id, find title
        parser = ImdbParser()
        try:
            parser.parse('http://www.imdb.com/title/%s' % imdb_id)
        except Exception:
            raise QueueError('Error parsing info from imdb for %s' % imdb_id)
        if parser.name:
            title = parser.name
    else:
        # Given a title, try to do imdb search for id
        console('Searching imdb for %s' % what)
        search = ImdbSearch()
        result = search.smart_match(what)
        if not result:
            raise QueueError('ERROR: Unable to find any such movie from imdb, use imdb url instead.')
        imdb_id = extract_id(result['url'])
        title = result['name']
    self.options['imdb_id'] = imdb_id
    self.options['title'] = title
    return {'title': title, 'imdb_id': imdb_id}
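# The snippets in this section all lean on FlexGet's `flexget.utils.imdb`
# helpers. As a rough mental model only (a minimal sketch, not the library's
# actual implementation), `extract_id` pulls a tt-prefixed id out of a string
# or url and returns None when nothing matches:
import re

def extract_id(text):
    """Sketch: return an imdb id like 'tt0111161' found in `text`, or None."""
    if not text:
        return None
    match = re.search(r'tt\d+', text)
    return match.group(0) if match else None

print(extract_id('http://www.imdb.com/title/tt0111161/'))  # tt0111161
print(extract_id('no id here'))                            # None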
def on_task_metainfo(self, task, config):
    # check if disabled (value set to false)
    if 'scan_imdb' in task.config:
        if not task.config['scan_imdb']:
            return
    for entry in task.entries:
        # Don't override already populated imdb_ids
        if entry.get('imdb_id', eval_lazy=False):
            continue
        if 'description' not in entry:
            continue
        urls = re.findall(r'\bimdb.com/title/tt\d+\b', entry['description'])
        # Find unique imdb ids
        imdb_ids = [_f for _f in set(extract_id(url) for url in urls) if _f]
        if not imdb_ids:
            continue
        if len(imdb_ids) > 1:
            log.debug('Found multiple imdb ids; not using any of: %s' % ' '.join(imdb_ids))
            continue
        entry['imdb_id'] = imdb_ids[0]
        entry['imdb_url'] = make_url(entry['imdb_id'])
        log.debug('Found imdb url in description %s' % entry['imdb_url'])
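# To see what the description scan above picks up, here is a standalone
# check of the same regex on an invented sample description:
import re

description = 'Great movie, see http://www.imdb.com/title/tt0111161/ for details'
print(re.findall(r'\bimdb.com/title/tt\d+\b', description))
# ['imdb.com/title/tt0111161']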
def seen_search(options, session=None):
    search_term = options.search_term
    if is_imdb_url(search_term):
        console('IMDB url detected, parsing ID')
        imdb_id = extract_id(search_term)
        if imdb_id:
            search_term = imdb_id
        else:
            console("Could not parse IMDB ID")
    else:
        search_term = '%' + options.search_term + '%'
    seen_entries = seen.search(value=search_term, status=None, session=session)
    table_data = []
    for se in seen_entries.all():
        table_data.append(['Title', se.title])
        for sf in se.fields:
            if sf.field.lower() == 'title':
                continue
            table_data.append(['{}'.format(sf.field.upper()), str(sf.value)])
        table_data.append(['Task', se.task])
        table_data.append(['Added', se.added.strftime('%Y-%m-%d %H:%M')])
        if options.table_type != 'porcelain':
            table_data.append(['', ''])

    if not table_data:
        console('No results found for search')
        return
    if options.table_type != 'porcelain':
        # drop the trailing blank separator row
        del table_data[-1]

    try:
        table = TerminalTable(options.table_type, table_data, wrap_columns=[1])
        table.table.inner_heading_row_border = False
        console(table.output)
    except TerminalTableError as e:
        console('ERROR: %s' % str(e))
def parse_what(what, lookup=True, session=None):
    """
    Determines what information was provided by the search string `what`.
    If `lookup` is true, will fill in other information from tmdb.

    :param what: Can be one of:
        <Movie Title>: Search based on title
        imdb_id=<IMDB id>: search based on imdb id
        tmdb_id=<TMDB id>: search based on tmdb id
    :param bool lookup: Whether missing info should be filled in from tmdb.
    :param session: An existing session that will be used for lookups if provided.
    :rtype: dict
    :return: A dictionary with 'title', 'imdb_id' and 'tmdb_id' keys
    """
    tmdb_lookup = get_plugin_by_name('api_tmdb').instance.lookup

    result = {'title': None, 'imdb_id': None, 'tmdb_id': None}
    result['imdb_id'] = extract_id(what)
    if not result['imdb_id'] and what.startswith('tmdb_id='):
        result['tmdb_id'] = what[8:]
    else:
        result['title'] = what

    if not lookup:
        # If not doing an online lookup we can return here
        return result

    try:
        result['session'] = session
        movie = tmdb_lookup(**result)
    except LookupError as e:
        raise QueueError(e.message)
def upgrade(ver, session):
    if ver is None:
        log.info('Converting seen imdb_url to imdb_id for seen movies.')
        field_table = table_schema('seen_field', session)
        for row in session.execute(select([field_table.c.id, field_table.c.value],
                                          field_table.c.field == 'imdb_url')):
            new_values = {'field': 'imdb_id', 'value': extract_id(row['value'])}
            session.execute(update(field_table, field_table.c.id == row['id'], new_values))
        ver = 1
    if ver == 1:
        field_table = table_schema('seen_field', session)
        log.info('Adding index to seen_field table.')
        Index('ix_seen_field_seen_entry_id', field_table.c.seen_entry_id).create(bind=session.bind)
        ver = 2
    if ver == 2:
        log.info('Adding local column to seen_entry table')
        table_add_column('seen_entry', 'local', Boolean, session, default=False)
        ver = 3
    if ver == 3:
        # setting the default to False in the last migration was broken, fix the data
        log.info('Repairing seen table')
        entry_table = table_schema('seen_entry', session)
        session.execute(update(entry_table, entry_table.c.local == None, {'local': False}))
        ver = 4
    return ver
def on_task_metainfo(self, task, config):
    # check if disabled (value set to false)
    if 'scan_imdb' in task.config:
        if not task.config['scan_imdb']:
            return
    for entry in task.entries:
        # Don't override already populated imdb_ids
        if entry.get('imdb_id', eval_lazy=False):
            continue
        if 'description' not in entry:
            continue
        urls = re.findall(r'\bimdb.com/title/tt\d+\b', entry['description'])
        # Find unique imdb ids (wrap in list so len() works on python 3 too)
        imdb_ids = list(filter(None, set(extract_id(url) for url in urls)))
        if not imdb_ids:
            continue
        if len(imdb_ids) > 1:
            log.debug('Found multiple imdb ids; not using any of: %s' % ' '.join(imdb_ids))
            continue
        entry['imdb_id'] = imdb_ids[0]
        entry['imdb_url'] = make_url(entry['imdb_id'])
        log.debug('Found imdb url in description %s' % entry['imdb_url'])
def upgrade(ver, session):
    if ver is None:
        log.info("Converting seen imdb_url to imdb_id for seen movies.")
        field_table = table_schema("seen_field", session)
        for row in session.execute(select([field_table.c.id, field_table.c.value],
                                          field_table.c.field == "imdb_url")):
            new_values = {"field": "imdb_id", "value": extract_id(row["value"])}
            session.execute(update(field_table, field_table.c.id == row["id"], new_values))
        ver = 1
    if ver == 1:
        field_table = table_schema("seen_field", session)
        log.info("Adding index to seen_field table.")
        Index("ix_seen_field_seen_entry_id", field_table.c.seen_entry_id).create(bind=session.bind)
        ver = 2
    if ver == 2:
        log.info("Adding local column to seen_entry table")
        table_add_column("seen_entry", "local", Boolean, session, default=False)
        ver = 3
    if ver == 3:
        # setting the default to False in the last migration was broken, fix the data
        log.info("Repairing seen table")
        entry_table = table_schema("seen_entry", session)
        session.execute(update(entry_table, entry_table.c.local == None, {"local": False}))
        ver = 4
    return ver
def seen_search(options, session=None):
    search_term = options.search_term
    if is_imdb_url(search_term):
        console('IMDB url detected, parsing ID')
        imdb_id = extract_id(search_term)
        if imdb_id:
            search_term = imdb_id
        else:
            console("Could not parse IMDB ID")
    else:
        search_term = '%' + options.search_term + '%'
    seen_entries = plugin_seen.search(value=search_term, status=None, session=session)
    table_data = []
    for se in seen_entries.all():
        table_data.append(['Title', se.title])
        for sf in se.fields:
            if sf.field.lower() == 'title':
                continue
            table_data.append(['{}'.format(sf.field.upper()), str(sf.value)])
        table_data.append(['Task', se.task])
        table_data.append(['Added', se.added.strftime('%Y-%m-%d %H:%M')])
        if options.table_type != 'porcelain':
            table_data.append(['', ''])

    if not table_data:
        console('No results found for search')
        return
    if options.table_type != 'porcelain':
        # drop the trailing blank separator row
        del table_data[-1]

    try:
        table = TerminalTable(options.table_type, table_data, wrap_columns=[1])
        table.table.inner_heading_row_border = False
        console(table.output)
    except TerminalTableError as e:
        console('ERROR: %s' % str(e))
def __setitem__(self, key, value):
    # Enforce unicode compatibility. Check for all subclasses of basestring,
    # so that NavigableStrings are also cast
    if isinstance(value, basestring) and not type(value) == unicode:
        try:
            value = unicode(value)
        except UnicodeDecodeError:
            raise EntryUnicodeError(key, value)

    # url and original_url handling
    if key == 'url':
        if not isinstance(value, basestring):
            raise PluginError('Tried to set %r url to %r' % (self.get('title'), value))
        if 'original_url' not in self:
            self['original_url'] = value

    # title handling
    if key == 'title':
        if not isinstance(value, basestring):
            raise PluginError('Tried to set title to %r' % value)

    # TODO: HACK! Implement via plugin once #348 (entry events) is implemented
    # enforces imdb_url in same format
    if key == 'imdb_url' and isinstance(value, basestring):
        imdb_id = extract_id(value)
        if imdb_id:
            value = make_url(imdb_id)
        else:
            log.debug('Tried to set imdb_id to invalid imdb url: %s' % value)
            value = None

    try:
        log.trace('ENTRY SET: %s = %r' % (key, value))
    except Exception as e:
        log.debug('trying to debug key `%s` value threw exception: %s' % (key, e))

    dict.__setitem__(self, key, value)
def on_process_start(self, feed):
    if not feed.manager.options.repair_seen_movies:
        return

    feed.manager.disable_feeds()

    from progressbar import ProgressBar, Percentage, Bar, ETA
    from flexget.manager import Session
    from seen import SeenField
    from flexget.utils.imdb import extract_id

    session = Session()
    index = 0
    count = 0
    total = session.query(SeenField).filter(SeenField.field == u'imdb_url').count()

    widgets = ['Repairing: ', ETA(), ' ', Percentage(), ' ', Bar(left='[', right=']')]
    bar = ProgressBar(widgets=widgets, maxval=total).start()

    for seen in session.query(SeenField).filter(SeenField.field == u'imdb_url').all():
        index += 1
        if index % 5 == 0:
            bar.update(index)
        value = u'http://www.imdb.com/title/%s/' % extract_id(seen.value)
        if value != seen.value:
            count += 1
            seen.value = value
            seen.field = unicode('imdb_url')

    bar.finish()
    session.commit()
    print('Fixed %s/%s URLs' % (count, total))
def __setitem__(self, key, value):
    # Enforce unicode compatibility. Check for all subclasses of basestring,
    # so that NavigableStrings are also cast
    if isinstance(value, basestring) and not type(value) == unicode:
        try:
            value = unicode(value)
        except UnicodeDecodeError:
            raise EntryUnicodeError(key, value)

    # url and original_url handling
    if key == 'url':
        if not isinstance(value, basestring):
            raise PluginError('Tried to set %r url to %r' % (self.get('title'), value))
        self.setdefault('original_url', value)

    # title handling
    if key == 'title':
        if not isinstance(value, basestring):
            raise PluginError('Tried to set title to %r' % value)

    # TODO: HACK! Implement via plugin once #348 (entry events) is implemented
    # enforces imdb_url in same format
    if key == 'imdb_url' and isinstance(value, basestring):
        imdb_id = extract_id(value)
        if imdb_id:
            value = make_url(imdb_id)
        else:
            log.debug('Tried to set imdb_id to invalid imdb url: %s' % value)
            value = None

    try:
        log.trace('ENTRY SET: %s = %r' % (key, value))
    except Exception as e:
        log.debug('trying to debug key `%s` value threw exception: %s' % (key, e))

    dict.__setitem__(self, key, value)
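# The imdb_url branch above either canonicalises the assigned url or drops
# it: a url with a recoverable id is rewritten via make_url, anything else
# becomes None. A self-contained sketch of that decision (helper behaviour
# assumed, as noted earlier):
import re

def normalize_imdb_url(value):
    match = re.search(r'tt\d+', value or '')
    if match:
        return 'http://www.imdb.com/title/%s/' % match.group(0)
    return None  # invalid url: the entry field is cleared

print(normalize_imdb_url('http://www.imdb.com/title/tt0111161/fullcredits'))
# http://www.imdb.com/title/tt0111161/
print(normalize_imdb_url('http://example.com/'))  # None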
def parse_site(self, url, task):
    """Parse configured url and return releases array"""
    try:
        page = task.requests.get(url).content
    except RequestException as e:
        raise plugin.PluginError('Error getting input page: %s' % e)
    soup = get_soup(page)

    releases = []
    for entry in soup.find_all('div', attrs={'class': 'entry'}):
        release = {}
        title = entry.find('h2')
        if not title:
            log.debug('No h2 entrytitle')
            continue
        release['title'] = title.a.contents[0].strip()

        log.debug('Processing title %s' % (release['title']))

        for link in entry.find_all('a'):
            # no content in the link
            if not link.contents:
                continue
            link_name = link.contents[0]
            if link_name is None:
                continue
            if not isinstance(link_name, NavigableString):
                continue
            link_name = link_name.strip().lower()
            if link.has_attr('href'):
                link_href = link['href']
            else:
                continue
            log.debug('found link %s -> %s' % (link_name, link_href))
            # handle imdb link
            if link_name.lower() == 'imdb':
                log.debug('found imdb link %s' % link_href)
                release['imdb_id'] = extract_id(link_href)

            # test if entry with this url would be rewritable by known plugins (ie. downloadable)
            temp = {}
            temp['title'] = release['title']
            temp['url'] = link_href
            urlrewriting = plugin.get_plugin_by_name('urlrewriting')
            if urlrewriting['instance'].url_rewritable(task, temp):
                release['url'] = link_href
                log.trace('--> accepting %s (resolvable)' % link_href)
            else:
                log.trace('<-- ignoring %s (non-resolvable)' % link_href)

        # reject if no torrent link
        if 'url' not in release:
            from flexget.utils.log import log_once
            log_once('%s skipped due to missing or unsupported (unresolvable) download link'
                     % (release['title']), log)
        else:
            releases.append(release)

    return releases
def on_task_input(self, task, config):
    log.verbose('Retrieving list %s ...' % config['list'])

    # Get the imdb list in RSS format
    if config['list'] in ['watchlist', 'ratings', 'checkins']:
        url = 'http://rss.imdb.com/user/%s/%s' % (config['user_id'], config['list'])
    else:
        url = 'http://rss.imdb.com/list/%s' % config['list']
    log.debug('Requesting %s' % url)
    try:
        rss = feedparser.parse(url)
    except LookupError as e:
        raise plugin.PluginError('Failed to parse RSS feed for list `%s` correctly: %s' % (config['list'], e))
    if rss.status == 404:
        raise plugin.PluginError('Unable to get imdb list. Either list is private or does not exist.')

    # Create an Entry for each movie in the list
    entries = []
    title_re = re.compile(r'(.*) \((\d{4})?.*?\)$')
    for entry in rss.entries:
        try:
            # IMDb puts some extra stuff in the titles, e.g. "Battlestar Galactica (2004 TV Series)"
            # Strip out everything but the date
            match = title_re.match(entry.title)
            title = match.group(1)
            if match.group(2):
                title += ' (%s)' % match.group(2)
            entries.append(Entry(title=title, url=entry.link,
                                 imdb_id=extract_id(entry.link), imdb_name=match.group(1)))
        except IndexError:
            log.critical('IndexError! Unable to handle RSS entry: %s' % entry)
    return entries
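# The title regex above keeps the name plus a four-digit year while dropping
# qualifiers such as 'TV Series'. A quick standalone check of that behaviour:
import re

title_re = re.compile(r'(.*) \((\d{4})?.*?\)$')
match = title_re.match('Battlestar Galactica (2004 TV Series)')
print(match.group(1))  # Battlestar Galactica
print(match.group(2))  # 2004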
def parse_site(self, url, task):
    """Parse configured url and return releases array"""
    try:
        page = task.requests.get(url).content
    except RequestException as e:
        raise plugin.PluginError('Error getting input page: %s' % e)
    soup = get_soup(page)

    releases = []
    for entry in soup.find_all('div', attrs={'class': 'entry'}):
        release = {}
        title = entry.find('h2')
        if not title:
            log.debug('No h2 entrytitle')
            continue
        release['title'] = title.a.contents[0].strip()

        log.debug('Processing title %s' % (release['title']))

        for link in entry.find_all('a'):
            # no content in the link
            if not link.contents:
                continue
            link_name = link.contents[0]
            if link_name is None:
                continue
            if not isinstance(link_name, NavigableString):
                continue
            link_name = link_name.strip().lower()
            if link.has_attr('href'):
                link_href = link['href']
            else:
                continue
            log.debug('found link %s -> %s' % (link_name, link_href))
            # handle imdb link
            if link_name.lower() == 'imdb':
                log.debug('found imdb link %s' % link_href)
                release['imdb_id'] = extract_id(link_href)

            # test if entry with this url would be rewritable by known plugins (ie. downloadable)
            temp = {}
            temp['title'] = release['title']
            temp['url'] = link_href
            urlrewriting = plugin.get_plugin_by_name('urlrewriting')
            if urlrewriting['instance'].url_rewritable(task, temp):
                release['url'] = link_href
                log.trace('--> accepting %s (resolvable)' % link_href)
            else:
                log.trace('<-- ignoring %s (non-resolvable)' % link_href)

        # reject if no torrent link
        if 'url' not in release:
            from flexget.utils.log import log_once
            log_once('%s skipped due to missing or unsupported (unresolvable) download link'
                     % (release['title']), log)
        else:
            releases.append(release)

    return releases
def seen_add(options):
    seen_name = options.add_value
    if is_imdb_url(seen_name):
        imdb_id = extract_id(seen_name)
        if imdb_id:
            seen_name = imdb_id

    seen.add(seen_name, "cli_add", {"cli_add": seen_name})
    console("Added %s as seen. This will affect all tasks." % seen_name)
def seen_add(options):
    seen_name = options.add_value
    if is_imdb_url(seen_name):
        imdb_id = extract_id(seen_name)
        if imdb_id:
            seen_name = imdb_id

    seen.add(seen_name, 'cli_add', {'cli_add': seen_name})
    console('Added %s as seen. This will affect all tasks.' % seen_name)
def search(self, task, entry, config):
    if not session.cookies or not session.passkey:
        try:
            login_params = {'username': config['username'],
                            'password': config['password'],
                            'loginkey': config['login_key']}
            r = session.post('https://piratethenet.org/takelogin.php', data=login_params, verify=False)
        except requests.RequestException as e:
            log.error('Error while logging in to PtN: %s', e)
            raise plugin.PluginError('Could not log in to PtN')

        # Sorta hacky, we'll just store the passkey on the session
        passkey = re.search(r'passkey=([\d\w]+)"', r.text)
        if passkey:
            session.passkey = passkey.group(1)
        else:
            log.error('PtN cookie info invalid')
            raise plugin.PluginError('PTN cookie info invalid')

    search_params = default_search_params.copy()
    if 'movie_name' in entry:
        if 'movie_year' in entry:
            search_params['advancedsearchparameters'] = '[year=%s]' % entry['movie_year']
        searches = [entry['movie_name']]
    else:
        searches = entry.get('search_strings', [entry['title']])

    results = set()
    for search in searches:
        search_params['searchstring'] = search
        try:
            r = session.get('http://piratethenet.org/torrentsutils.php', params=search_params)
        except requests.RequestException as e:
            log.error('Error searching ptn: %s' % e)
            continue
        # html5parser doesn't work properly for some reason
        soup = get_soup(r.text, parser='html.parser')
        for movie in soup.select('.torrentstd'):
            imdb_id = movie.find('a', href=re.compile(r'.*imdb\.com/title/tt'))
            if imdb_id:
                imdb_id = extract_id(imdb_id['href'])
            if imdb_id and 'imdb_id' in entry and imdb_id != entry['imdb_id']:
                continue
            results.update(self.create_entries(movie, passkey=session.passkey, imdb_id=imdb_id))
    return results
def parse_html_list(self, task, config, url, params, headers):
    page = self.fetch_page(task, url, params, headers)
    soup = get_soup(page.text)

    try:
        item_text = soup.find('div', class_='lister-total-num-results').string.split()
        total_item_count = int(item_text[0].replace(',', ''))
        log.verbose('imdb list contains %d items', total_item_count)
    except AttributeError:
        total_item_count = 0
    except (ValueError, TypeError) as e:
        # TODO Something is wrong if we get a ValueError, I think
        raise plugin.PluginError('Received invalid movie count: %s ; %s' %
                                 (soup.find('div', class_='lister-total-num-results').string, e))

    if not total_item_count:
        log.verbose('No movies were found in imdb list: %s', config['list'])
        return

    entries = []
    items_processed = 0
    page_no = 1
    while items_processed < total_item_count:
        # Fetch the next page unless we've just begun
        if items_processed:
            page_no += 1
            params['page'] = page_no
            page = self.fetch_page(task, url, params, headers)
            soup = get_soup(page.text)

        items = soup.find_all('div', class_='lister-item')
        if not items:
            log.debug('no items found on page: %s, aborting.', url)
            break
        log.debug('%d items found on page %d', len(items), page_no)

        for item in items:
            items_processed += 1
            a = item.find('h3', class_='lister-item-header').find('a')
            if not a:
                log.debug('no title link found for row, skipping')
                continue
            link = ('http://www.imdb.com' + a.get('href')).rstrip('/')
            entry = Entry()
            entry['title'] = a.text
            try:
                year = int(item.find('span', class_='lister-item-year').text)
                entry['title'] += ' (%s)' % year
                entry['imdb_year'] = year
            except (ValueError, TypeError):
                pass
            entry['url'] = link
            entry['imdb_id'] = extract_id(link)
            entry['imdb_name'] = entry['title']
            entries.append(entry)

    return entries
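# The .replace(',', '') above matters because imdb renders large result
# counts with thousands separators; without it, int() raises ValueError:
count_text = '1,234 titles'.split()
print(int(count_text[0].replace(',', '')))  # 1234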
def upgrade(ver, session):
    if ver is None:
        log.info('Converting seen imdb_url to imdb_id for seen movies.')
        field_table = table_schema('seen_field', session)
        for row in session.execute(select([field_table.c.id, field_table.c.value],
                                          field_table.c.field == 'imdb_url')):
            session.execute(update(field_table, field_table.c.id == row['id'],
                                   {'field': 'imdb_id', 'value': extract_id(row['value'])}))
        ver = 1
    return ver
def seen_forget(manager, options):
    forget_name = options.forget_value
    if is_imdb_url(forget_name):
        imdb_id = extract_id(forget_name)
        if imdb_id:
            forget_name = imdb_id

    count, fcount = plugin_seen.forget(forget_name)
    console('Removed %s titles (%s fields)' % (count, fcount))
    manager.config_changed()
def seen_forget(manager, options):
    forget_name = options.forget_value
    if is_imdb_url(forget_name):
        imdb_id = extract_id(forget_name)
        if imdb_id:
            forget_name = imdb_id

    count, fcount = forget(forget_name)
    console("Removed %s titles (%s fields)" % (count, fcount))
    manager.config_changed()
def seen_add(options):
    seen_name = options.add_value
    if is_imdb_url(seen_name):
        console('IMDB url detected, try to parse ID')
        imdb_id = extract_id(seen_name)
        if imdb_id:
            seen_name = imdb_id
        else:
            console("Could not parse IMDB ID")

    plugin_seen.add(seen_name, 'cli_add', {'cli_add': seen_name})
    console('Added %s as seen. This will affect all tasks.' % seen_name)
def seen_add(options):
    seen_name = options.add_value
    if is_imdb_url(seen_name):
        console('IMDB url detected, try to parse ID')
        imdb_id = extract_id(seen_name)
        if imdb_id:
            seen_name = imdb_id
        else:
            console("Could not parse IMDB ID")

    seen.add(seen_name, 'cli_add', {'cli_add': seen_name})
    console('Added %s as seen. This will affect all tasks.' % seen_name)
class ImdbLookup(object):
    """
    Retrieves imdb information for entries.

    Example:

        imdb_lookup: yes

    Also provides imdb lookup functionality to all other imdb related plugins.
    """

    field_map = {
        'imdb_url': 'url',
        'imdb_id': lambda movie: extract_id(movie.url),
        'imdb_name': 'title',
        'imdb_photo': 'photo',
        'imdb_plot_outline': 'plot_outline',
        'imdb_score': 'score',
        'imdb_votes': 'votes',
        'imdb_year': 'year',
        'imdb_genres': lambda movie: [genre.name for genre in movie.genres],
        'imdb_languages': lambda movie: [lang.language.name for lang in movie.languages],
        'imdb_actors': lambda movie: dict((actor.imdb_id, actor.name) for actor in movie.actors),
        'imdb_directors': lambda movie: dict((director.imdb_id, director.name) for director in movie.directors),
        'imdb_mpaa_rating': 'mpaa_rating',
        # Generic fields filled by all movie lookup plugins:
        'movie_name': 'title',
        'movie_year': 'year'}

    def validator(self):
        from flexget import validator
        return validator.factory('boolean')

    @priority(130)
    def on_task_metainfo(self, task, config):
        if not config:
            return
        for entry in task.entries:
            self.register_lazy_fields(entry)

    def register_lazy_fields(self, entry):
        entry.register_lazy_fields(self.field_map, self.lazy_loader)

    def lazy_loader(self, entry, field):
        """Does the lookup for this entry and populates the entry fields."""
        try:
            self.lookup(entry)
        except PluginError as e:
            log_once(e.value.capitalize(), logger=log)
            # Set all of our fields to None if the lookup failed
            entry.unregister_lazy_fields(self.field_map, self.lazy_loader)
        return entry[field]
def lazy_loader(self, entry):
    """Does the lookup for this entry and populates the entry fields."""
    imdb_id = (entry.get('imdb_id', eval_lazy=False) or
               imdb.extract_id(entry.get('imdb_url', eval_lazy=False)))
    try:
        with Session() as session:
            movie = lookup(smart_match=entry['title'],
                           tmdb_id=entry.get('tmdb_id', eval_lazy=False),
                           imdb_id=imdb_id,
                           session=session)
            entry.update_using_map(self.field_map, movie)
    except LookupError:
        log_once('TMDB lookup failed for %s' % entry['title'], log, logging.WARN)
def seen_add(options):
    seen_name = options.add_value
    if is_imdb_url(seen_name):
        imdb_id = extract_id(seen_name)
        if imdb_id:
            seen_name = imdb_id

    with Session() as session:
        se = SeenEntry(seen_name, "cli_seen")
        sf = SeenField("cli_seen", seen_name)
        se.fields.append(sf)
        session.add(se)

    console("Added %s as seen. This will affect all tasks." % seen_name)
def lazy_loader(self, entry, field):
    """Does the lookup for this entry and populates the entry fields."""
    imdb_id = (entry.get('imdb_id', eval_lazy=False) or
               imdb.extract_id(entry.get('imdb_url', eval_lazy=False)))
    try:
        movie = lookup(smart_match=entry['title'],
                       tmdb_id=entry.get('tmdb_id', eval_lazy=False),
                       imdb_id=imdb_id)
        entry.update_using_map(self.field_map, movie)
    except LookupError as e:
        log.debug(u'Tmdb lookup for %s failed: %s' % (entry['title'], e.message))
        # Set all of our fields to None if the lookup failed
        entry.unregister_lazy_fields(self.field_map, self.lazy_loader)
def seen_add(options):
    seen_name = options.add_value
    if is_imdb_url(seen_name):
        imdb_id = extract_id(seen_name)
        if imdb_id:
            seen_name = imdb_id

    with Session() as session:
        se = SeenEntry(seen_name, 'cli_seen')
        sf = SeenField('cli_seen', seen_name)
        se.fields.append(sf)
        session.add(se)

    console('Added %s as seen. This will affect all tasks.' % seen_name)
def seen_add(options):
    seen_name = options.add_value
    if is_imdb_url(seen_name):
        imdb_id = extract_id(seen_name)
        if imdb_id:
            seen_name = imdb_id

    with contextlib.closing(Session()) as session:
        se = SeenEntry(seen_name, 'cli_seen')
        sf = SeenField('cli_seen', seen_name)
        se.fields.append(sf)
        session.add(se)
        session.commit()

    console('Added %s as seen. This will affect all tasks.' % seen_name)
def upgrade(ver, session):
    if ver is None:
        log.info('Converting seen imdb_url to imdb_id for seen movies.')
        field_table = table_schema('seen_field', session)
        for row in session.execute(select([field_table.c.id, field_table.c.value],
                                          field_table.c.field == 'imdb_url')):
            new_values = {'field': 'imdb_id', 'value': extract_id(row['value'])}
            session.execute(update(field_table, field_table.c.id == row['id'], new_values))
        ver = 1
    if ver == 1:
        field_table = table_schema('seen_field', session)
        log.info('Adding index to seen_field table.')
        Index('ix_seen_field_seen_entry_id', field_table.c.seen_entry_id).create(bind=session.bind)
        ver = 2
    return ver
def on_process_start(self, feed):
    if not feed.manager.options.forget:
        return

    feed.manager.disable_feeds()

    forget_name = unicode(feed.manager.options.forget)
    if is_imdb_url(forget_name):
        imdb_id = extract_id(forget_name)
        if imdb_id:
            forget_name = imdb_id

    count, fcount = forget(forget_name)
    log.info('Removed %s titles (%s fields)' % (count, fcount))
def lazy_loader(self, entry, field):
    """Does the lookup for this entry and populates the entry fields."""
    imdb_id = (entry.get('imdb_id', eval_lazy=False) or
               imdb.extract_id(entry.get('imdb_url', eval_lazy=False)))
    try:
        movie = lookup(smart_match=entry['title'],
                       tmdb_id=entry.get('tmdb_id', eval_lazy=False),
                       imdb_id=imdb_id)
        entry.update_using_map(self.field_map, movie)
    except LookupError:
        log_once('TMDB lookup failed for %s' % entry['title'], log, logging.WARN)
        # Set all of our fields to None if the lookup failed
        entry.unregister_lazy_fields(self.field_map, self.lazy_loader)
    return entry[field]
def lazy_loader(self, entry, field):
    """Does the lookup for this entry and populates the entry fields."""
    imdb_id = (entry.get('imdb_id', eval_lazy=False) or
               imdb.extract_id(entry.get('imdb_url', eval_lazy=False)))
    try:
        movie = lookup(smart_match=entry['title'],
                       tmdb_id=entry.get('tmdb_id', eval_lazy=False),
                       imdb_id=imdb_id)
        entry.update_using_map(self.field_map, movie)
    except LookupError:
        log_once('TMDB lookup failed for %s' % entry['title'], log, logging.WARN)
        # Set all of our fields to None if the lookup failed
        entry.unregister_lazy_fields(self.field_map, self.lazy_loader)
    return entry[field]
def parse_what(what, lookup=True, session=None):
    """
    Determines what information was provided by the search string `what`.
    If `lookup` is true, will fill in other information from tmdb.

    :param what: Can be one of:
        <Movie Title>: Search based on title
        imdb_id=<IMDB id>: search based on imdb id
        tmdb_id=<TMDB id>: search based on tmdb id
    :param bool lookup: Whether missing info should be filled in from tmdb.
    :param session: An existing session that will be used for lookups if provided.
    :rtype: dict
    :return: A dictionary with 'title', 'imdb_id' and 'tmdb_id' keys
    """
    tmdb_lookup = plugin.get_plugin_by_name('api_tmdb').instance.lookup

    result = {'title': None, 'imdb_id': None, 'tmdb_id': None}
    result['imdb_id'] = extract_id(what)
    if not result['imdb_id']:
        if isinstance(what, int):
            result['tmdb_id'] = what
        elif what.startswith('tmdb_id='):
            result['tmdb_id'] = what[8:]
        else:
            result['title'] = what

    if not lookup:
        # If not doing an online lookup we can return here
        return result

    search_entry = Entry(title=result['title'] or '')
    for field in ['imdb_id', 'tmdb_id']:
        if result.get(field):
            search_entry[field] = result[field]
    # Put lazy lookup fields on the search entry
    plugin.get_plugin_by_name('imdb_lookup').instance.register_lazy_fields(search_entry)
    plugin.get_plugin_by_name('tmdb_lookup').instance.lookup(search_entry)

    try:
        # Both ids are optional, but if movie_name was populated at least one of them will be there
        return {'title': search_entry['movie_name'],
                'imdb_id': search_entry.get('imdb_id'),
                'tmdb_id': search_entry.get('tmdb_id')}
    except KeyError as e:
        raise QueueError(e.message)
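# A standalone sketch of just the input-classification step of parse_what
# above (no tmdb lookup; extract_id behaviour is assumed as sketched earlier):
import re

def extract_id(text):
    match = re.search(r'tt\d+', str(text))
    return match.group(0) if match else None

def classify(what):
    result = {'title': None, 'imdb_id': None, 'tmdb_id': None}
    result['imdb_id'] = extract_id(what)
    if not result['imdb_id']:
        if isinstance(what, int):
            result['tmdb_id'] = what
        elif what.startswith('tmdb_id='):
            result['tmdb_id'] = what[8:]
        else:
            result['title'] = what
    return result

print(classify('imdb_id=tt0133093'))  # imdb_id -> 'tt0133093'
print(classify('tmdb_id=603'))        # tmdb_id -> '603'
print(classify('The Matrix'))         # title   -> 'The Matrix'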
def on_process_start(self, task):
    if not task.manager.options.forget:
        return

    task.manager.disable_tasks()

    forget_name = task.manager.options.forget
    if is_imdb_url(forget_name):
        imdb_id = extract_id(forget_name)
        if imdb_id:
            forget_name = imdb_id

    count, fcount = forget(forget_name)
    log.info('Removed %s titles (%s fields)' % (count, fcount))
    task.manager.config_changed()
def parse_what(what, lookup=True, session=None):
    """
    Determines what information was provided by the search string `what`.
    If `lookup` is true, will fill in other information from tmdb.

    :param what: Can be one of:
        <Movie Title>: Search based on title
        imdb_id=<IMDB id>: search based on imdb id
        tmdb_id=<TMDB id>: search based on tmdb id
    :param bool lookup: Whether missing info should be filled in from tmdb.
    :param session: An existing session that will be used for lookups if provided.
    :rtype: dict
    :return: A dictionary with 'title', 'imdb_id' and 'tmdb_id' keys
    """
    tmdb_lookup = plugin.get_plugin_by_name("api_tmdb").instance.lookup

    result = {"title": None, "imdb_id": None, "tmdb_id": None}
    result["imdb_id"] = extract_id(what)
    if not result["imdb_id"]:
        if isinstance(what, int):
            result["tmdb_id"] = what
        elif what.startswith("tmdb_id="):
            result["tmdb_id"] = what[8:]
        else:
            result["title"] = what

    if not lookup:
        # If not doing an online lookup we can return here
        return result

    search_entry = Entry(title=result["title"] or "")
    for field in ["imdb_id", "tmdb_id"]:
        if result.get(field):
            search_entry[field] = result[field]
    # Put lazy lookup fields on the search entry
    plugin.get_plugin_by_name("imdb_lookup").instance.register_lazy_fields(search_entry)
    plugin.get_plugin_by_name("tmdb_lookup").instance.lookup(search_entry)

    try:
        # Both ids are optional, but if movie_name was populated at least one of them will be there
        return {
            "title": search_entry["movie_name"],
            "imdb_id": search_entry.get("imdb_id"),
            "tmdb_id": search_entry.get("tmdb_id"),
        }
    except KeyError as e:
        raise QueueError(e.message)
def search(self, task, entry, config):
    if not session.cookies or not session.passkey:
        try:
            login_params = {'username': config['username'],
                            'password': config['password'],
                            'loginkey': config['login_key']}
            r = session.post('https://piratethenet.org/takelogin.php', data=login_params, verify=False)
        except requests.RequestException as e:
            log.error('Error while logging in to PtN: %s', e)
            raise plugin.PluginError('Could not log in to PtN')

        # Sorta hacky, we'll just store the passkey on the session
        passkey = re.search(r'passkey=([\d\w]+)"', r.text)
        if passkey:
            session.passkey = passkey.group(1)
        else:
            log.error('PtN cookie info invalid')
            raise plugin.PluginError('PTN cookie info invalid')

    search_params = default_search_params.copy()
    if 'movie_name' in entry:
        if 'movie_year' in entry:
            search_params['advancedsearchparameters'] = '[year=%s]' % entry['movie_year']
        searches = [entry['movie_name']]
    else:
        searches = entry.get('search_strings', [entry['title']])

    results = set()
    for search in searches:
        search_params['searchstring'] = search
        try:
            r = session.get('http://piratethenet.org/torrentsutils.php', params=search_params)
        except requests.RequestException as e:
            log.error('Error searching ptn: %s' % e)
            continue
        # html5parser doesn't work properly for some reason
        soup = get_soup(r.text, parser='html.parser')
        for movie in soup.select('.torrentstd'):
            imdb_id = movie.find('a', href=re.compile(r'.*imdb\.com/title/tt'))
            if imdb_id:
                imdb_id = extract_id(imdb_id['href'])
            if imdb_id and 'imdb_id' in entry and imdb_id != entry['imdb_id']:
                continue
            results.update(self.create_entries(movie, passkey=session.passkey, imdb_id=imdb_id))
    return results
def parse_rlslog(self, rlslog_url, task):
    """
    :param rlslog_url: Url to parse from
    :param task: Task instance
    :return: List of release dictionaries
    """
    # BeautifulSoup doesn't seem to work if data is already decoded to unicode :/
    soup = get_soup(task.requests.get(rlslog_url, timeout=25).content)

    releases = []
    for entry in soup.find_all('div', attrs={'class': 'entry'}):
        release = {}
        h3 = entry.find('h3', attrs={'class': 'entrytitle'})
        if not h3:
            log.debug('FAIL: No h3 entrytitle')
            continue
        release['title'] = h3.a.contents[0].strip()
        entrybody = entry.find('div', attrs={'class': 'entrybody'})
        if not entrybody:
            log.debug('FAIL: No entrybody')
            continue

        log.trace('Processing title %s' % (release['title']))

        # find imdb url
        link_imdb = entrybody.find('a', text=re.compile(r'imdb', re.IGNORECASE))
        if link_imdb:
            release['imdb_id'] = extract_id(link_imdb['href'])
            release['imdb_url'] = link_imdb['href']

        # find google search url
        google = entrybody.find('a', href=re.compile(r'google', re.IGNORECASE))
        if google:
            release['url'] = google['href']
            releases.append(release)
        else:
            log_once('%s skipped due to missing or unsupported download link' % (release['title']), log)

    return releases
def parse_what(what, session=None):
    """Parses needed movie information for a given search string.

    Search string can be one of:
        <Movie Title>: Search based on title
        imdb_id=<IMDB id>: search based on imdb id
        tmdb_id=<TMDB id>: search based on tmdb id"""
    tmdb_lookup = get_plugin_by_name('api_tmdb').instance.lookup
    imdb_id = extract_id(what)
    try:
        if imdb_id:
            movie = tmdb_lookup(imdb_id=imdb_id, session=session)
        elif what.startswith('tmdb_id='):
            movie = tmdb_lookup(tmdb_id=what[8:], session=session)
        else:
            movie = tmdb_lookup(title=what, session=session)
    except LookupError as e:
        raise QueueError(e.message)
def on_process_start(self, task):
    if not task.manager.options.seen:
        return

    task.manager.disable_tasks()

    seen_name = task.manager.options.seen
    if is_imdb_url(seen_name):
        imdb_id = extract_id(seen_name)
        if imdb_id:
            seen_name = imdb_id

    session = Session()
    se = SeenEntry(u'--seen', unicode(task.name))
    sf = SeenField(u'--seen', seen_name)
    se.fields.append(sf)
    session.add(se)
    session.commit()

    log.info('Added %s as seen. This will affect all tasks.' % seen_name)
def on_task_input(self, task, config):
    log.verbose('Retrieving list %s ...' % config['list'])

    # Get the imdb list in RSS format
    if config['list'] in ['watchlist', 'ratings', 'checkins']:
        url = 'http://rss.imdb.com/user/%s/%s' % (config['user_id'], config['list'])
    else:
        url = 'http://rss.imdb.com/list/%s' % config['list']
    log.debug('Requesting %s' % url)
    try:
        rss = feedparser.parse(url)
    except LookupError as e:
        raise plugin.PluginError('Failed to parse RSS feed for list `%s` correctly: %s' % (config['list'], e))
    if rss.status == 404:
        raise plugin.PluginError('Unable to get imdb list. Either list is private or does not exist.')

    # Create an Entry for each movie in the list
    entries = []
    title_re = re.compile(r'(.*) \((\d{4})?.*?\)$')
    for entry in rss.entries:
        try:
            # IMDb puts some extra stuff in the titles, e.g. "Battlestar Galactica (2004 TV Series)"
            # Strip out everything but the date
            match = title_re.match(entry.title)
            title = match.group(1)
            if match.group(2):
                title += ' (%s)' % match.group(2)
            entries.append(Entry(title=title, url=entry.link,
                                 imdb_id=extract_id(entry.link), imdb_name=match.group(1)))
        except IndexError:
            log.critical('IndexError! Unable to handle RSS entry: %s' % entry)
    return entries
def matches(self, feed, config, entry):
    # make sure the entry has IMDB fields filled
    try:
        get_plugin_by_name('imdb_lookup').instance.lookup(feed, entry)
    except PluginError:
        # no IMDB data, can't do anything
        return

    imdb_id = None
    if entry.get('imdb_id'):
        imdb_id = entry['imdb_id']
    elif entry.get('imdb_url'):
        imdb_id = extract_id(entry['imdb_url'])
    if not imdb_id:
        log.warning("No imdb id could be determined for %s" % entry['title'])
        return

    return feed.session.query(QueuedMovie).filter(QueuedMovie.imdb_id == imdb_id).\
        filter(QueuedMovie.downloaded == None).first()
def parse_rlslog(self, rlslog_url, task):
    """
    :param rlslog_url: Url to parse from
    :param task: Task instance
    :return: List of release dictionaries
    """
    # BeautifulSoup doesn't seem to work if data is already decoded to unicode :/
    soup = get_soup(task.requests.get(rlslog_url, timeout=25).content)

    releases = []
    for entry in soup.find_all('div', attrs={'class': 'entry'}):
        release = {}
        h3 = entry.find('h3', attrs={'class': 'entrytitle'})
        if not h3:
            log.debug('FAIL: No h3 entrytitle')
            continue
        release['title'] = h3.a.contents[0].strip()
        entrybody = entry.find('div', attrs={'class': 'entrybody'})
        if not entrybody:
            log.debug('FAIL: No entrybody')
            continue

        log.trace('Processing title %s' % (release['title']))

        # find imdb url
        link_imdb = entrybody.find('a', text=re.compile(r'imdb', re.IGNORECASE))
        if link_imdb:
            release['imdb_id'] = extract_id(link_imdb['href'])
            release['imdb_url'] = link_imdb['href']

        # find google search url
        google = entrybody.find('a', href=re.compile(r'google', re.IGNORECASE))
        if google:
            release['url'] = google['href']
            releases.append(release)
        else:
            log_once('%s skipped due to missing or unsupported download link' % (release['title']), log)

    return releases
def lookup(self, entry, search_allowed=True):
    """
    Perform Rotten Tomatoes lookup for entry.

    :param entry: Entry instance
    :param search_allowed: Allow fallback to search
    :raises PluginError: Failure reason
    """
    imdb_id = (entry.get('imdb_id', eval_lazy=False) or
               imdb.extract_id(entry.get('imdb_url', eval_lazy=False)))
    if imdb_id:
        movie = lookup_movie(title=entry.get('imdb_name'),
                             year=entry.get('imdb_year'),
                             rottentomatoes_id=entry.get('rt_id', eval_lazy=False),
                             imdb_id=imdb_id,
                             only_cached=(not search_allowed))
    else:
        movie = lookup_movie(smart_match=entry['title'],
                             rottentomatoes_id=entry.get('rt_id', eval_lazy=False),
                             only_cached=(not search_allowed))
    log.debug(u'Got movie: %s' % movie)
    entry.update_using_map(self.field_map, movie)
def on_task_input(self, task, config):
    log.verbose('Retrieving list %s ...' % config['list'])

    # Get the imdb list in RSS format
    if config['list'] in ['watchlist', 'ratings', 'checkins']:
        url = 'http://rss.imdb.com/user/%s/%s' % (config['user_id'], config['list'])
    else:
        url = 'http://rss.imdb.com/list/%s' % config['list']
    log.debug('Requesting %s' % url)
    try:
        rss = feedparser.parse(url)
    except LookupError as e:
        raise plugin.PluginError('Failed to parse RSS feed for list `%s` correctly: %s' % (config['list'], e))
    if rss.status == 404:
        raise plugin.PluginError('Unable to get imdb list. Either list is private or does not exist.')

    # Create an Entry for each movie in the list
    entries = []
    for entry in rss.entries:
        try:
            entries.append(Entry(title=entry.title, url=entry.link,
                                 imdb_id=extract_id(entry.link), imdb_name=entry.title))
        except IndexError:
            log.critical('IndexError! Unable to handle RSS entry: %s' % entry)
    return entries
def parse_html_list(self, task, config, url, params, headers):
    page = self.fetch_page(task, url, params, headers)
    soup = get_soup(page.text)

    try:
        item_text = soup.find('div', class_='lister-total-num-results').string.split()
        # strip thousands separators (imdb renders e.g. '1,234') before converting
        total_item_count = int(item_text[0].replace(',', ''))
        log.verbose('imdb list contains %d items', total_item_count)
    except AttributeError:
        total_item_count = 0
    except (ValueError, TypeError) as e:
        # TODO Something is wrong if we get a ValueError, I think
        raise plugin.PluginError('Received invalid movie count: %s ; %s' %
                                 (soup.find('div', class_='lister-total-num-results').string, e))

    if not total_item_count:
        log.verbose('No movies were found in imdb list: %s', config['list'])
        return

    entries = []
    items_processed = 0
    page_no = 1
    while items_processed < total_item_count:
        # Fetch the next page unless we've just begun
        if items_processed:
            page_no += 1
            params['page'] = page_no
            page = self.fetch_page(task, url, params, headers)
            soup = get_soup(page.text)

        items = soup.find_all('div', class_='lister-item')
        if not items:
            log.debug('no items found on page: %s, aborting.', url)
            break
        log.debug('%d items found on page %d', len(items), page_no)

        for item in items:
            items_processed += 1
            a = item.find('h3', class_='lister-item-header').find('a')
            if not a:
                log.debug('no title link found for row, skipping')
                continue
            link = ('http://www.imdb.com' + a.get('href')).rstrip('/')
            entry = Entry()
            entry['title'] = a.text
            try:
                year = int(item.find('span', class_='lister-item-year').text)
                entry['title'] += ' (%s)' % year
                entry['imdb_year'] = year
            except (ValueError, TypeError):
                pass
            entry['url'] = link
            entry['imdb_id'] = extract_id(link)
            entry['imdb_name'] = entry['title']
            entries.append(entry)

    return entries
def imdb_id(self):
    return extract_id(self.url)
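# For the opposite direction, several snippets above rebuild the canonical
# url from an id (e.g. the seen-repair code). A minimal sketch of that
# make_url counterpart, assuming the same format used there:
def make_url(imdb_id):
    """Sketch: canonical imdb title url for an id like 'tt0111161'."""
    return 'http://www.imdb.com/title/%s/' % imdb_id

print(make_url('tt0111161'))  # http://www.imdb.com/title/tt0111161/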
def lookup(self, entry, search_allowed=True, session=None):
    """
    Perform imdb lookup for entry.

    :param entry: Entry instance
    :param search_allowed: Allow fallback to search
    :raises PluginError: Failure reason
    """
    from flexget.manager import manager

    if entry.get('imdb_id', eval_lazy=False):
        log.debug('No title passed. Lookup for %s' % entry['imdb_id'])
    elif entry.get('imdb_url', eval_lazy=False):
        log.debug('No title passed. Lookup for %s' % entry['imdb_url'])
    elif entry.get('title', eval_lazy=False):
        log.debug('lookup for %s' % entry['title'])
    else:
        raise plugin.PluginError('looking up IMDB for entry failed, no title, imdb_url or imdb_id passed.')

    # if imdb_id is included, build the url.
    if entry.get('imdb_id', eval_lazy=False) and not entry.get('imdb_url', eval_lazy=False):
        entry['imdb_url'] = make_url(entry['imdb_id'])

    # make sure imdb url is valid
    if entry.get('imdb_url', eval_lazy=False):
        imdb_id = extract_id(entry['imdb_url'])
        if imdb_id:
            entry['imdb_url'] = make_url(imdb_id)
        else:
            log.debug('imdb url %s is invalid, removing it' % entry['imdb_url'])
            del entry['imdb_url']

    # no imdb_url, check if there is cached result for it or if the
    # search is known to fail
    if not entry.get('imdb_url', eval_lazy=False):
        result = session.query(SearchResult).filter(SearchResult.title == entry['title']).first()
        if result:
            # TODO: 1.2 this should really be checking task.options.retry
            if result.fails and not manager.options.execute.retry:
                # this movie cannot be found, not worth trying again ...
                log.debug('%s will fail lookup' % entry['title'])
                raise plugin.PluginError('IMDB lookup failed for %s' % entry['title'])
            else:
                if result.url:
                    log.trace('Setting imdb url for %s from db' % entry['title'])
                    entry['imdb_id'] = result.imdb_id
                    entry['imdb_url'] = result.url

    movie = None
    # no imdb url, but information required, try searching
    if not entry.get('imdb_url', eval_lazy=False) and search_allowed:
        log.verbose('Searching from imdb `%s`' % entry['title'])
        search = ImdbSearch()
        search_name = entry.get('movie_name', entry['title'], eval_lazy=False)
        search_result = search.smart_match(search_name)
        if search_result:
            entry['imdb_url'] = search_result['url']
            # store url for this movie, so we don't have to search on every run
            result = SearchResult(entry['title'], entry['imdb_url'])
            session.add(result)
            session.commit()
            log.verbose('Found %s' % (entry['imdb_url']))
        else:
            log_once('IMDB lookup failed for %s' % entry['title'], log, logging.WARN, session=session)
            # store FAIL for this title
            result = SearchResult(entry['title'])
            result.fails = True
            session.add(result)
            session.commit()
            raise plugin.PluginError('Title `%s` lookup failed' % entry['title'])

    # check if this imdb page has been parsed & cached
    movie = session.query(Movie).filter(Movie.url == entry['imdb_url']).first()

    # If we have a movie from cache, we are done
    if movie and not movie.expired:
        entry.update_using_map(self.field_map, movie)
        return

    # Movie was not found in cache, or was expired
    if movie is not None:
        if movie.expired:
            log.verbose('Movie `%s` details expired, refreshing ...' % movie.title)
        # Remove the old movie, we'll store another one later.
        session.query(MovieLanguage).filter(MovieLanguage.movie_id == movie.id).delete()
        session.query(Movie).filter(Movie.url == entry['imdb_url']).delete()
        session.commit()

    # search and store to cache
    if 'title' in entry:
        log.verbose('Parsing imdb for `%s`' % entry['title'])
    else:
        log.verbose('Parsing imdb for `%s`' % entry['imdb_id'])
    try:
        movie = self._parse_new_movie(entry['imdb_url'], session)
    except UnicodeDecodeError:
        log.error('Unable to determine encoding for %s. Installing chardet library may help.'
                  % entry['imdb_url'])
        # store cache so this will not be tried again
        movie = Movie()
        movie.url = entry['imdb_url']
        session.add(movie)
        session.commit()
        raise plugin.PluginError('UnicodeDecodeError')
    except ValueError as e:
        # TODO: might be a little too broad catch, what was this for anyway? ;P
        if manager.options.debug:
            log.exception(e)
        raise plugin.PluginError('Invalid parameter: %s' % entry['imdb_url'], log)

    for att in ['title', 'score', 'votes', 'year', 'genres', 'languages',
                'actors', 'directors', 'mpaa_rating']:
        log.trace('movie.%s: %s' % (att, getattr(movie, att)))

    # Update the entry fields
    entry.update_using_map(self.field_map, movie)
class ImdbLookup(object):
    """
    Retrieves imdb information for entries.

    Example:

        imdb_lookup: yes

    Also provides imdb lookup functionality to all other imdb related plugins.
    """

    field_map = {
        'imdb_url': 'url',
        'imdb_id': lambda movie: extract_id(movie.url),
        'imdb_name': 'title',
        'imdb_original_name': 'original_title',
        'imdb_photo': 'photo',
        'imdb_plot_outline': 'plot_outline',
        'imdb_score': 'score',
        'imdb_votes': 'votes',
        'imdb_year': 'year',
        'imdb_genres': lambda movie: [genre.name for genre in movie.genres],
        'imdb_languages': lambda movie: [lang.language.name for lang in movie.languages],
        'imdb_actors': lambda movie: dict((actor.imdb_id, actor.name) for actor in movie.actors),
        'imdb_directors': lambda movie: dict((director.imdb_id, director.name) for director in movie.directors),
        'imdb_mpaa_rating': 'mpaa_rating',
        # Generic fields filled by all movie lookup plugins:
        'movie_name': 'title',
        'movie_year': 'year'}

    schema = {'type': 'boolean'}

    @plugin.priority(130)
    def on_task_metainfo(self, task, config):
        if not config:
            return
        for entry in task.entries:
            self.register_lazy_fields(entry)

    def register_lazy_fields(self, entry):
        entry.register_lazy_func(self.lazy_loader, self.field_map)

    def lazy_loader(self, entry):
        """Does the lookup for this entry and populates the entry fields."""
        try:
            self.lookup(entry)
        except plugin.PluginError as e:
            log_once(str(e.value).capitalize(), logger=log)

    @with_session
    def imdb_id_lookup(self, movie_title=None, raw_title=None, session=None):
        """
        Perform faster lookup providing just imdb_id.
        Falls back to using basic lookup if data cannot be found from cache.

        .. note::

           API will be changed, it's dumb to return None on errors AND
           raise PluginError on some other errors

        :param movie_title: Name of the movie
        :param raw_title: Raw entry title
        :return: imdb id or None
        :raises PluginError: Failure reason
        """
        if movie_title:
            log.debug('imdb_id_lookup: trying with title: %s' % movie_title)
            movie = session.query(Movie).filter(Movie.title == movie_title).first()
            if movie:
                log.debug('--> success! got %s returning %s' % (movie, movie.imdb_id))
                return movie.imdb_id
        if raw_title:
            log.debug('imdb_id_lookup: trying cache with: %s' % raw_title)
            result = session.query(SearchResult).filter(SearchResult.title == raw_title).first()
            if result:
                # this title is hopeless, give up ..
                if result.fails:
                    return None
                log.debug('--> success! got %s returning %s' % (result, result.imdb_id))
                return result.imdb_id
        if raw_title:
            # last hope with hacky lookup
            fake_entry = Entry(raw_title, '')
            self.lookup(fake_entry)
            return fake_entry['imdb_id']

    @plugin.internet(log)
    @with_session
    def lookup(self, entry, search_allowed=True, session=None):
        """
        Perform imdb lookup for entry.

        :param entry: Entry instance
        :param search_allowed: Allow fallback to search
        :raises PluginError: Failure reason
        """
        from flexget.manager import manager

        if entry.get('imdb_id', eval_lazy=False):
            log.debug('No title passed. Lookup for %s' % entry['imdb_id'])
        elif entry.get('imdb_url', eval_lazy=False):
            log.debug('No title passed. Lookup for %s' % entry['imdb_url'])
        elif entry.get('title', eval_lazy=False):
            log.debug('lookup for %s' % entry['title'])
        else:
            raise plugin.PluginError('looking up IMDB for entry failed, no title, imdb_url or imdb_id passed.')

        # if imdb_id is included, build the url.
        if entry.get('imdb_id', eval_lazy=False) and not entry.get('imdb_url', eval_lazy=False):
            entry['imdb_url'] = make_url(entry['imdb_id'])

        # make sure imdb url is valid
        if entry.get('imdb_url', eval_lazy=False):
            imdb_id = extract_id(entry['imdb_url'])
            if imdb_id:
                entry['imdb_url'] = make_url(imdb_id)
            else:
                log.debug('imdb url %s is invalid, removing it' % entry['imdb_url'])
                del entry['imdb_url']

        # no imdb_url, check if there is cached result for it or if the
        # search is known to fail
        if not entry.get('imdb_url', eval_lazy=False):
            result = session.query(SearchResult).filter(SearchResult.title == entry['title']).first()
            if result:
                # TODO: 1.2 this should really be checking task.options.retry
                if result.fails and not manager.options.execute.retry:
                    # this movie cannot be found, not worth trying again ...
                    log.debug('%s will fail lookup' % entry['title'])
                    raise plugin.PluginError('IMDB lookup failed for %s' % entry['title'])
                else:
                    if result.url:
                        log.trace('Setting imdb url for %s from db' % entry['title'])
                        entry['imdb_id'] = result.imdb_id
                        entry['imdb_url'] = result.url

        movie = None
        # no imdb url, but information required, try searching
        if not entry.get('imdb_url', eval_lazy=False) and search_allowed:
            log.verbose('Searching from imdb `%s`' % entry['title'])
            search = ImdbSearch()
            search_name = entry.get('movie_name', entry['title'], eval_lazy=False)
            search_result = search.smart_match(search_name)
            if search_result:
                entry['imdb_url'] = search_result['url']
                # store url for this movie, so we don't have to search on every run
                result = SearchResult(entry['title'], entry['imdb_url'])
                session.add(result)
                session.commit()
                log.verbose('Found %s' % (entry['imdb_url']))
            else:
                log_once('IMDB lookup failed for %s' % entry['title'], log, logging.WARN, session=session)
                # store FAIL for this title
                result = SearchResult(entry['title'])
                result.fails = True
                session.add(result)
                session.commit()
                raise plugin.PluginError('Title `%s` lookup failed' % entry['title'])

        # check if this imdb page has been parsed & cached
        movie = session.query(Movie).filter(Movie.url == entry['imdb_url']).first()

        # If we have a movie from cache, we are done
        if movie and not movie.expired:
            entry.update_using_map(self.field_map, movie)
            return

        # Movie was not found in cache, or was expired
        if movie is not None:
            if movie.expired:
                log.verbose('Movie `%s` details expired, refreshing ...' % movie.title)
            # Remove the old movie, we'll store another one later.
            session.query(MovieLanguage).filter(MovieLanguage.movie_id == movie.id).delete()
            session.query(Movie).filter(Movie.url == entry['imdb_url']).delete()
            session.commit()

        # search and store to cache
        if 'title' in entry:
            log.verbose('Parsing imdb for `%s`' % entry['title'])
        else:
            log.verbose('Parsing imdb for `%s`' % entry['imdb_id'])
        try:
            movie = self._parse_new_movie(entry['imdb_url'], session)
        except UnicodeDecodeError:
            log.error('Unable to determine encoding for %s. Installing chardet library may help.'
                      % entry['imdb_url'])
            # store cache so this will not be tried again
            movie = Movie()
            movie.url = entry['imdb_url']
            session.add(movie)
            session.commit()
            raise plugin.PluginError('UnicodeDecodeError')
        except ValueError as e:
            # TODO: might be a little too broad catch, what was this for anyway? ;P
            if manager.options.debug:
                log.exception(e)
            raise plugin.PluginError('Invalid parameter: %s' % entry['imdb_url'], log)

        for att in ['title', 'score', 'votes', 'year', 'genres', 'languages',
                    'actors', 'directors', 'mpaa_rating']:
            log.trace('movie.%s: %s' % (att, getattr(movie, att)))

        # Update the entry fields
        entry.update_using_map(self.field_map, movie)

    def _parse_new_movie(self, imdb_url, session):
        """
        Get Movie object by parsing imdb page and save movie into the database.

        :param imdb_url: IMDB url
        :param session: Session to be used
        :return: Newly added Movie
        """
        parser = ImdbParser()
        parser.parse(imdb_url)
        # store to database
        movie = Movie()
        movie.photo = parser.photo
        movie.title = parser.name
        movie.original_title = parser.original_name
        movie.score = parser.score
        movie.votes = parser.votes
        movie.year = parser.year
        movie.mpaa_rating = parser.mpaa_rating
        movie.plot_outline = parser.plot_outline
        movie.url = imdb_url
        for name in parser.genres:
            genre = session.query(Genre).filter(Genre.name == name).first()
            if not genre:
                genre = Genre(name)
            movie.genres.append(genre)  # pylint:disable=E1101
        for index, name in enumerate(parser.languages):
            language = session.query(Language).filter(Language.name == name).first()
            if not language:
                language = Language(name)
            movie.languages.append(MovieLanguage(language, prominence=index))
        for imdb_id, name in parser.actors.items():
            actor = session.query(Actor).filter(Actor.imdb_id == imdb_id).first()
            if not actor:
                actor = Actor(imdb_id, name)
            movie.actors.append(actor)  # pylint:disable=E1101
        for imdb_id, name in parser.directors.items():
            director = session.query(Director).filter(Director.imdb_id == imdb_id).first()
            if not director:
                director = Director(imdb_id, name)
            movie.directors.append(director)  # pylint:disable=E1101
        # so that we can track how long since we've updated the info later
        movie.updated = datetime.now()
        session.add(movie)
        return movie
def on_task_input(self, task, config):
    # Create movie entries by parsing imdb list page(s) html using beautifulsoup
    log.verbose('Retrieving imdb list: %s' % config['list'])

    params = {'view': 'compact'}
    if config['list'] in ['watchlist', 'ratings', 'checkins']:
        url = 'http://www.imdb.com/user/%s/%s' % (config['user_id'], config['list'])
    else:
        url = 'http://www.imdb.com/list/%s' % config['list']

    log.debug('Requesting: %s' % url)
    page = task.requests.get(url, params=params)
    if page.status_code != 200:
        raise plugin.PluginError('Unable to get imdb list. Either list is private or does not exist.')

    soup = get_soup(page.text)
    # TODO: Something is messed up with the html5lib parser and imdb, have to get to our subsection
    # without recursion before doing a regular find. Repeated in the loop below as well.
    soup = soup.find('div', id='root').find('div', id='pagecontent', recursive=False)
    div = soup.find('div', class_='desc')
    if div:
        total_movie_count = int(div.get('data-size'))
    else:
        total_movie_count = 0
    if total_movie_count == 0:
        log.verbose('No movies were found in imdb list: %s' % config['list'])
        return

    entries = []
    start = 1
    while start < total_movie_count:
        if start == 1:
            trs = soup.find_all(attrs={'data-item-id': True})
        else:
            params['start'] = start
            page = task.requests.get(url, params=params)
            if page.status_code != 200:
                raise plugin.PluginError('Unable to get imdb list.')
            soup = get_soup(page.text)
            # TODO: This is a hack, see above
            soup = soup.find('div', id='root').find('div', id='pagecontent', recursive=False)
            trs = soup.find_all(attrs={'data-item-id': True})

        for tr in trs:
            a = tr.find('td', class_='title').find('a')
            if not a:
                log.debug('no title link found for row, skipping')
                continue
            link = ('http://www.imdb.com' + a.get('href')).rstrip('/')
            entry = Entry()
            entry['title'] = a.string
            try:
                year = int(tr.find('td', class_='year').string)
                entry['title'] += ' (%s)' % year
                entry['imdb_year'] = year
            except ValueError:
                pass
            entry['url'] = link
            entry['imdb_id'] = extract_id(link)
            entry['imdb_name'] = entry['title']
            entries.append(entry)

        start = len(entries) + 1
    return entries
def search(self, entry, config):
    login_sess = requests.Session()
    login_params = {'username': config['username'],
                    'password': config['password'],
                    'loginkey': config['login_key']}
    try:
        login_sess.post('https://piratethenet.org/takelogin.php', data=login_params, verify=False)
    except requests.RequestException as e:
        log.error('Error while logging in to PtN: %s', e)

    download_auth = CookieAuth(login_sess.cookies)

    # Default to searching by title (0=title 3=imdb_id)
    search_by = 0
    if 'imdb_id' in entry:
        searches = [entry['imdb_id']]
        search_by = 3
    elif 'movie_name' in entry:
        search = entry['movie_name']
        if 'movie_year' in entry:
            search += ' %s' % entry['movie_year']
        searches = [search]
    else:
        searches = entry.get('search_strings', [entry['title']])

    params = {'_by': search_by}
    if config.get('categories'):
        for cat in config['categories']:
            params[categories[cat]] = 1

    results = set()
    for search in searches:
        params['search'] = search
        try:
            r = login_sess.get('http://piratethenet.org/browse.php', params=params)
        except requests.RequestException as e:
            log.error('Error searching ptn: %s' % e)
            continue
        soup = get_soup(r.text)
        if 'login' in soup.head.title.text.lower():
            log.error('PtN cookie info invalid')
            raise plugin.PluginError('PTN cookie info invalid')
        try:
            results_table = soup.find_all('table', attrs={'class': 'main'}, limit=2)[1]
        except IndexError:
            log.debug('no results found for `%s`' % search)
            continue

        for row in results_table.find_all('tr')[1:]:
            columns = row.find_all('td')
            entry = Entry()
            links = columns[1].find_all('a', recursive=False, limit=2)
            entry['title'] = links[0].text
            if len(links) > 1:
                entry['imdb_id'] = extract_id(links[1].get('href'))
            entry['url'] = 'http://piratethenet.org/' + columns[2].a.get('href')
            entry['download_auth'] = download_auth
            entry['torrent_seeds'] = int(columns[8].text)
            entry['torrent_leeches'] = int(columns[9].text)
            entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])

            size = columns[6].find('br').previous_sibling
            unit = columns[6].find('br').next_sibling
            if unit == 'GB':
                entry['content_size'] = int(float(size) * 1024)
            elif unit == 'MB':
                entry['content_size'] = int(float(size))
            elif unit == 'KB':
                entry['content_size'] = int(float(size) / 1024)

            results.add(entry)
    return results