def _getSeries(self, series):
    """Search TheTVDB.com for the given series name.

    Falls back to an IMDb-id based search (matching the exact title on
    IMDb) when the plain name search yields nothing. The final result
    is picked by the configured custom UI when one is set, otherwise by
    a BaseUI instance (first result).
    """
    results = []
    try:
        results += self.search(series)
        if not results:
            raise tvdb_shownotfound
    except tvdb_shownotfound:
        # search via imdbId
        for candidate in imdbpie.Imdb().search_for_title(series):
            if candidate['title'].lower() == series.lower():
                results += self.search(imdbid=candidate['imdb_id'])

    if not results:
        sickrage.srCore.srLogger.debug('Series result returned zero')
        raise tvdb_shownotfound(
            "Show search returned zero results (cannot find show on theTVDB)"
        )

    # Default selector takes the first hit; a configured custom UI
    # overrides it.
    selector = BaseUI(config=self.config)
    custom_ui_cls = self.config['custom_ui']
    if custom_ui_cls is not None:
        selector = custom_ui_cls(config=self.config)
    return selector.selectSeries(results, series)
def mt_movie(movie):
    """Return a short IMDb summary line for *movie*, or an error string."""
    client = imdbpie.Imdb()
    matches = client.search_for_title(movie)
    if not matches:
        return 'Movie not found.'
    title = client.get_title_by_id(matches[0]['imdb_id'])
    return '{} ({})\n{}\nIMDB: {}'.format(
        title.title, title.year, title.plot_outline, title.rating)
import imdb import imdbpie import sys import woodsy ia_pie = imdbpie.Imdb() actor_dict = {} actor_list_one = [] pie_id_one = woodsy.retrive_movie_pie(ia_pie) media_obj_one = ia_pie.get_title_by_id(pie_id_one) actors_one_raw = woodsy.get_full_cast(ia_pie, pie_id_one) actor_list_two = [] pie_id_two = woodsy.retrive_movie_pie(ia_pie) media_obj_two = ia_pie.get_title_by_id(pie_id_two) actors_two_raw = woodsy.get_full_cast(ia_pie, pie_id_two) print set(actors_one_raw).intersection(actors_two_raw) # #woodsy.get_right_actors_in_right_api(ia,actors_one_raw,actor_list_one,media_obj_one) # media_obj_two = woodsy.retrive_movie(ia) # print media_obj_two # pie_id_two = woodsy.find_pie_match(ia_pie, media_obj_two['title'], int(media_obj_two['year'])) # pie_obj_two = ia_pie.get_title_by_id(pie_id_two) # # actors_two_raw = woodsy.get_full_cast(ia_pie, pie_id_two) #
def __init__(self):
    """Create the IMDb client; no movie is selected initially."""
    self.movie = None
    self.imdb = imdbpie.Imdb()
def search_text_episode():
    """Resolve an episode by text search.

    Looks the show up on TheTVDB first, then searches IMDb for the
    series by name or alias, and finally walks the IMDb seasons to
    find the matching episode.

    Uses names from the enclosing scope: ``LOGGER``, ``parsed``,
    ``tvdb_api``, ``imdbpie`` and ``ResolveException``.

    Returns:
        (imdb_client, imdb_title) on success, ``None`` otherwise.
    """
    LOGGER.info('Searching media using text research on episode')
    ep = parsed['episode']
    # To allow to search on the tvdb
    tvdb = tvdb_api.Tvdb(
        cache=False,
        language='en',
    )
    # Perform the search, if nothing is found, there's a problem...
    try:
        series = tvdb.search(ep['show'])
    except Exception as e:
        raise ResolveException(e)
    if not series:
        return
    # NOTE: 'series' is rebound from the raw search results to the
    # actual tvdb show object here
    series = tvdb[series[0]['seriesName']]
    episode = series[ep['season']][ep['episodes'][0]]
    # Initialize the imdb object to perform the research
    imdb = imdbpie.Imdb(exclude_episodes=False, )
    # Use imdb to search for the series using its name or aliases
    search = None
    for seriesName in [
            series['seriesName'],
    ] + series['aliases']:
        try:
            search = imdb.search_for_title(seriesName)
        except Exception as e:
            raise ResolveException(e)
        # Filter the results by name and type
        search = [
            s for s in search if s['type'] == 'TV series' and (
                s['title'] == seriesName or '{title} ({year})'.format(
                    **s) == seriesName)
        ]
        # If there is still more than one, filter by year
        if len(search) > 1:
            search = [
                s for s in search
                if s['year'] == series['firstAired'][:4]
            ]
        # If we have a series, we can stop there!
        if search:
            break
    # If we did not find anything that matches
    if not search:
        return
    # Get the series' seasons and episodes ('series' is rebound once
    # more, this time to the IMDb episodes listing)
    series = imdb.get_title_episodes(search[0]['imdb_id'])
    for season in series['seasons']:
        if season['season'] != ep['season']:
            continue
        for episode in season['episodes']:
            if episode['episode'] != ep['episodes'][0]:
                continue
            # id is using format /title/ttXXXXXX/
            return imdb, imdb.get_title(episode['id'][7:-1])
    # Not found
    return
def search_hash():
    """Resolve the media by searching OpenSubtitles for its file hash.

    Uses the enclosing scope's ``oshash``, ``parsed``, ``LOGGER``,
    ``OpenSubtitlesAPI``, ``fuzzywuzzy``, ``imdbpie`` and
    ``ResolveException`` names.

    Returns:
        (imdb_client, imdb_title) when the hash could be matched to a
        media of the expected kind, ``None`` otherwise.
    """
    if not oshash:
        return

    LOGGER.info('Searching media using hash research')

    # Search for the files corresponding to this hash
    try:
        medias = OpenSubtitlesAPI.check_hash([oshash])
    except Exception as e:
        raise ResolveException(e)

    # If the hash is not in the results
    medias = medias['data'] if 'data' in medias else []
    if oshash not in medias:
        return

    # We're only interested in that hash
    medias = medias[oshash]
    if len(medias) == 1:
        # There is only one, so might as well be that one!
        media = medias[0]
        # Unless it's not the same type...
        if media['MovieKind'] not in parsed:
            return
    else:
        # Initialize media to None in case we don't find anything
        media = None
        # Search differently if it's an episode or a movie
        if 'episode' in parsed:
            episode = parsed['episode']
            season = episode['season']
            fepisode = episode['episodes'][0]
            # OpenSubtitles names episodes as '"Show Name" Episode title'
            show_prefix = '"{}"'.format(episode['show'].lower())
            # Exact match first: same kind, season, episode and show name
            for m in medias:
                if m['MovieKind'] != 'episode' or \
                        int(m['SeriesSeason']) != season or \
                        int(m['SeriesEpisode']) != fepisode or \
                        not m['MovieName'].lower().startswith(
                            show_prefix):
                    continue
                media = m
                break

            # If we reach here and still haven't got the episode, try
            # to see if we had maybe a typo in the name
            if not media:
                def weight_episode(x):
                    # Compare the parsed show name with the one embedded
                    # in OpenSubtitles' '"Show" Episode' MovieName format
                    return fuzzywuzzy.fuzz.ratio(
                        parsed['episode']['show'],
                        re.sub(r'^"([^"]*)" .*$', r'\1', x['MovieName']))

                # Filter only the episodes that can match with the
                # information we got
                episodes = [
                    m for m in medias
                    if m['MovieKind'] == 'episode' and
                    int(m['SeriesSeason']) == season and
                    int(m['SeriesEpisode']) == fepisode
                ]
                if episodes:
                    # Use fuzzywuzzy to get the closest show name
                    closest = max(episodes, key=weight_episode)
                    # BUGFIX: fuzz.ratio() returns a 0-100 score, so the
                    # previous '>= .8' threshold accepted nearly any show
                    # name; 80 restores the intended "close match" check
                    if weight_episode(closest) >= 80:
                        media = closest

        if not media and 'movie' in parsed:
            movie = parsed['movie']
            media_name = movie.get('title')
            # Filter only the movies
            movies = [m for m in medias if m['MovieKind'] == 'movie']
            if movies:
                # Use fuzzywuzzy to get the closest movie name
                media = max(
                    movies,
                    key=lambda x: fuzzywuzzy.fuzz.ratio(
                        media_name, x['MovieName']))

    # If when reaching here we don't have the media, return None
    if not media:
        return

    # Else, we will need imdb for getting more detailed information on
    # the media; we'll exclude episodes if we know the media is a movie
    imdb = imdbpie.Imdb(
        exclude_episodes=(media['MovieKind'] == 'movie'),
    )
    try:
        result = imdb.get_title('tt{}'.format(media['MovieImdbID']))
    except Exception as e:
        raise ResolveException(e)

    # Find the media
    return imdb, result
def search_text_movie():
    """Resolve the media by text-searching IMDb for the parsed movie.

    Uses the enclosing scope's ``parsed``, ``duration``, ``LOGGER``,
    ``fuzzywuzzy``, ``imdbpie`` and ``ResolveException`` names.

    Returns:
        (imdb_client, imdb_title) when no duration is available, or
        (imdb_client, imdb_title, fuzz_ratio) when the duration-based
        selection succeeded; ``None`` when no convincing match exists.
    """
    LOGGER.info('Searching media using text research on movie')
    movie = parsed['movie']

    # Initialize the imdb object to perform the research
    imdb = imdbpie.Imdb(exclude_episodes=True)

    # Use imdb to search for the movie
    try:
        search = imdb.search_for_title(movie['title'])
    except Exception as e:
        raise ResolveException(e)

    # Filter out everything that is not starting with 'tt', as only
    # IMDB IDs starting with 'tt' represent movies/episodes, and
    # filter out everything considered as a TV series
    search = [
        s for s in search
        if s['imdb_id'].startswith('tt') and s['type'] != 'TV series'
    ]
    if not search:
        return

    # Score every candidate against the parsed title, and remember
    # whether any candidate matches the parsed year
    year_found = False
    for r in search:
        r['fuzz_ratio'] = fuzzywuzzy.fuzz.ratio(
            movie['title'], r['title'])
        if movie['year'] and \
                not year_found and \
                r['year'] == movie['year']:
            year_found = True
    if year_found:
        search = [r for r in search if r['year'] == movie['year']]

    if not duration:
        # If we don't have the movie duration, we won't be able to use
        # it to discriminate the movies, so just use the highest ratio
        max_ratio = max(r['fuzz_ratio'] for r in search)
        search = [r for r in search if r['fuzz_ratio'] == max_ratio]
        # Even if there is multiple with the highest ratio, only
        # return one
        return imdb, imdb.get_title(search[0]['imdb_id'])

    # If we have the movie duration, we can use it to make the
    # research more precise, so we can be more gentle on the ratio:
    # keep the candidates above mean + stddev (capped at the best ratio)
    sum_ratio = sum(r['fuzz_ratio'] for r in search)
    mean_ratio = sum_ratio / float(len(search))
    std_dev_ratio = math.sqrt(
        sum(math.pow(r['fuzz_ratio'] - mean_ratio, 2) for r in search) /
        float(len(search)))
    threshold = min(mean_ratio + std_dev_ratio,
                    max(r['fuzz_ratio'] for r in search))
    search = [r for r in search if r['fuzz_ratio'] >= threshold]

    # Now we need to get more information to identify precisely
    # the movie
    for r in search:
        r['details'] = imdb.get_title(r['imdb_id'])

    # Try to get the closest movie using the movie duration if available
    def weight_movie_by_duration(result):
        # Candidates without a known running time sort last
        if not duration:
            return sys.maxint
        rt = result['details']['base'].get('runningTimeInMinutes')
        if rt is None:
            return sys.maxint
        result['duration_closeness'] = abs(rt * 60. - duration)
        return result['duration_closeness']

    closest = min(search, key=weight_movie_by_duration)

    # If the closest still has a duration difference with the expected
    # one that is more than half of the expected duration, it is
    # probably not the right one!
    # BUGFIX: 'duration_closeness' is only set when the candidate has a
    # known running time; direct indexing raised KeyError when the
    # closest candidate had none. Treat a missing value as "infinitely
    # far", i.e. reject the candidate.
    if duration and \
            closest.get('duration_closeness', sys.maxint) > (duration / 2.):
        return

    # Return the imdb information of the closest movie found
    return imdb, closest['details'], closest['fuzz_ratio']
def get_movie_info(movie_fname, movie_name, movie_year='', movie_duration=None):
    """Look up detailed information about a movie.

    The lookup is first attempted through the OpenSubtitles file hash
    (when ``movie_fname`` points to an existing file), then through an
    IMDb text search filtered by title similarity, year and duration.
    On a sufficiently confident text match, the file hash is submitted
    back to OpenSubtitles.

    Args:
        movie_fname: path to the movie file, or falsy to skip the
            hash-based lookup.
        movie_name: movie title used for the text search.
        movie_year: expected release year (optional filter).
        movie_duration: expected duration (optional; compared against
            the IMDb runtime -- units not visible from this block,
            TODO confirm seconds vs minutes).

    Returns:
        dict with 'Director', 'Plot', 'Runtime', 'Title', 'Year',
        'imdbID' and 'imdbRating' keys.

    Raises:
        RuntimeError: when the IMDb search returns no result at all.
        LookupError: when no result with details could be selected.
    """
    global useragent, proxy, login
    # Load logger
    LOG = logging.getLogger(__name__)
    # remove_accents() may raise TypeError on some inputs; in that case
    # keep the name unchanged
    try:
        movie_name = remove_accents(movie_name)
    except TypeError:
        pass
    LOG.debug(('received parameters: {{movie_fname => {0}, movie_name => {1}, '
               'movie_year => {2}, movie_duration => {3}}}').format(
                   movie_fname,
                   movie_name,
                   movie_year,
                   movie_duration,
               ))
    # Initialize the imdbpie.Imdb object to get more information about movies
    imdb = imdbpie.Imdb(
        # For this version of TraktForVLC, we only want to return movies,
        # not episodes
        exclude_episodes=True,
    )
    movie_info = None
    movie_hash = None
    movie_found_by_hash = False
    if movie_fname and os.path.isfile(movie_fname):
        # Initialize the connection to opensubtitles
        # NOTE(review): assumes 'xmlrpc' is bound to a module exposing
        # ServerProxy (e.g. xmlrpclib) -- confirm against the imports
        if proxy is None:
            proxy = xmlrpc.ServerProxy("http://api.opensubtitles.org/xml-rpc")
        if login is None:
            login = proxy.LogIn('', '', 'en', useragent)
        LOG.debug('OpenSubtitles UserAgent: {0}'.format(useragent))
        # Compute the hash for the file
        movie_hash = hashFile(movie_fname)
        LOG.debug('Computed movie hash: {0}'.format(movie_hash))
        # Search for the files corresponding to this hash
        movies = proxy.CheckMovieHash2(login['token'], [
            movie_hash,
        ])
        movies = movies['data'] if 'data' in movies else []
        if movie_hash in movies:
            LOG.debug('We found movies using the hash')
            # Use fuzzywuzzy to get the closest file name
            movie_info = (
                max(movies[movie_hash],
                    key=lambda x: fuzzywuzzy.fuzz.ratio(movie_name, x))
                if len(movies[movie_hash]) > 1
                else movies[movie_hash][0])
            movie_info['details'] = imdb.get_title_by_id('tt{0}'.format(
                movie_info['MovieImdbID']))
            movie_found_by_hash = True
    if movie_info is None:
        # Use imdb to search for the movie
        search = imdb.search_for_title(movie_name)
        if not search:
            raise RuntimeError('Movie not found! 1')
        LOG.debug('Found {0} results using IMDB'.format(len(search)))
        LOG.debug(search)
        # Compute the proximity ratio of the title and search if the actual
        # year exists if it was provided
        year_found = False
        for r in search:
            r['fuzz_ratio'] = fuzzywuzzy.fuzz.ratio(movie_name, r['title'])
            if movie_year and \
                    not year_found and \
                    r['year'] == movie_year and \
                    r['fuzz_ratio'] >= 50.:
                year_found = True
        # If the actual year exists, clean it
        if year_found:
            search = [r for r in search if r['year'] == movie_year]
        LOG.debug('{0} results left after first filters'.format(len(search)))
        LOG.debug(search)
        if movie_duration:
            # If we have the movie duration, we can use it to make the
            # research more precise
            sum_ratio = sum(r['fuzz_ratio'] for r in search)
            mean_ratio = sum_ratio / float(len(search))
            std_dev_ratio = math.sqrt(
                sum([
                    math.pow(r['fuzz_ratio'] - mean_ratio, 2) for r in search
                ]) / float(len(search)))
            # Select only the titles over a given threshold
            threshold = max(50., mean_ratio + (std_dev_ratio / 2.))
            LOG.debug(('Computed ratio: {{mean => {0}, stdev => {1}, '
                       'threshold => {2}}}').format(
                           mean_ratio,
                           std_dev_ratio,
                           threshold,
                       ))
            search = [r for r in search if r['fuzz_ratio'] >= threshold]
        else:
            # If we don't have the movie duration, just use
            # the highest ratio
            max_ratio = max(50., max(r['fuzz_ratio'] for r in search))
            search = [r for r in search if r['fuzz_ratio'] == max_ratio]
            if len(search) > 1:
                search = [
                    search[0],
                ]
        LOG.debug('{0} results left after second filters'.format(len(search)))
        LOG.debug(search)
        if search:
            # Now we need to get more information to identify precisely
            # the movie; retry on HTTP 503 with exponential backoff
            # (2, 4 seconds), giving up after 3 attempts
            for r in search:
                num_try = 0
                while 'details' not in r:
                    try:
                        r['details'] = imdb.get_title_by_id(r['imdb_id'])
                    except requests.exceptions.HTTPError as e:
                        if e.response.status_code != 503:
                            raise
                        num_try += 1
                        if num_try < 3:
                            LOG.info((
                                'Received HTTP 503 error, waiting {0} seconds '
                                'before retrying for movie {1}').format(
                                    2**num_try, r))
                            time.sleep(2**num_try)
                        else:
                            LOG.info('Received HTTP 503 error, giving up')
                            break
            # Try to get the closest movie using the movie duration
            # if available
            movie_info = min(
                search,
                key=lambda x: abs(x['details'].runtime - movie_duration)
                if movie_duration and 'details' in x and x[
                    'details'].runtime is not None else float('inf'))
        else:
            movie_info = {}
    # We want to use only the details from now on
    details = movie_info.get('details')
    if details is None:
        raise LookupError("unable to find the movie '{0}'".format(movie_name))
    if movie_hash and \
            not movie_found_by_hash and \
            movie_info.get('fuzz_ratio', 0.) > 60.:
        LOG.debug('Sending movie hash information to opensubtitles')
        # Insert the movie hash if possible!
        res = proxy.InsertMovieHash(login['token'], [
            {
                'moviehash': movie_hash,
                'moviebytesize': os.path.getsize(movie_fname),
                'imdbid': details.imdb_id[2:],
                'movietimems': (movie_duration
                                if movie_duration else details.runtime),
                'moviefilename': os.path.basename(movie_fname),
            },
        ])
        if res['status'] != '200 OK':
            logging.warn('Unable to submit hash for movie \'{0}\': {1}'.format(
                details.title, res['status']))
    dict_info = {
        'Director': details.directors_summary[0].name,
        'Plot': details.plot_outline,
        'Runtime': details.runtime,
        'Title': details.title,
        'Year': details.year,
        'imdbID': details.imdb_id,
        'imdbRating': details.rating,
    }
    return dict_info
def getIMDbInfoFromImdbpie(imdbInfo):
    """Interactively resolve a movie on IMDb via imdbpie (Python 2).

    Searches for ``imdbInfo["moviename"]``, auto-selects the candidate
    when exactly one matches the name perfectly (or there is only one
    result), and otherwise asks the user to pick one -- the first few
    candidates are printed to the console and all of them are written
    as links into the ``fileToDistinguishSimilarMovienames`` HTML file.

    On success the 'name', 'year', 'link' and 'rating' keys of
    ``imdbInfo`` are updated in place; the dict is returned either way.
    """

    def getIMDbDistinction(ia, mName):
        # Ask IMDb for candidates and let the user disambiguate.
        # Returns the selected result dict, 'n' to request a new search
        # string, or the raw single-character user input.
        # NOTE(review): inputs like ' 5' (int() succeeds but the string
        # round-trip check fails) fall through and return None, which
        # would crash the len() check in the caller's loop -- confirm
        # whether that is intended.
        print
        print
        print mName
        s_result = ia.search_for_title(mName)
        html = ""
        i = 0
        amountPerfectlyMatchingCandidates = 0
        lastPerfectlyMatchingCandidate = {}
        # Inspect at most the first 10 candidates
        while (i < min(len(s_result), 10)):
            item = s_result[i]
            out = " [" + str(i) + "] " + (item["title"]) + " (" + str(
                item["year"]) + ")"
            # movie is completely like the searched one
            if (len(item["title"].strip()) == len(mName.strip())
                    and (item["title"].lower().strip() in mName.lower().strip())):
                amountPerfectlyMatchingCandidates += 1
                lastPerfectlyMatchingCandidate = item
            s_result[i]["link"] = link = "http://www.imdb.com/title/" + str(
                item["imdb_id"])
            html += "<br /><a href='" + link + "' target='_blank'>IMDb</a> " + out
            i += 1
            # Only the first three candidates are echoed to the console
            if (i < 3):
                print out + "\t",
        # A unique perfect match (or a single result) needs no prompt
        if (amountPerfectlyMatchingCandidates == 1 or len(s_result) == 1):
            return lastPerfectlyMatchingCandidate
        print
        # Rewrite the helper HTML file with the candidate links so the
        # user can check them in a browser before answering the prompt
        oldFile = read_file(fileToDistinguishSimilarMovienames)
        write_file(
            fileToDistinguishSimilarMovienames,
            oldFile.split("<center>")[0] + "<center>" + html + "</center>" +
            oldFile.split("</center>")[1])
        ch = raw_input(
            " choose your movie (moviesToDistinguish.html or above) number ('n' for new search string): "
        )
        try:
            # Accept only a clean integer answer
            if (str(int(ch)) in str(ch) and len(str(int(ch))) == len(ch)):
                return s_result[int(ch)]
        except:
            if (len(ch) > 1):
                return "n"
            else:
                return ch

    search = imdbpie.Imdb()
    movieDictShort = -1
    # Keep asking while the user requests a new search string ('n') or
    # nothing has been looked up yet (-1 sentinel)
    while (("n" in str(movieDictShort) and len(movieDictShort) == 1)
           or movieDictShort == -1):
        if ("n" in str(movieDictShort) and len(movieDictShort) == 1):
            bla = raw_input(" name movie string to search for: ")
            movieDictShort = getIMDbDistinction(search, bla)
        else:
            movieDictShort = getIMDbDistinction(search, imdbInfo["moviename"])
    # An empty/short answer means nothing was selected: keep imdbInfo as-is
    if (not movieDictShort or len(movieDictShort) < 3):
        return imdbInfo
    imdbInfo["name"] = movieDictShort["title"]
    imdbInfo["year"] = movieDictShort["year"]
    imdbInfo["link"] = movieDictShort["link"]
    imdbInfo["rating"] = ""
    #images = search.get_title_images( movieDictShort["imdb_id"] )
    #for im in search._get_images( movieDictShort["imdb_id"] ):
    #    print "Image: ", im
    #if len(images):
    #    imdbInfo["coverLink"] = images[0]
    return imdbInfo
import re
import urllib.parse
import urllib.request

import imdbpie

import fresh_tomatoes
import media

# Build a list of Movie objects for the first six IMDb top-250 titles:
# for each one, look its trailer up on YouTube, create a media.Movie
# instance and collect it into the movies list.
movies = []
imdb = imdbpie.Imdb()

# Fetch the top-250 chart once instead of once per loop iteration.
top_250 = imdb.top_250()

for i in range(0, 6):
    title = imdb.get_title_by_id(top_250[i]['tconst'])
    name = title.title
    storyline = title.plot_outline
    poster_image_url = title.poster_url

    # search youtube for the movie trailer and get its URL
    query_string = urllib.parse.urlencode({"search_query": name})
    html_content = urllib.request.urlopen(
        "http://www.youtube.com/results?" + query_string)
    # BUGFIX: the previous pattern r'href=\\' matched a literal
    # 'href=\' and could never yield a video id; capture the
    # 11-character YouTube video id instead.
    search_results = re.findall(r'href=\"\/watch\?v=(.{11})',
                                html_content.read().decode())
    trailer_youtube_url = "http://www.youtube.com/watch?v=" + \
        search_results[0]

    # create instance of Movie class
    movie = media.Movie(name, storyline, poster_image_url,
                        trailer_youtube_url)

    # Populate movies list
    # BUGFIX: 'movies' was initialized but never filled.
    movies.append(movie)
import bs4
import urllib3
import imdbpie

# Shared IMDb client (anonymized requests).
imdb = imdbpie.Imdb(anonymize=True)


def getMovieInfo(movie_title):
    """Search IMDb for *movie_title* and return a formatted summary
    (Spanish-facing strings), or an apology message when not found."""
    matches = imdb.search_for_title(movie_title)
    if not matches:
        return 'Lo siento, no he encontrado esa película...'
    found = imdb.get_title_by_id(matches[0]['imdb_id'])
    cast_names = [x.name for x in found.cast_summary]
    return ("*" + found.title + " - imdb rating: " + str(found.rating) +
            "\nReparto:* " + str(cast_names) +
            "\n*Sinopsis:* " + found.plot_outline)


def fromDictToString(cine, dict):
    """Render the *dict* key/value pairs under the *cine* header."""
    sections = [cine + '\n _______________ \n\n']
    for key, value in dict.items():
        sections.append(key + '\n' + str(value) + '\n - - - - - - \n')
    return ''.join(sections)


# HTTP connection pool used elsewhere in the module.
http = urllib3.PoolManager()
def search_text_episode():
    """Resolve an episode by text search, with Trakt assistance.

    Pipeline: look the show up on TheTVDB; try to map the TVDB episode
    id straight to an IMDb id through the Trakt API (when
    ``trakt_api_key`` is set); otherwise search IMDb by series name or
    alias and walk its seasons; as a last resort, build a fake
    IMDb-like media structure from Trakt episode data.

    Uses names from the enclosing scope: ``LOGGER``, ``parsed``,
    ``trakt_api_key``, ``tvdb_api``, ``imdbpie``, ``requests``,
    ``dateutil``, ``ResolveException`` and ``ReturnResult``.

    Returns:
        (imdb_client, imdb_title), or a ReturnResult wrapping a list of
        media dicts built from Trakt data, or ``None`` when the episode
        could not be resolved.
    """
    LOGGER.info('Searching media using text research on episode')
    ep = parsed['episode']
    # To allow to search on the tvdb
    tvdb = tvdb_api.Tvdb(
        cache=False,
        language='en',
    )
    # Perform the search, if nothing is found, there's a problem...
    try:
        tvdb_series = tvdb.search(ep['show'])
    except Exception as e:
        raise ResolveException(e)
    if not tvdb_series:
        return
    tvdb_series = tvdb[tvdb_series[0]['seriesName']]
    tvdb_episode = tvdb_series[ep['season']][ep['episodes'][0]]
    # Initialize the imdb object to perform the research
    imdb = imdbpie.Imdb(
        exclude_episodes=False,
    )
    # Initialize the series imdb id to None so we know if the TVDB
    # id was at least sufficient to determine the series we're
    # watching
    series_imdbid = None
    # Try and get the information using the TVDB id we just found
    trakt_result = None
    if trakt_api_key:
        trakt_session = requests.Session()
        trakt_session.headers.update({
            'Content-Type': 'application/json',
            'trakt-api-version': '2',
            'trakt-api-key': trakt_api_key,
        })
        resp = trakt_session.get(
            'https://api.trakt.tv/search/tvdb/{}?type=episode'.format(
                tvdb_episode['id']),
        )
        # If we found the result from Trakt, use it!
        if resp.status_code == requests.codes.ok:
            trakt_result = resp.json()[0]
            if trakt_result['episode']['ids']['imdb']:
                return imdb, imdb.get_title(
                    trakt_result['episode']['ids']['imdb'])
            if trakt_result['show']['ids']['imdb']:
                series_imdbid = trakt_result['show']['ids']['imdb']
    if not series_imdbid:
        # Use imdb to search for the series using its name or aliases
        search = None
        for seriesName in [tvdb_series['seriesName'], ] + \
                tvdb_series['aliases']:
            try:
                search = imdb.search_for_title(seriesName)
            except Exception as e:
                raise ResolveException(e)
            # Filter the results by name and type
            search = [
                s for s in search
                if s['type'] == 'TV series' and
                (s['title'] == seriesName or
                 '{title} ({year})'.format(**s) == seriesName)
            ]
            # If there is still more than one, filter by year
            if len(search) > 1:
                search = [
                    s for s in search
                    if s['year'] == tvdb_series['firstAired'][:4]
                ]
            # If we have a series, we can stop there!
            if search:
                break
        # If we did not find anything that matches
        if not search:
            return
        # We found the series IMDb id
        series_imdbid = search[0]['imdb_id']
    # Get the series' seasons and episodes
    imdb_series = imdb.get_title_episodes(series_imdbid)
    for imdb_season in imdb_series['seasons']:
        if 'season' not in imdb_season or \
                imdb_season['season'] != ep['season']:
            continue
        for imdb_episode in imdb_season['episodes']:
            if 'episode' not in imdb_episode or \
                    imdb_episode['episode'] != ep['episodes'][0]:
                continue
            # id is using format /title/ttXXXXXX/
            return imdb, imdb.get_title(imdb_episode['id'][7:-1])
    # We did not find the series, but if we have extra ids, try to
    # fake it!
    if trakt_result:
        # Add as many episodes as needed to compute the information for
        numbers = [trakt_result['episode']['number'], ]
        while len(numbers) < len(parsed['episode']['episodes']):
            numbers.append(numbers[-1] + 1)
        # Prepare the show information
        parentTitle = {
            'id': (
                '/title/{}/'.format(
                    trakt_result['show']['ids']['imdb'])
                if trakt_result['show']['ids'].get('imdb')
                else None
            ),
            'year': trakt_result['show']['year'],
            'title': trakt_result['show']['title'],
            'titleType': 'tvSeries',
        }
        # Expose every non-null Trakt id as an '<name>id' key
        parentTitle.update({
            '{}id'.format(k): v
            for k, v in trakt_result['show']['ids'].items()
            if v is not None
        })
        media_list = []
        for epnum in numbers:
            # Query for the episode information directly from Trakt
            resp = trakt_session.get(
                'https://api.trakt.tv/shows/{}/'
                'seasons/{}/episodes/{}?extended=full'.format(
                    trakt_result['show']['ids']['slug'],
                    trakt_result['episode']['season'],
                    epnum,
                ),
            )
            # If there was an error, raise an exception
            resp.raise_for_status()
            # Else, get the JSON data
            respj = resp.json()
            # Prepare the fake imdb media output
            media = {
                'base': {
                    'id': (
                        '/title/{}/'.format(respj['ids']['imdb'])
                        if respj['ids'].get('imdb')
                        else None
                    ),
                    'parentTitle': parentTitle,
                    'year': dateutil.parser.parse(
                        respj['first_aired']).year,
                    'episode': respj['number'],
                    'season': respj['season'],
                    'title': respj['title'],
                    'seriesStartYear': parentTitle['year'],
                    'runningTimeInMinutes': respj.get('runtime'),
                },
            }
            media['base'].update({
                '{}id'.format(k): v
                for k, v in respj['ids'].items()
                if v is not None
            })
            # And add it to the list
            media_list.append(media)
        # Return the list we just build
        return ReturnResult(media_list)
    # Not found
    return