def movie_rat(f):
    """Look up a movie on Rotten Tomatoes and return basic rating info.

    f: tokenized command where f[2:] holds the words of the movie title
       (e.g. a chat/IRC command split on whitespace).

    Returns [matched_title, '<audience_score>/100', synopsis]; the
    synopsis falls back to "No Synopsis Found." when RT has none.
    """
    # Re-join the title words that the caller split apart.
    movie = ' '.join(f[2:])
    rt = RT('53uhmdfpu5sybbb5y529skkh')  # amitbj96
    # First search resolves the user's text to RT's canonical title.
    title = rt.search(movie)[0]['title']
    # Fix: the original issued two further identical searches by title --
    # one for ratings, one for synopsis.  One call yields both fields.
    best = rt.search(title, page_limit=1)[0]
    synopsis = str(best['synopsis'])
    if synopsis == '':
        synopsis = "No Synopsis Found."
    # [movie name, audience score out of 100, synopsis]
    return [title, str(best['ratings']['audience_score']) + "/100", synopsis]
def get_rottentomatoes_ratings(title, year, result=None): print "Processing {} - {}".format(title, year) result = result or defaultdict(lambda: "N/A", {'title':title, 'year': year}) notes = [] try: rt = RT(RT_API_KEY) movie_list = rt.search(process_title(title)) if year: movie_list = filter_year(movie_list, year) if not movie_list: raise Exception("No results found.") try: movie = movie_list[0] result['rt_matched_title'] = movie['title'] result['rt_audience_score'] = movie['ratings']['audience_score'] result['rt_critics_score'] = movie['ratings']['critics_score'] except KeyError: notes.append("Results not found: {}".format(title)) except Exception as e: notes.append("Exception encountered: {}".format(e)) traceback.print_exc() finally: # result['Title'] = title # result['Year'] = year result['rt_notes'] = '|'.join(notes) return result
def backend(movieTitles1,movieTitles2,database,svd_model_file): """ Returns recommended movies for two people movieTitles1: list (!) of movies person 1 wants to watch movieTitles2: list (!) of movies person 2 wants to watch """ #-----------------------------------------CREATE MOVIE ID AND TITLES DATAFRAME-----------------------------------------------------------# moviesDf=createMovieIDTitleDataFrame(database) #-----------------------------------------FIND MOVIEIDS OF INPUT MOVIES------------------------------------------------------------# # TO DO: Fix handling of not finding a movie ITEMIDS1=[getMovieID(moviesDf,movie) for movie in movieTitles1] ITEMIDS2=[getMovieID(moviesDf,movie) for movie in movieTitles2] #-------------------------------------------------GET SIMILARITIES-----------------------------------------------------------------# sims=getSimilarityMatrix(svd_model_file) #------------------------------------------------GET RECOMMENDED MOVIEIDS--------------------------------------------------------------# recsIDs = getRecMovieIDs(ITEMIDS1,ITEMIDS2,sims) #------------------------------------------------CONVERT TO TITLES--------------------------------------------------------------# recTitles=moviesDf.Title.loc[recsIDs].tolist() #------------------------------------------------GET POSTER URLS--------------------------------------------------------------# rt=RT() recMoviesInfo=[rt.search(title)[0] for title in recTitles] posterUrls=[movie['posters']['original'] for movie in recMoviesInfo] movieUrls=[movie['links']['alternate'] for movie in recMoviesInfo] #------------------------------------------------RETURN--------------------------------------------------------------# return zip(recTitles,posterUrls,movieUrls)
def get_rottentomatoes_ratings(title, year, result=None):
    """Populate *result* with Rotten Tomatoes data for (title, year).

    Reuses the caller-supplied dict when given; otherwise builds a
    defaultdict whose missing keys read "N/A".  Records the matched
    title and audience/critics scores, and accumulates any problems
    into the pipe-separated 'rt_notes' field.
    """
    print "Processing {} - {}".format(title, year)
    result = result or defaultdict(lambda: "N/A", {
        'title': title,
        'year': year
    })
    notes = []
    try:
        rt = RT(RT_API_KEY)
        movie_list = rt.search(process_title(title))
        if year:
            # Restrict the hits to the requested release year.
            movie_list = filter_year(movie_list, year)
        if not movie_list:
            raise Exception("No results found.")
        try:
            movie = movie_list[0]
            result['rt_matched_title'] = movie['title']
            result['rt_audience_score'] = movie['ratings']['audience_score']
            result['rt_critics_score'] = movie['ratings']['critics_score']
        except KeyError:
            # Hit lacked an expected field; note it and fall through.
            notes.append("Results not found: {}".format(title))
    except Exception as e:
        notes.append("Exception encountered: {}".format(e))
        traceback.print_exc()
    finally:
        # result['Title'] = title
        # result['Year'] = year
        result['rt_notes'] = '|'.join(notes)
    return result
class RTAdapter(Adapter):
    """Rotten Tomatoes Adapter

    Implements the Rotten Tomatoes adapter.
    """

    def __init__(self):
        self.config = {
            'api_key': '8yvmeqtydvquk9bxv4mvemhm',
        }
        self.rt = RT(self.config['api_key'])

    def get_similar_film_titles(self, title):
        """Return up to five result titles for *title*.

        Raises FilmNotFoundError when the search comes back empty.
        """
        films = self.rt.search(title)[:5]
        if not films:
            raise FilmNotFoundError()
        return [film['title'] for film in films]

    def get_film(self, title):
        """Return the search result whose title best matches *title*.

        Raises FilmNotFoundError when no result matches.
        """
        films = self.rt.search(title)
        # Match against the received titles (fuzzy matching lives in
        # safe_find_film).
        film_titles = [f.get('title', None) for f in films]
        found_title = safe_find_film(title, film_titles)
        if not found_title:
            raise FilmNotFoundError()
        return films[film_titles.index(found_title)]

    def get_film_score(self, title):
        """Return the critics score for *title* normalized to 0.0-1.0,
        or None when Rotten Tomatoes has no ratings for the film."""
        film = self.get_film(title)
        # Fix: idiomatic membership test ('x not in y', not 'not x in y').
        if 'ratings' not in film:
            return None
        return film['ratings']['critics_score'] / 100.0

    def __repr__(self):
        return 'Rotten Tomatoes'
def get_movie_rating(movie):
    """Returns a Rotten Tomatoes score for the given movie title.

    Returns a (matched_title, critics_score) tuple, or None when the
    search yields nothing usable.
    """
    try:
        # Renamed from 'json' -- the original shadowed the stdlib module.
        data = RT.search(movie)[0]
        title = data['title']
        rating = data['ratings']['critics_score']
        return (title, rating)
    except (IndexError, KeyError):
        # Fix: the original caught only IndexError (empty result list),
        # but a hit missing 'title'/'ratings' raises KeyError and would
        # escape.  Both cases mean "no usable rating".
        return None
class IMDBApi(object):
    """Search Rotten Tomatoes for a title (optionally pinned to a year)
    and fetch the full info record for every match."""

    def __init__(self, title, year=0):
        self.query = dict(title=title, year=int(year))
        self.rt = RT(rotten)

    def __call__(self):
        """Run the search and return a list of rt.info() records."""
        wanted_year = self.query['year']
        hits = self.rt.search(self.query['title'])
        # Keep the unfiltered result set around for later inspection.
        self.partial = copy.copy(hits)
        if wanted_year:
            hits = [hit for hit in hits if hit['year'] == self.query['year']]
        return [self.rt.info(hit['id']) for hit in hits]
def _getAllInfo(self, search_params):
    """Query Rotten Tomatoes for *search_params* and return a list of
    movie_types.MovieInfo results (empty on any failure)."""
    infos = []
    try:
        client = RT(self.key)
        for hit in client.search(str(search_params)):
            clean_title = utils.sanitizeString(hit.get("title", search_params.title))
            release_year = str(hit.get("year", search_params.year))
            # No genre without more effort.
            infos.append(movie_types.MovieInfo(clean_title, release_year))
    except Exception as ex:
        # Bad -- need to find a better exception.
        utils.logWarning("Lib: {} Title: {} Error {}: {}".format(self.display_name,
                                                                 search_params.title,
                                                                 type(ex), ex),
                         title="{} lookup".format(self.display_name))
    return infos
def sms():
    """Twilio SMS endpoint: treat the message body as a movie title and
    reply with its Rotten Tomatoes critics score plus a recommendation."""
    response = twiml.Response()
    body = request.form['Body']
    rt = RT()
    hits = rt.search(body)
    if not hits:
        msg = "We didn't find " + body + " on RT... maybe check your spelling."
    else:
        top = hits[0]
        rating = top['ratings']['critics_score']
        title = top['title']
        msg = 'The film ' + str(title) + ' received a ' + \
            str(rating) + ' on the TOMATOMETER.'
        # Anything above 50 on the tomatometer gets a thumbs-up.
        msg += ' I would watch it.' if rating > 50 else ' Skip this one.'
    response.sms(msg)
    return str(response)
from rottentomatoes import RT import time rt = RT() #movies = ['fight club','gravity','toy story 3','american hustle','skyfall','jack and jill','basic instinct 2','white out','lost souls','babylon','argo','bears','her','up','a beautiful mind','braveheart','the hurt locker','gambit','paranoia','getaway'] movies = [line.strip('\n') for line in open('movies.txt')] f = open('reviews-date.txt', 'w') count = 0 for u in movies: movlst = rt.search(u, page_limit=1) if movlst: if movlst[0][u'id'] != '': review = rt.info(movlst[0][u'id'], 'reviews') review.viewkeys() rlt = review[ u'reviews'] #review is a dict, value of each key is list, each list element is a dict for a in rlt: f.write(a[u'quote'] + '\t' + a[u'date'] + '\n') f2.write(a[u'date'] + '\n') count = count + 1 #print a[u'quote'] if len(rlt) == 0: print u else: print count else: print u else: print u time.sleep(5) f.close()
# making soup html = br.response().read() soup = BeautifulSoup(html) related_movies = soup.find_all("a", "vrt_tl") if len(related_movies) == 10: related_movies = related_movies[5:] related_movies = [related_movie.text for related_movie in related_movies] print str(related_movies) # find imdb of related movie # movie to rotten tomatoes if not previous saved imdb_ids = [] for related_movie in related_movies: try: movie_info = rt.search(related_movie)[0] rt_id = movie_info['id'] imdb_id = "tt" + movie_info['alternate_ids']['imdb'] imdb_ids.append(imdb_id) save_movie_info_to_mongo.delay(related_movie, rt_id=rt_id) except Exception as e: print e continue # saving the imdb ids if len(imdb_ids) > 0: print imdb_ids movie._similar_movies = imdb_ids movie.save() time.sleep(5)
class MovieInfo(object):
    """Aggregates metadata for a single film from Rotten Tomatoes, TMDB,
    Amazon Product Search and YouTube.

    Rotten Tomatoes is the primary source -- looked up by *rt_id* when
    given, otherwise via the first search hit for *movie* -- and TMDB
    fills in whatever RT lacks.
    """
    def __init__(self, movie, rotten_tomatoe_api_key, tmdb_api_key,
                 aws_access_key, aws_secret_key, affiliate_key,
                 rt_id=None, tmdb_id=None):
        # movie: title string used to search every backend.
        # tmdb_id: accepted but unused here -- the TMDB record is matched
        # by title plus RT theater release date instead.
        self._movie = movie
        # amazon
        self._amazon_product_search = AmazonProductSearch(aws_access_key,
                                                          aws_secret_key,
                                                          affiliate_key)
        # rotten tomatoes
        self._rt = RT(rotten_tomatoe_api_key)
        if rt_id:
            self._rt_data = self._rt.info(rt_id)
        else:
            self._rt_data = self._rt.search(movie)[0]
        # tmdb -- choose the entry whose release date best matches RT's
        # theater date.
        self._tmdb = tmdb
        self._tmdb.configure(tmdb_api_key)
        movie = self._tmdb.Movies(movie, limit=True,
                                  expected_release_date=self._rt_data[
                                      'release_dates']['theater']).get_best_match()
        self._tmdb_data = self._tmdb.Movie(movie[1]['id'])
        # youtube
        self._yt_service = gdata.youtube.service.YouTubeService()

    def get_amazon_purchase_links(self, top_cast, runtime):
        '''
        Returns Amazon products for this movie, disambiguated by its
        top cast and runtime.
        '''
        products = self._amazon_product_search.item_search(self._movie,
                                                           top_cast, runtime)
        return products

    @property
    def cast(self):
        '''
        Returns the names of the full cast for this movie.
        '''
        full_cast = self._rt.info(self._rt_data['id'], 'cast')
        names = [cast['name'] for cast in full_cast['cast']]
        return names

    @property
    def critic_reviews(self):
        '''
        Returns a list of critic reviews (json documents) for this movie.
        '''
        reviews = self._rt.info(self._rt_data['id'], 'reviews')
        return reviews['reviews']

    @property
    def critics_score(self):
        '''
        Returns the Rotten Tomatoes critics score for this movie.
        '''
        return self._rt_data['ratings']['critics_score']

    @property
    def director(self):
        '''
        Returns a list of directors for this movie (from TMDB).
        '''
        return self._tmdb_data.get_director()

    @property
    def genres(self):
        '''
        Returns the lower-cased genres of this movie, supplied by TMDB.
        '''
        genres = self._tmdb_data.get_genres()
        genres = [genre['name'].lower() for genre in genres]
        return genres

    @property
    def imdb_id(self):
        '''
        Returns this movie's IMDB id ("tt"-prefixed), falling back to
        TMDB when RT lacks the alternate id.
        '''
        try:
            return "tt" + self._rt_data['alternate_ids']['imdb']
        except:  # NOTE(review): bare except -- KeyError is the expected case.
            return self._tmdb_data.get_imdb_id()

    @property
    def poster(self):
        '''
        Returns the poster of the movie, in its original size.
        '''
        return self._tmdb_data.get_poster()

    @property
    def runtime(self):
        '''
        Returns the runtime of this movie in minutes.
        '''
        try:
            return int(self._rt_data['runtime'])
        except:  # NOTE(review): bare except -- consider narrowing.
            return int(self._tmdb_data.get_runtime())

    @property
    def release_date(self):
        '''
        Returns this movie's release date in {year}-{month}-{day} format.
        '''
        try:
            return parser.parse(self._rt_data['release_dates']['theater'])
        except:  # NOTE(review): bare except -- consider narrowing.
            return parser.parse(self._tmdb_data.get_release_date())

    @property
    def similar_movies(self):
        '''
        Returns a list of movies that are similar to this one.

        NOTE(review): the computed list is never returned -- as written
        this property always evaluates to None.
        '''
        movies = self._rt.info(self._rt_data['id'], 'similar')['movies']
        # if movie is none or len(movies) == 0:
        # search google current movie title +
        # parse DOM for return movies

    @property
    def synopsis(self):
        '''
        Returns this movie's synopsis (TMDB overview when RT's is empty).
        '''
        synopsis = self._rt_data['synopsis']
        if len(synopsis) == 0:
            synopsis = self._tmdb_data.get_overview()
        return synopsis

    @property
    def title(self):
        '''
        Returns this movie's title.
        '''
        return self._rt_data['title']

    @property
    def trailers(self, limit=3):
        '''
        Returns a list of youtube ids of trailers for this movie.

        Uses TMDB's data when it returns more than *limit* trailers;
        otherwise queries YouTube with "{movie_name} trailer
        {release_year}" and filters long/unrelated/duplicate videos.

        NOTE(review): properties cannot receive arguments, so *limit*
        is always its default of 3; also, the prose intent elsewhere
        says "3 or more" while the test below is strictly greater.
        '''
        trailers = self._tmdb_data.get_trailers()['youtube']
        if len(trailers) > limit:
            return [trailer['source'] for trailer in trailers]
        else:
            release_year = str(self.release_date).split('-')[0]
            query = gdata.youtube.service.YouTubeVideoQuery()
            query.vq = "{title} trailer {release_year} ".format(
                title=self._movie, release_year=release_year)
            query.orderby = 'relevance'
            feed = self._yt_service.YouTubeQuery(query)
            entries = self._remove_long_youtube_videos(feed.entry[:3])
            entries = self._remove_unrelated_videos(entries)
            unique_entries = self._remove_duplicate_youtube_videos(entries)
            return unique_entries

    def _remove_duplicate_youtube_videos(self, entries, threshold=5):
        '''
        Removes duplicate videos by comparing runtimes: two videos within
        *threshold* seconds of each other are assumed to be duplicates.
        '''
        limit = 3  # NOTE(review): unused local.
        videos = []
        for entry in entries:
            runtime = int(entry.media.duration.seconds)
            similar = [runtime >= int(video["runtime"]) - threshold and
                       runtime <= int(video["runtime"]) + threshold
                       for video in videos]
            if not any(similar):
                video_id = self._extract_youtube_id(entry.media.player.url)
                videos.append({"yt_id": video_id, "runtime": runtime})
        yt_ids = [video['yt_id'] for video in videos]
        return yt_ids

    def _remove_long_youtube_videos(self, entries, max_seconds=600):
        # Drop videos running max_seconds (10 minutes) or longer.
        entries = filter(lambda entry: int(entry.media.duration.seconds) < max_seconds,
                         entries)
        return entries

    def _remove_unrelated_videos(self, entries):
        # Keep only videos whose title fuzzy-matches the movie title.
        entries = filter(lambda entry: fuzzywuzzy.fuzz.ratio(
            entry.media.title.text.decode('utf-8').lower(),
            self._movie.lower()) > 20, entries)
        return entries

    def _extract_youtube_id(self, youtube_url):
        # The id is the 'v=' query parameter, truncated at the next '&'.
        video_id = youtube_url.split('v=')[1]
        ampersand_position = video_id.find('&')
        if(ampersand_position != -1):
            video_id = video_id[0:ampersand_position]
        return video_id
from rottentomatoes import RT
import time

# Dump review quotes/dates for every title listed in movies.txt.
# (A hard-coded sample list used to live here as a comment.)
rt = RT()
movies = [line.strip("\n") for line in open("movies.txt")]

f = open("reviews-date.txt", "w")
count = 0
for u in movies:
    movlst = rt.search(u, page_limit=1)
    if movlst:
        if movlst[0][u"id"] != "":
            review = rt.info(movlst[0][u"id"], "reviews")
            review.viewkeys()  # NOTE(review): no-op expression (result discarded)
            rlt = review[
                u"reviews"
            ]  # review is a dict, value of each key is list, each list element is a dict
            for a in rlt:
                f.write(a[u"quote"] + "\t" + a[u"date"] + "\n")
                # NOTE(review): f2 is never opened anywhere in this
                # script -- this line raises NameError on the first
                # review written.
                f2.write(a[u"date"] + "\n")
                count = count + 1
                # print a[u'quote']
            if len(rlt) == 0:
                print u  # title had no reviews
            else:
                print count
        else:
            print u
    else:
        print u  # title not found on Rotten Tomatoes
    # Pause between titles to respect the RT API rate limit.
    time.sleep(5)
f.close()
# Persist the collected Netflix titles to a CSV file on disk.
write_path = 'C:\Other Projects\Netflix Movies\\'
write_file = "titles.csv"
f = write_path + write_file
with open(f, 'w+') as my_file:
    my_file.write('\n'.join(netflix_titles))

# Read back in the csv file:
netflix_titles = [line.rstrip('\n') for line in open(f)]

# set up RT api package
from rottentomatoes import RT
rt = RT('4cbst6rnnvresrd9e8q83hhs')

# just testing how RT's API works
fight_clubs = rt.search('101 dalmations')
for club in fight_clubs:
    print "title=" + club["title"] + " & ID=" + club["id"] + " & released=" + str(club["year"])
    print "Critics' Score: " + str(club["ratings"]["critics_score"])
    print "Audience Score: " + str(club["ratings"]["audience_score"])
    # Flag any hit that matches a title in the Netflix catalog.
    if club["title"] in netflix_titles:
        print "On Netflix!"

# scrape http://www.rottentomatoes.com/top/bestofrt/?year=2012 for best movies
from lxml import html
import requests

# let's scrape the top 100 movies from 2008 - present
top_movies = []
# NOTE(review): the body of this loop continues beyond this chunk.
for i in range(10, 15):
# NOTE(review): fragment from the middle of a title-parsing loop -- the
# enclosing 'for word in ...:' header and the tail of the final 'else:'
# branch lie outside this chunk, so the indentation structure cannot be
# reconstructed reliably; the line is preserved verbatim below.
try: year = int(word) except ValueError: year = 1 # catches the case of films whose titles only consist of a year, eg. '2012' if (year > 1900 and year < 2015) and len(title) > 0: break if word.upper() in breakwords: break title.append(word) fulltitle = ' '.join(title) results = myrt.search(fulltitle) omdb = omdbsearch(fulltitle) try: genres = omdb["Genre"] except: genres = "" if "comedy" in genres.lower(): #print "DELETE " + fulltitle.upper() + " ASAP! COMEDY FOUND!" comedyflag = True else: #print "Title: " + fulltitle + " Genres: " + genres
#!/usr/bin/python from rottentomatoes import RT import sys import pprint #api_key = 'bpbjuznunrqvkeuqjk9m2dmm' #RT(api_key).search('gone with the wind') rt = RT() dump = rt.search(sys.argv[1]) pprint.pprint(dump) if dump: rating = dump[0]['ratings']['critics_score'] title = dump[0]['title'] msg = 'The film ' + str(title) + ' received a ' + \ str(rating) + ' on the TOMATOMETER.' if rating > 50: msg = msg + ' I would watch it.' else: msg = msg + ' Skip this one.' else: msg = 'nothing here!' print msg
cur.execute("DROP TABLE IF EXISTS rtid") # create the table as detailed in the documentation cur.execute("CREATE TABLE rtid(id INT PRIMARY KEY AUTO_INCREMENT, \ rtid INT)") # bring in the titles and years from the Box Office Mojo data cur.execute("SELECT * FROM mojo") titles = cur.fetchall() # loop through each title and pull out the rt id for the first film # returned from the RT search with the correct year, if no matches # use NULL for the rt id for movie in titles: print j title = movie[1] year = movie[5] startTime = time.time() searchResults = rt.search(title) correctFilm = 0 found = False for result in searchResults: if result["year"] == year: correctFilm = result found = True break if found: rtID = correctFilm["id"] else: rtID = "NULL" cur.execute("INSERT INTO rtid (rtid) VALUE (" + str(rtID) + ")") j = j + 1 endTime = time.time() # rt api limits to 5 calls per second, with latency this turns out to
#!/usr/bin/env python
# Dump Rotten Tomatoes search results for a list of titles to CSV.
from rottentomatoes import RT
import json, csv

rt = RT()

# movies = ['toy story 3', 'the lion king', 'the matrix', ...]
movies = ['pocahontas', 'shawshank redemption']

# Fix: the original rebound the name 'json' (shadowing the imported json
# module) to hold the accumulated hits; use a distinct name instead.
results = []
for movie in movies:
    results += rt.search(movie)

with open('test2.csv', 'wb+') as f:
    # Column order mirrors the fields of an RT search result document.
    dict_writer = csv.DictWriter(
        f,
        fieldnames=[
            'ratings', 'abridged_directors', 'links', 'title',
            'critics_consensus', 'release_dates', 'abridged_cast',
            'synopsis', 'mpaa_rating', 'year', 'alternate_ids',
            'posters', 'runtime', 'id'
        ])
    dict_writer.writeheader()
    dict_writer.writerows(results)
class MovieInfo(object):
    """Aggregates metadata for a single film from Rotten Tomatoes, TMDB,
    Amazon Product Search and YouTube.

    Rotten Tomatoes is the primary source -- looked up by *rt_id* when
    given, otherwise via the first search hit for *movie* -- and TMDB
    fills in whatever RT lacks.
    """

    def __init__(self, movie, rotten_tomatoe_api_key, tmdb_api_key,
                 aws_access_key, aws_secret_key, affiliate_key, rt_id=None,
                 tmdb_id=None):
        # movie: title string used to search every backend.
        # tmdb_id: accepted but unused -- the TMDB record is matched by
        # title plus RT theater release date instead.
        self._movie = movie

        # amazon
        self._amazon_product_search = AmazonProductSearch(
            aws_access_key, aws_secret_key, affiliate_key)

        # rotten tomatoes
        self._rt = RT(rotten_tomatoe_api_key)
        if rt_id:
            self._rt_data = self._rt.info(rt_id)
        else:
            self._rt_data = self._rt.search(movie)[0]

        # tmdb -- choose the entry whose release date best matches RT's
        # theater date.
        self._tmdb = tmdb
        self._tmdb.configure(tmdb_api_key)
        movie = self._tmdb.Movies(
            movie,
            limit=True,
            expected_release_date=self._rt_data['release_dates']
            ['theater']).get_best_match()
        self._tmdb_data = self._tmdb.Movie(movie[1]['id'])

        # youtube
        self._yt_service = gdata.youtube.service.YouTubeService()

    def get_amazon_purchase_links(self, top_cast, runtime):
        '''
        Returns Amazon products for this movie, disambiguated by its top
        cast and runtime.
        '''
        products = self._amazon_product_search.item_search(
            self._movie, top_cast, runtime)
        return products

    @property
    def cast(self):
        '''
        Returns the names of the full cast for this movie.
        '''
        full_cast = self._rt.info(self._rt_data['id'], 'cast')
        return [member['name'] for member in full_cast['cast']]

    @property
    def critic_reviews(self):
        '''
        Returns a list of critic reviews (json documents) for this movie.
        '''
        reviews = self._rt.info(self._rt_data['id'], 'reviews')
        return reviews['reviews']

    @property
    def critics_score(self):
        '''
        Returns the Rotten Tomatoes critics score for this movie.
        '''
        return self._rt_data['ratings']['critics_score']

    @property
    def director(self):
        '''
        Returns a list of directors for this movie (from TMDB).
        '''
        return self._tmdb_data.get_director()

    @property
    def genres(self):
        '''
        Returns the lower-cased genres of this movie, supplied by TMDB.
        '''
        genres = self._tmdb_data.get_genres()
        return [genre['name'].lower() for genre in genres]

    @property
    def imdb_id(self):
        '''
        Returns this movie's IMDB id ("tt"-prefixed), falling back to
        TMDB when RT lacks the alternate id.
        '''
        try:
            return "tt" + self._rt_data['alternate_ids']['imdb']
        except Exception:  # Fix: was a bare except (also trapped SystemExit).
            return self._tmdb_data.get_imdb_id()

    @property
    def poster(self):
        '''
        Returns the poster of the movie, in its original size.
        '''
        return self._tmdb_data.get_poster()

    @property
    def runtime(self):
        '''
        Returns the runtime of this movie in minutes.
        '''
        try:
            return int(self._rt_data['runtime'])
        except Exception:  # Fix: was a bare except.
            return int(self._tmdb_data.get_runtime())

    @property
    def release_date(self):
        '''
        Returns this movie's release date in {year}-{month}-{day} format.
        '''
        try:
            return parser.parse(self._rt_data['release_dates']['theater'])
        except Exception:  # Fix: was a bare except.
            return parser.parse(self._tmdb_data.get_release_date())

    @property
    def similar_movies(self):
        '''
        Returns a list of movies similar to this one, as reported by
        Rotten Tomatoes.
        '''
        movies = self._rt.info(self._rt_data['id'], 'similar')['movies']
        # if movie is none or len(movies) == 0:
        # search google current movie title +
        # parse DOM for return movies
        # Fix: the original computed this list but fell through without
        # returning it, so the property always evaluated to None.
        return movies

    @property
    def synopsis(self):
        '''
        Returns this movie's synopsis (TMDB overview when RT's is empty).
        '''
        synopsis = self._rt_data['synopsis']
        if len(synopsis) == 0:
            synopsis = self._tmdb_data.get_overview()
        return synopsis

    @property
    def title(self):
        '''
        Returns this movie's title.
        '''
        return self._rt_data['title']

    @property
    def trailers(self):
        '''
        Returns a list of youtube ids of trailers for this movie.

        Uses TMDB's data when it returns 3 or more trailers; otherwise
        queries YouTube with "{movie_name} trailer {release_year}" and
        filters out long, unrelated and duplicate videos.
        '''
        # 'limit' used to be a parameter, but a property cannot receive
        # arguments, so it is a plain constant here.
        limit = 3
        trailers = self._tmdb_data.get_trailers()['youtube']
        # Fix: the documented contract is "3 or more", but the original
        # tested len(trailers) > limit (i.e. 4+).
        if len(trailers) >= limit:
            return [trailer['source'] for trailer in trailers]
        release_year = str(self.release_date).split('-')[0]
        query = gdata.youtube.service.YouTubeVideoQuery()
        query.vq = "{title} trailer {release_year} ".format(
            title=self._movie, release_year=release_year)
        query.orderby = 'relevance'
        feed = self._yt_service.YouTubeQuery(query)
        entries = self._remove_long_youtube_videos(feed.entry[:3])
        entries = self._remove_unrelated_videos(entries)
        return self._remove_duplicate_youtube_videos(entries)

    def _remove_duplicate_youtube_videos(self, entries, threshold=5):
        '''
        Removes duplicate videos by comparing runtimes: two videos within
        *threshold* seconds of each other are assumed to be duplicates.
        '''
        # (An unused 'limit = 3' local was removed here.)
        videos = []
        for entry in entries:
            runtime = int(entry.media.duration.seconds)
            similar = [
                runtime >= int(video["runtime"]) - threshold
                and runtime <= int(video["runtime"]) + threshold
                for video in videos
            ]
            if not any(similar):
                video_id = self._extract_youtube_id(entry.media.player.url)
                videos.append({"yt_id": video_id, "runtime": runtime})
        return [video['yt_id'] for video in videos]

    def _remove_long_youtube_videos(self, entries, max_seconds=600):
        # Drop videos running max_seconds (10 minutes) or longer.
        entries = filter(
            lambda entry: int(entry.media.duration.seconds) < max_seconds,
            entries)
        return entries

    def _remove_unrelated_videos(self, entries):
        # Keep only videos whose title fuzzy-matches the movie title.
        entries = filter(
            lambda entry: fuzzywuzzy.fuzz.ratio(
                entry.media.title.text.decode('utf-8').lower(),
                self._movie.lower()) > 20, entries)
        return entries

    def _extract_youtube_id(self, youtube_url):
        # The id is the 'v=' query parameter, truncated at the next '&'.
        video_id = youtube_url.split('v=')[1]
        ampersand_position = video_id.find('&')
        if ampersand_position != -1:
            video_id = video_id[0:ampersand_position]
        return video_id