def get_mojo_rank_info():
    """
    Store weekend box office rank summaries derived from mojo info.

    Iterates the rows returned by ``movie_helper.get_highest_mojo_rank()``
    and, for each one, writes the rank summary columns (cast to ``int``)
    to the matching ``movies`` record, selected by ``movieId``.
    """
    # columns copied from the summary row into the movies table
    rank_columns = (
        "best_rank",
        "weekends_at_best_rank",
        "weekends_in_top_3",
        "weekends_in_top_5",
        "weekends_in_top_10",
        "weekends_in_top_15",
    )

    summary_df = movie_helper.get_highest_mojo_rank()

    with tqdm(total=len(summary_df)) as progress:
        for _, record in summary_df.iterrows():
            # every value is cast to a plain int before being sent to the db
            rank_values = {
                column: int(record[column]) for column in rank_columns
            }
            movie_key = {"movieId": int(record["movieId"])}

            database_helper.update_data("movies",
                                        update_params=rank_values,
                                        select_params=movie_key)
            progress.update(1)
def get_mojo_data():
    """
    Scrape each movie's financial summary from BoxOfficeMojo via its imdb id.

    Rows from ``movie_helper.get_movies_df()`` with a truthy ``imdbId`` are
    looked up through ``mojo_helper.get_mojo_stats`` and the returned
    figures are written to the matching ``movies`` record. Rows without an
    imdb id are skipped, but still advance the progress bar.
    """
    # movies table column -> key in the stats returned by mojo_helper
    column_to_stat = {
        "budget_usd": "Budget",
        "uk_gross_usd": "UK",
        "domestic_gross_usd": "Domestic",
        "worldwide_gross_usd": "Worldwide",
        "international_gross_usd": "International",
    }

    all_movies = movie_helper.get_movies_df()

    with tqdm(total=len(all_movies)) as progress:
        for _, movie_row in all_movies.iterrows():
            imdb_id = movie_row['imdbId']

            # only movies with an imdb id can be scraped
            if imdb_id:
                mojo_stats = mojo_helper.get_mojo_stats(imdb_id)
                financials = {
                    column: mojo_stats[stat_key]
                    for column, stat_key in column_to_stat.items()
                }
                movie_key = {"movieId": movie_row["movieId"]}

                database_helper.update_data("movies",
                                            update_params=financials,
                                            select_params=movie_key)

            progress.update(1)
def get_mojo_run_info():
    """
    Store weekend box office run summaries derived from mojo info.

    Iterates the rows returned by ``movie_helper.get_movie_run_info()`` and
    copies the run columns of each row onto the matching ``movies`` record,
    selected by ``movieId``.
    """
    # columns copied unchanged from the run-info row into the movies table
    run_columns = (
        "end_weekend",
        "total_weekends",
        "total_release_weeks",
        "first_run_end",
        "first_run_weeks",
    )

    run_df = movie_helper.get_movie_run_info()

    with tqdm(total=len(run_df)) as progress:
        for _, record in run_df.iterrows():
            run_values = {column: record[column] for column in run_columns}
            movie_key = {"movieId": record["movieId"]}

            database_helper.update_data("movies",
                                        update_params=run_values,
                                        select_params=movie_key)
            progress.update(1)
def get_release_dates():
    """
    Function which uses imdb to collect the uk release date of films.

    For every row from ``movie_helper.get_movies_df()`` that has an imdb id,
    the ``release dates`` info set is requested through ``ia.get_movie``.
    The first entry that mentions ``UK`` and carries no parenthesised
    qualifier is parsed with the format ``%d %B %Y`` and written to the
    ``ukReleaseDate`` column of the matching ``movies`` record.

    Rows with no imdb id, no release-date data, or no usable UK entry are
    reported on the console and left untouched in the database.

    Raises:
        ValueError: if the selected UK entry does not match ``%d %B %Y``.
    """
    #get all movies from db
    movies_df = movie_helper.get_movies_df()

    with tqdm(total=len(movies_df)) as pbar:
        for _, row in movies_df.iterrows():
            uk_dates = []

            #if imdb id exists use it to look up the API (as the other collectors do)
            if row['imdbId']:
                movie = ia.get_movie(str(row['imdbId']), info='release dates')

                #.get() instead of [] so a title without release date data is
                #reported below rather than aborting the run with a KeyError
                release_dates = movie.get('release dates') or []

                #try to extract UK release dates (string from imdb is a mess)
                uk_dates = [
                    entry for entry in release_dates
                    if 'UK' in entry and '(' not in entry
                ]

            if uk_dates:
                #entries look like "<country>::<date>"; keep the date part
                date_string = uk_dates[0].split('::')[1]
                date = datetime.strptime(date_string, '%d %B %Y')

                database_helper.update_data(
                    "movies",
                    update_params={"ukReleaseDate": date},
                    select_params={"movieId": row["movieId"]})
            else:
                #if no uk release date found print to console
                print("No UK release for ", row['title'])

            pbar.update(1)
def get_cast_notes():
    """
    Function which uses imdb to collect cast notes eg Credited/Uncredited.

    For every row from ``movie_helper.get_movies_df()`` with a truthy
    ``imdbId``, the movie is fetched through ``ia.get_movie`` and, for each
    entry of its ``cast`` list, the entry's ``notes`` are written to the
    ``actors`` record selected by the person's imdb id (``p_imdbId``) and
    the movie's imdb id (``m_imdbId``). Movies without a cast list are
    skipped.
    """
    #get all movies from db
    movies_df = movie_helper.get_movies_df()

    with tqdm(total=len(movies_df)) as pbar:
        for _, row in movies_df.iterrows():
            #if imdbid exists use it to collect cast notes
            if row['imdbId']:
                movie = ia.get_movie(str(row['imdbId']))
                cast_list = movie.get('cast')

                #identity check against None (PEP 8) rather than "!= None"
                if cast_list is not None:
                    for cast_member in cast_list:
                        updates = {'notes': cast_member.notes}
                        selects = {
                            "p_imdbId": cast_member.personID,
                            "m_imdbId": row['imdbId']
                        }
                        database_helper.update_data("actors",
                                                    update_params=updates,
                                                    select_params=selects)

            pbar.update(1)
def get_keywords():
    """
    Function which uses imdb id to collect plot keywords.

    For every row from ``movie_helper.get_movies_df()`` with a truthy
    ``imdbId``, the ``keywords`` info set is requested through
    ``ia.get_movie`` and stored as a comma-delimited string in the
    ``keywords`` column of the matching ``movies`` record. When the movie
    has no usable keyword list, ``None`` is stored instead.
    """
    #get all movies from db
    movies_df = movie_helper.get_movies_df()

    with tqdm(total=len(movies_df)) as pbar:
        for _, row in movies_df.iterrows():
            #if imdbid exists use it to look up the API
            if row['imdbId']:
                #get list of keywords and create delimited string
                movie = ia.get_movie(str(row['imdbId']), info='keywords')
                try:
                    keywords = ",".join(movie['keywords'])
                except (KeyError, TypeError):
                    #missing or non-string keyword data -> store NULL; only
                    #these are caught so that Ctrl-C and genuine errors are
                    #no longer swallowed as a bare "except:" would
                    keywords = None

                #update the movies table in the db
                database_helper.update_data(
                    "movies",
                    update_params={"keywords": keywords},
                    select_params={"movieId": row["movieId"]})

            pbar.update(1)
def update_tweet_sentiments():
    """
    Assign sentiment scores and a sentiment class to the tweets of every
    movie in the movie_tweets2019 table.

    For each movie, ``tweet_helper.get_tweet_sentiments_scores`` is called
    with its ``movieId`` and every returned row is written back to the
    ``movie_tweets2019`` record with the same ``id``.

    NOTE(review): ``movies`` is not defined inside this function; it is
    expected to be available from the enclosing module scope - confirm.
    """
    # sentiment columns copied from the scored row into the tweets table
    sentiment_columns = (
        "negative_scr",
        "positive_scr",
        "neutral_scr",
        "compound_scr",
        "senti_class",
    )

    with tqdm(total=len(movies)) as progress:
        for film in movies:
            scored_tweets = tweet_helper.get_tweet_sentiments_scores(
                film.movieId)

            # write the newly assigned scores and class for each tweet
            for _, tweet in scored_tweets.iterrows():
                sentiment_values = {
                    column: tweet[column] for column in sentiment_columns
                }
                tweet_key = {"id": tweet["id"]}

                database_helper.update_data("movie_tweets2019",
                                            update_params=sentiment_values,
                                            select_params=tweet_key)

            # the bar counts movies, not individual tweets
            progress.update(1)
def get_trailer_metadata():
    """
    Collect trailer metadata for every trailer using its youtubeId.

    Each row of the ``trailers`` table is looked up through
    ``yt.get_video_metadata`` and the returned fields are written back to
    the ``trailers`` record with the same ``youtubeId``.
    """
    # trailers table column -> key in the metadata returned by the api
    column_to_field = {
        'title': 'video_title',
        'channelTitle': 'channel_title',
        'channelId': 'channel_id',
        'categoryId': 'video_category',
        'commentCount': 'video_comment_count',
        'description': 'video_description',
        'likeCount': 'video_like_count',
        'dislikeCount': 'video_dislike_count',
        'viewCount': 'video_view_count',
        'publishDate': 'video_publish_date',
        'tags': 'video_tags',
    }

    all_trailers = database_helper.select_query("trailers")

    with tqdm(total=len(all_trailers)) as progress:
        for _, trailer in all_trailers.iterrows():
            # one api request per trailer
            video_info = yt.get_video_metadata(trailer['youtubeId'])

            metadata = {
                column: video_info[field]
                for column, field in column_to_field.items()
            }
            trailer_key = {"youtubeId": trailer["youtubeId"]}

            database_helper.update_data("trailers",
                                        update_params=metadata,
                                        select_params=trailer_key)
            progress.update(1)
def get_metaData():
    """
    Function which uses imdbId to retrieve metadata from IMDb for each movie.

    For every row from ``movie_helper.get_movies_df()`` with a truthy
    ``imdbId``, the movie is fetched through ``ia.get_movie`` and its
    ``year``, ``genres``, ``rating``, ``votes`` and ``certificates`` are
    written to the matching ``movies`` record. ``genres`` and
    ``certificates`` are stored as comma-delimited strings. Any field IMDb
    does not provide is stored as ``None``.
    """
    #get all movies from db
    movies_df = movie_helper.get_movies_df()

    #get movie meta data
    with tqdm(total=len(movies_df)) as pbar:
        for _, row in movies_df.iterrows():
            #if an imdbid exists use it to look up the API
            if row['imdbId']:
                #get base meta data from imdb
                movie = ia.get_movie(str(row['imdbId']))

                #create delimited list of genre strings; reset to None for
                #every movie so a title without genres neither raises
                #NameError nor inherits the previous movie's genres
                genre_list = movie.get('genres')
                genres = ','.join(genre_list) if genre_list else None

                #create delimited list of movie certificates
                certificate_list = movie.get('certificates')
                certificates = (','.join(certificate_list)
                                if certificate_list else None)

                #update database with collected meta data; year is read with
                #.get() like rating/votes so a missing year does not abort
                #the whole run
                update_params = {
                    "year": movie.get('year'),
                    "genres": genres,
                    "rating": movie.get('rating'),
                    "votes": movie.get('votes'),
                    "certificates": certificates
                }
                select_params = {"movieId": row["movieId"]}
                database_helper.update_data("movies",
                                            update_params=update_params,
                                            select_params=select_params)

            pbar.update(1)
def get_imdbIds():
    """
    Function which uses the movie title from BFI to get the imdb id from
    the IMDb api.

    For every row from ``movie_helper.get_movies_df()`` the title is
    searched through ``ia.search_movie`` and only results whose ``kind`` is
    ``movie`` are considered. The first result is used by default; when
    several match, the title is printed for manual checking and the first
    result from 2019 is preferred if there is one. The chosen imdb id and
    url are written to the matching ``movies`` record. Titles with no movie
    result are left untouched.
    """
    #get all movies from db
    movies_df = movie_helper.get_movies_df()

    for _, row in movies_df.iterrows():
        #use the api to search imdb for films with the title
        search_results = ia.search_movie(row['title'])

        #only interested in movie objects
        movie_results = [
            result for result in search_results
            if result.get('kind') == 'movie'
        ]
        if not movie_results:
            continue

        #take the first result by default
        movie = movie_results[0]

        if len(movie_results) > 1:
            #flag issue to console so movie can be manually checked
            print("Check: ", row['title'])

            #try to get the one from 2019
            year_results = [
                result for result in movie_results
                if result.get('year') == 2019
            ]
            if year_results:
                movie = year_results[0]

        #extract imdb url and id using API
        movie_url = ia.get_imdbURL(movie)
        movie_id = ia.get_imdbID(movie)

        #update database; select_params must be a dict like every other
        #caller passes (the previous {"movieId", value} built a set)
        database_helper.update_data(
            "movies",
            update_params={
                "imdbId": movie_id,
                "url": movie_url
            },
            select_params={"movieId": row["movieId"]})
def get_trailer_release_dates():
    """
    Update the trailer release dates which could not be retrieved by
    get_trailer_metadata().

    Each row of the ``trailers`` table is passed, together with ``yt``, to
    ``youtube_helper.get_trailer_release`` and the result is written to the
    ``publishDate`` column of the ``trailers`` record with the same
    ``youtubeId``.
    """
    all_trailers = database_helper.select_query("trailers")

    with tqdm(total=len(all_trailers)) as progress:
        for _, trailer in all_trailers.iterrows():
            # customized api request that returns the trailer release date
            released_on = youtube_helper.get_trailer_release(
                trailer['youtubeId'], yt)

            database_helper.update_data(
                "trailers",
                update_params={'publishDate': released_on},
                select_params={"youtubeId": trailer["youtubeId"]})
            progress.update(1)
def get_critical_period():
    """
    Store the critical period of each film.

    Iterates the rows returned by ``movie_helper.get_critical_period()``
    and writes each row's ``critical_start`` and ``critical_end`` to the
    matching ``movies`` record, selected by ``movieId`` cast to ``int``.
    """
    period_columns = ("critical_start", "critical_end")

    periods_df = movie_helper.get_critical_period()

    with tqdm(total=len(periods_df)) as progress:
        for _, record in periods_df.iterrows():
            period_values = {
                column: record[column] for column in period_columns
            }
            movie_key = {"movieId": int(record["movieId"])}

            database_helper.update_data("movies",
                                        update_params=period_values,
                                        select_params=movie_key)
            progress.update(1)
if (greta_res.get('certificates')): certificates = ','.join(greta_res.get('certificates')) #update database update_params = { "imdbId": '2639336', "url": 'https://www.imdb.com/title/tt2639336/', "year": year, "genres": genres, "rating": rating, "votes": votes, "certificates": certificates } select_params = {"movieId": int(greta["movieId"])} database_helper.update_data("movies", update_params=update_params, select_params=select_params) #"Kobiety Mafii 2" # kobiety_mafii = database_helper.select_query("movies", { "movieId" : 262 }) # kobiety_mafii = kobiety_mafii.iloc[0] # kobiety_mafii_res = ia.get_movie('8858420') # year = kobiety_mafii_res['year'] # if (kobiety_mafii_res.get('genres')): # genres = ','.join(kobiety_mafii_res.get('genres')) # rating = kobiety_mafii_res.get('rating') # votes = kobiety_mafii_res.get('votes') # certificates = None # if (kobiety_mafii_res.get('certificates')): # certificates = ','.join(kobiety_mafii_res.get('certificates'))