def _load_or_build_pickle(file_name, build):
    """Return the object cached as pickled_files/<file_name>, creating it on a miss.

    :param file_name: name of the pickle file inside the "pickled_files" directory
    :param build: zero-argument callable that produces the object when
        check_if_file_exists does not report the cache file
    :return: the unpickled object, or the freshly built (and now pickled) one
    """
    cache_path = "pickled_files/" + file_name
    if check_if_file_exists("pickled_files", file_name):
        # NOTE(review): pickle.load can execute code embedded in the file, so
        # pickled_files/ must only ever be written by trusted code.
        with open(cache_path, "rb") as f:
            return pickle.load(f)
    built = build()
    with open(cache_path, "wb") as f:
        pickle.dump(built, f)
    return built


def get_movie_similarity_dfs(connection):
    """Return the film_list-derived objects, using the pickle caches when present.

    Each object is loaded from its file under pickled_files/ when that file is
    reported by check_if_file_exists; otherwise it is derived from the result
    of "select * from film_list;" and pickled for the next call. The query is
    issued lazily: at most once per call, and not at all when every cache
    file is already present.

    :param connection: connection object handed to ad.get_data_from_query
        (and to actors_df_categorical when the actor cache is missing)
    :return: tuple (movie_genre, encoded_actor_df, movie_price, movie_length,
        fid_list) where movie_genre is the FID/category columns passed through
        pd.get_dummies, encoded_actor_df is the result of
        actors_df_categorical on the FID/actors columns, movie_price and
        movie_length are the FID/price and FID/length columns, and fid_list is
        the FID column as a list
    """
    film_list_df = None

    def whole_df():
        # Query film_list only when some cache file is missing, and reuse the
        # result for every other object that has to be rebuilt in this call.
        nonlocal film_list_df
        if film_list_df is None:
            query = """select * from film_list;"""
            film_list_df = ad.get_data_from_query(connection, query)
        return film_list_df

    movie_genre = _load_or_build_pickle(
        "movie_genre.pkl",
        lambda: pd.get_dummies(whole_df()[['FID', 'category']], prefix=['category']),
    )
    encoded_actor_df = _load_or_build_pickle(
        "encoded_actor_df.pkl",
        lambda: actors_df_categorical(connection, whole_df()[['FID', 'actors']]),
    )
    movie_price = _load_or_build_pickle(
        "movie_price.pkl",
        lambda: whole_df()[['FID', 'price']],
    )
    movie_length = _load_or_build_pickle(
        "movie_length.pkl",
        lambda: whole_df()[['FID', 'length']],
    )
    # The fid_list is used by the function "create_df_with_cos"
    fid_list = _load_or_build_pickle(
        "fid_list.pkl",
        lambda: list(whole_df()['FID']),
    )
    return movie_genre, encoded_actor_df, movie_price, movie_length, fid_list
def get_actor_list(connection):
    """Return the actors' full names ("first_name last_name") as a list.

    The list is read from pickled_files/actor_list.pkl when
    check_if_file_exists reports that file; otherwise it is queried from the
    actor table and pickled to that path before being returned.

    :param connection: connection object handed to ad.get_data_from_query
    :return: list built from the Full_Name column of the query result
    """
    if check_if_file_exists("pickled_files", "actor_list.pkl"):
        with open("pickled_files/actor_list.pkl", "rb") as cache_file:
            return pickle.load(cache_file)

    query = """select CONCAT(first_name, " " ,last_name) as Full_Name from actor"""
    full_names = list(ad.get_data_from_query(connection, query)['Full_Name'])
    with open("pickled_files/actor_list.pkl", "wb") as cache_file:
        pickle.dump(full_names, cache_file)
    return full_names
def get_movie_details(connection, movie_list):
    """Fetch FID, title and category from film_list for the given film ids.

    :param connection: connection object handed to ad.get_data_from_query
    :param movie_list: iterable of film ids; each one is converted with str()
        and placed in the query's IN (...) list. May be empty.
    :return: whatever ad.get_data_from_query returns for the query; for an
        empty movie_list the query matches no rows
    """
    id_strings = [str(each_movie) for each_movie in movie_list]
    # "FID in ()" is a syntax error in MySQL (and standard SQL). "FID in (NULL)"
    # is valid and never true, so an empty movie_list yields an empty result
    # instead of a failed query.
    id_clause = "(" + (",".join(id_strings) or "NULL") + ")"
    # NOTE(review): the ids are interpolated into the SQL text unescaped; this
    # is only safe while movie_list comes from trusted code. Switch to bound
    # parameters if ad.get_data_from_query supports them.
    query = f"""select FID, title, category from film_list where FID in {id_clause};"""
    return ad.get_data_from_query(connection, query)
def find_recent_purchase(customer_id):
    """Return one customer's rentals, most recent rental_date first.

    The result is cached per customer in
    pickled_files/recent_purchase_<customer_id>.pkl. A single shared cache
    file would hand the first customer's rentals to every later caller,
    because the query is filtered by customer_id.

    :param customer_id: id of the customer; coerced with int() before it is
        used in the cache file name and in the SQL text
    :return: the cached object if check_if_file_exists reports this
        customer's cache file, otherwise the result of
        ad.get_data_from_query (columns customer_id, FID, title, category,
        rental_date), which is also pickled for the next call
    :raises ValueError, TypeError: if customer_id cannot be converted to int
    """
    # Coercing to int keeps both the file name and the interpolated SQL free
    # of anything but digits (and an optional sign).
    customer_id = int(customer_id)
    cache_file_name = f"recent_purchase_{customer_id}.pkl"
    cache_path = f"pickled_files/{cache_file_name}"

    if check_if_file_exists("pickled_files", cache_file_name):
        with open(cache_path, "rb") as f:
            return pickle.load(f)

    query = f"""select rental.customer_id, film_list.FID, film_list.title, film_list.category, rental.rental_date from rental join inventory on rental.inventory_id = inventory.inventory_id join film_list on inventory.film_id = film_list.FID where rental.customer_id = {customer_id} order by rental.customer_id, rental.rental_date desc;"""
    recent_purchase_df = ad.get_data_from_query(ad.db_connection, query)
    with open(cache_path, "wb") as f:
        pickle.dump(recent_purchase_df, f)
    return recent_purchase_df
def select_top_genre():
    """
    Map each customer in the rental data to their name and most-rented categories.

    The mapping is read from pickled_files/dictionary.pkl when
    check_if_file_exists reports that file; otherwise it is computed from the
    per-customer, per-category rental counts queried through ad.db_connection
    and pickled to that path.

    :return: a dictionary with key as customer_id and values as
        [FULL_NAME, (at most 3 categories, in query order: highest rental count first)]
    """
    if check_if_file_exists("pickled_files", "dictionary.pkl"):
        with open("pickled_files/dictionary.pkl", "rb") as cache_file:
            return pickle.load(cache_file)

    query = """select rental.customer_id, CONCAT(customer.first_name," ",customer.last_name) as FULL_NAME, category.name as Category, count(*) as COUNT_RENTED_MOVIES from rental join inventory on rental.inventory_id = inventory.inventory_id join film_category on inventory.film_id = film_category.film_id join category on film_category.category_id = category.category_id join customer on customer.customer_id = rental.customer_id group by rental.customer_id, category.category_id order by FULL_NAME, COUNT_RENTED_MOVIES desc; """
    rental_counts = ad.get_data_from_query(ad.db_connection, query, pd_df=True)
    ad.set_multi_index(rental_counts, ['customer_id', 'FULL_NAME'], inplace=True)

    top3_dict = {}
    # One index entry per distinct (customer_id, FULL_NAME) pair.
    for index_entry in rental_counts.index.drop_duplicates(keep='first'):
        customer_key = index_entry[0]
        # Rows keep the query order, so head(3) takes the customer's
        # categories with the highest rental counts.
        categories = rental_counts.loc[customer_key]['Category']
        top3_dict[customer_key] = [
            index_entry[1],
            tuple(categories.head(3).to_list()),
        ]

    with open("pickled_files/dictionary.pkl", "wb") as cache_file:
        pickle.dump(top3_dict, cache_file)
    return top3_dict
def get_all_customer_ids():
    """Return the distinct customer_id values of the rental table as a list.

    The query runs through ad.db_connection on every call; nothing is cached.

    :return: list built from the customer_id column of the query result
    """
    rental_customers = ad.get_data_from_query(
        ad.db_connection,
        """select distinct customer_id from rental""",
    )
    return list(rental_customers['customer_id'])