def main():
    """Cold-start every recommender, register the daily refresh, then loop forever.

    Rebinds the module-level RECOMMEND_FOR_NEW_USER, NEW_ITEM_LIST and
    TOP_PRODUCT caches. This function never returns: after setup it
    alternates between running due scheduled jobs and processing pending
    user actions.
    """
    global RECOMMEND_FOR_NEW_USER, NEW_ITEM_LIST, TOP_PRODUCT
    global collaborative_model, association_model

    # Step 1-3: cold-start the three recommenders, announcing each one.
    print("COLD START CONTENT BASED")
    ContentBased.cold_start()

    print("COLD START COLLABORATIVE")
    collaborative_model.cold_start()

    print("COLD START ASSOCIATION")
    association_model.cold_start()

    # Step 4: build the cached lists; the new-user recommendations are
    # computed last, after the new-item and top-product lists.
    NEW_ITEM_LIST = NewProducts.update_new_item_list()
    TOP_PRODUCT = TopProducts.update_top_product()
    RECOMMEND_FOR_NEW_USER = update_recommend_new_user()

    # Step 5: register the refresh job to run once a day at "02:00".
    schedule.every().day.at("02:00").do(job_shedule)

    # Main loop: fire any due scheduled job, then handle queued user actions.
    while True:
        schedule.run_pending()
        get_action_make_recommend()
def home():
    """Render the homepage, a title-based recommendation page, or a genre listing.

    Query parameters (read from ``request.args``):
        recommend: movie title to build content-based recommendations for.
        genres: genre to list top movies for; the value "All" disables the
            genre filter.
        offset: integer paging offset used together with ``genres``. Falls
            back to 0 when it is missing or not an integer.

    Returns:
        The rendered 'recommendations.html' page for ``recommend`` or
        ``genres`` requests, otherwise the rendered 'homepage.html'.

    Any failure while building title-based recommendations aborts the
    request with 404.
    """
    args = request.args

    if "recommend" in args:
        try:
            title = args["recommend"]
            rec = ContentBased()
            df = rec.recommend(title,
                               DEFAULT_LIMIT,
                               full_search=True,
                               keywords_and_desc=False,
                               critics=False)
            poster_paths = get_poster_paths(df["id"].tolist(),
                                            df["original_title"].tolist())

            # The recommender may substitute a different title; flag that so
            # the page can show a "did you mean" hint. An empty changed_title
            # means no substitution happened, so fall back to the query.
            did_you_mean = rec.changed_title != title and rec.changed_title != ""
            if not did_you_mean:
                rec.changed_title = title

            rec_title_meta = get_meta(rec.changed_title, None)
            rec_id = rec_title_meta[0]["id"]

            return render_template('recommendations.html',
                                   titles=df["original_title"].tolist(),
                                   images=poster_paths,
                                   votes=df["vote_average"].tolist(),
                                   m_id=df["id"].tolist(),
                                   rec_title=rec.changed_title,
                                   rec_id=rec_id,
                                   did_you_mean=did_you_mean)
        except Exception:
            # Deliberately broad: any lookup failure is reported as "not
            # found". Unlike a bare ``except`` this still lets
            # KeyboardInterrupt and SystemExit propagate.
            abort(404)

    elif "genres" in args:
        requested_genre = args["genres"]
        genre = None if requested_genre == "All" else requested_genre

        # A missing or non-integer offset falls back to the first page
        # instead of failing the request.
        offset = args.get("offset", 0, type=int)

        gen_rec = Recommendation()
        gen_rec.filter_genres()
        df = gen_rec.top_movies(gen_rec.md,
                                percentile=0.85,
                                limit=DEFAULT_LIMIT,
                                offset=offset,
                                genre=genre)
        poster_paths = get_poster_paths(df["id"].tolist(),
                                        df["original_title"].tolist())

        return render_template('recommendations.html',
                               titles=df["original_title"].tolist(),
                               images=poster_paths,
                               votes=df["vote_average"].tolist(),
                               m_id=df["id"].tolist(),
                               rec_title=requested_genre,
                               offset=offset,
                               next_offset=offset + DEFAULT_LIMIT,
                               prev_offset=offset - DEFAULT_LIMIT,
                               rec_id=None,
                               did_you_mean=None)

    else:
        return render_template('homepage.html')
def setUp(self):
    """Build the fixtures: Mongo handles, the movies frame, a recommender and its output.

    Connects to the MongoDB server at localhost:27017, loads every document
    of the 'movies' collection in 'moviesDB' into a DataFrame, constructs a
    ContentBased instance from it and stores the result of asking it for 10
    recommendations with "Rating" as the second argument.
    """
    # NOTE(review): nothing visible here closes self.client — confirm a
    # matching teardown exists elsewhere.
    self.client = MongoClient('mongodb://localhost:27017')
    self.db = self.client['moviesDB']
    self.collection = self.db['movies']

    documents = list(self.collection.find())
    self.test_df = DataFrame(documents)

    feature_columns = ["Title", "Genre", "Director", "Actors"]
    self.valid_cb = ContentBased("_id", "5f96e0b8639a72229e0fd042",
                                 self.test_df, feature_columns)
    self.recs = self.valid_cb.get_recommendations(10, "Rating")
def job_shedule():
    """Refresh the cached recommendation lists and re-run two cold starts.

    Rebinds the module-level RECOMMEND_FOR_NEW_USER, NEW_ITEM_LIST and
    TOP_PRODUCT, then cold-starts the content-based and association
    recommenders. The collaborative model is not touched here.
    """
    global RECOMMEND_FOR_NEW_USER, NEW_ITEM_LIST, TOP_PRODUCT
    global collaborative_model, association_model

    # NOTE(review): main() computes the new-user recommendations *after*
    # refreshing the new-item and top-product lists; here they are computed
    # first. Confirm whether update_recommend_new_user() depends on them.
    RECOMMEND_FOR_NEW_USER = update_recommend_new_user()
    NEW_ITEM_LIST = NewProducts.update_new_item_list()
    TOP_PRODUCT = TopProducts.update_top_product()

    ContentBased.cold_start()
    association_model.cold_start()
class testContentBased(unittest.TestCase):
    """Tests for ContentBased, run against the local 'moviesDB.movies' collection."""

    def setUp(self):
        """Load the movies collection and build a valid recommender for each test."""
        self.client = MongoClient('mongodb://localhost:27017')
        # Close the connection after every test (even if the rest of setUp
        # fails) so the suite does not accumulate open clients.
        self.addCleanup(self.client.close)

        self.db = self.client['moviesDB']
        self.collection = self.db['movies']
        self.test_df = DataFrame(list(self.collection.find()))
        self.valid_cb = ContentBased("_id", "5f96e0b8639a72229e0fd042",
                                     self.test_df,
                                     ["Title", "Genre", "Director", "Actors"])
        self.recs = self.valid_cb.get_recommendations(10, "Rating")

    def test_constructor(self):
        """A valid call yields an instance; invalid arguments raise."""
        self.assertIsInstance(self.valid_cb, ContentBased)

        # Missing or misplaced arguments are rejected with TypeError.
        self.assertRaises(TypeError, ContentBased, "_id", None, None, None)
        self.assertRaises(TypeError, ContentBased, None,
                          "5f96e0b8639a72229e0fd042", None, None)
        self.assertRaises(TypeError, ContentBased, None, self.test_df, None,
                          None)
        self.assertRaises(TypeError, ContentBased, None, None, None, [])
        self.assertRaises(TypeError, ContentBased, None, None, None, None)

        # Since the constructor calls the sort_frame() function, these tests
        # serve to test that as well.
        self.assertRaises(KeyError, ContentBased, "_id",
                          "5f96e0b8639a72229e0fd042", self.test_df,
                          ["Title", "Ge", "Director", "Actors"])
        self.assertRaises(Exception, ContentBased, "_id", "blah123",
                          self.test_df,
                          ["Title", "Genre", "Director", "Actors"])

    def test_get_recommendations(self):
        """Recommendations are a list of dicts; an unknown second argument raises KeyError."""
        self.assertIsInstance(self.recs, list)
        self.assertIsInstance(self.recs[0], dict)
        self.assertRaises(KeyError, self.valid_cb.get_recommendations, 10,
                          "Bad Value")
def movie_meta():
    """Render the detail page for one movie together with five recommendations.

    Query parameters (read from ``request.args``):
        title: movie title; required, otherwise the request aborts with 404.
        id: movie id; must be convertible to ``int``.

    Returns:
        The rendered 'meta.html' page.

    Any failure while collecting the metadata, posters or recommendations
    aborts the request with 404.
    """
    if "title" not in request.args:
        abort(404)

    try:
        title = request.args["title"]
        m_id = request.args["id"]

        df_meta = get_meta(title, m_id)
        details = df_meta[0]
        poster_path = get_poster_paths([int(m_id)], [title])[title]

        rec = ContentBased()
        df_rec = rec.recommend(title,
                               5,
                               full_search=True,
                               keywords_and_desc=False,
                               critics=False)
        rec_poster_paths = get_poster_paths(df_rec["id"].tolist(),
                                            df_rec["original_title"].tolist(),
                                            small=True)

        return render_template('meta.html',
                               title=details["original_title"],
                               genres=details["genres"],
                               homepage=details["homepage"],
                               overview=details["overview"],
                               release=details["release_date"],
                               production=details["production_companies"],
                               runtime=details["runtime"],
                               tagline=details["tagline"],
                               vote_average=details["vote_average"],
                               vote_count=details["vote_count"],
                               cast=df_meta[1],
                               director=df_meta[2],
                               poster_path=poster_path,
                               rec_posters=rec_poster_paths,
                               rec_titles=df_rec["original_title"].tolist(),
                               rec_m_ids=df_rec["id"].tolist(),
                               imdb_id=df_meta[3])
    except Exception:
        # Deliberately broad: every lookup failure is reported as "not
        # found". Unlike a bare ``except`` this still lets KeyboardInterrupt
        # and SystemExit propagate.
        abort(404)
def get_recommendation(self, movie, review, critics=False, full_search=False, use_pickle=True):
    """Combine content-based and collaborative results into one table.

    At most ``self.LIMIT`` rows are produced.

    Args:
        movie: title of the movie (as mentioned in the DB).
        review: rating of the movie on the scale of 1-5.
        critics: True for critically acclaimed recommendations.
        full_search: True - recommendations generated using keywords, cast,
            crew and genre; False - recommendations generated on the basis
            of tagline and overview.
        use_pickle: forwarded unchanged to ``ContentBased.recommend``.

    Returns:
        pandas DataFrame with the columns title, id, vote_average,
        vote_count, popularity, release_date.

    Titles that occur more than once in the combined list of collaborative
    and content-based titles are taken first. If that yields fewer than
    ``self.LIMIT`` rows, the remainder is filled from the two result lists,
    content-based first: two thirds (integer division) of the missing rows
    are budgeted for content-based results and the rest come from the
    collaborative results.
    """
    rec_content_obj, rec_coll_obj = ContentBased(), CollaborativeFiltering()

    # Content-based results, round-tripped through JSON-lines text and
    # self.convert_literal_eval (presumably yielding a list of dicts, one
    # per movie — verify against the helper).
    rec_content = rec_content_obj.recommend(movie, self.LIMIT, critics,
                                            full_search, use_pickle)
    rec_content = self.convert_literal_eval(
        rec_content.to_json(orient="records", lines=True))
    print("Content Filtering completed.....")

    # Collaborative results for a single {title: rating} input; the
    # collaborative object's LIMIT is raised to 1000 before the call.
    rec_coll_obj.LIMIT = 1000
    rec_coll = rec_coll_obj.user_model({movie: review})
    rec_coll = self.convert_literal_eval(
        rec_coll.to_json(orient="records", lines=True))
    print("Collaborative Filtering completed.....")

    # Count each title across both lists (collaborative rows are keyed by
    # "title", content rows by "original_title") and keep the self.LIMIT
    # most frequent.
    movies_freq = Counter(
        list([movie["title"] for movie in rec_coll]) +
        list([movie["original_title"] for movie in rec_content])).most_common(self.LIMIT)

    # accepting movies whose frequency is greater than 1 from collaborative and content based results
    total_movies_rec = [movie[0] for movie in movies_freq if movie[1] > 1]
    # print(total_movies_rec)

    movie_df = pd.DataFrame(columns=[
        "title",
        "id",
        "vote_average",
        "vote_count",
        "popularity",
        "release_date",
    ])

    # Rows are appended by position; `index` is the next free row label.
    # Note: the loops below rebind the `movie` parameter.
    index = 0
    for movie in total_movies_rec:
        movie_json = self.get_movie_json(movie, rec_coll, rec_content)
        # assumes movie_json's values come in the same order as the
        # DataFrame columns — TODO confirm against get_movie_json
        movie_df.loc[index] = array(list(movie_json.values()))
        index += 1

    if len(total_movies_rec) < self.LIMIT:
        # Two thirds of the still-missing rows are budgeted for content results.
        rec_content_cutoff = (
            (self.LIMIT - len(total_movies_rec)) * 2) // 3
        start_index = index
        # Title key per source: 0 -> content results, 1 -> collaborative results.
        rec_title_name = {0: "original_title", 1: "title"}
        curr_rec = 0  # as we start with content based results
        for rec in [rec_content, rec_coll]:
            for movie in rec:
                # Skip titles that are already in the output table.
                if (movie[rec_title_name[curr_rec]] not in movie_df["title"].tolist()):
                    # The first six values of the record are used as the row;
                    # assumes they line up with the six columns — TODO confirm.
                    movie_df.loc[index] = array(list(movie.values())[:6])
                    index += 1
                # NOTE(review): curr_rec only switches to the collaborative
                # key when this cutoff break fires. If rec_content runs out
                # first, rec_coll entries look like they would be read under
                # "original_title" — confirm.
                if start_index + rec_content_cutoff == index and rec == rec_content:
                    curr_rec = 1
                    break
                elif index == self.LIMIT:
                    break
            if index == self.LIMIT:
                break

    print("Hybrid Filtering completed.....")
    return movie_df
def get_action_make_recommend():
    """Process every queued user action and store a fresh recommendation list.

    For each document returned by ``user_product_event.find()`` this:

    1. loads the user's stored transactions (starting from an empty history
       when they cannot be read) and records the new product/event;
    2. builds a recommendation list by merging top products, new products,
       content-based, collaborative and association results through
       ``RecommendationSys.add_to_recommend_list``;
    3. upserts the list into ``user_recommend``, upserts the transactions
       (incrementing ``total``), and increments the product's ``rank``;
    4. deletes the processed event.

    If step 2 or 3 raises, the error is printed and the event is still
    deleted, so a failing event is not retried.
    """
    for action in user_product_event.find():
        # take information of this action
        user = action.get('user_id')
        product = action.get('product_id')
        event = action.get('event')

        # Load the user's transaction history. Best effort: any failure
        # (no stored record, missing 'trans' field, ...) falls back to a
        # history containing only the current action.
        try:
            user_trans_dict = transactions.find_one({'user_id': user})['trans']
            user_trans = list(user_trans_dict)
            user_trans_dict[product] = event
        except Exception:
            user_trans_dict = {product: event}
            user_trans = []
        user_trans.append(product)

        event_filter = {"user_id": user, "product_id": product}

        try:
            recommend_list = TopProducts.gennerate_recommend(
                TOP_PRODUCT, user_trans)

            new_products = NewProducts.gennerate_recommend(
                NEW_ITEM_LIST, user_trans)
            recommend_list = RecommendationSys.add_to_recommend_list(
                recommend_list, new_products, 5, 1)

            content_based_re = ContentBased.gennerate_recommend(
                product, user_trans)
            recommend_list = RecommendationSys.add_to_recommend_list(
                recommend_list, content_based_re, 1, 2)

            # The collaborative model learns from this action before it is
            # asked for recommendations.
            collaborative_model.online_learning(user, product, event)
            collaborative_re = collaborative_model.genarate_recommend(
                user, user_trans)
            recommend_list = RecommendationSys.add_to_recommend_list(
                recommend_list, collaborative_re, 3, 2)

            association_re = association_model.gennerate_recommend(
                product, user_trans)
            recommend_list = RecommendationSys.add_to_recommend_list(
                recommend_list, association_re, 0, 1)

            # update user(total+=1), user_recommend, update product(rank+=1)
            user_recommend.update_one(
                {"user_id": user},
                {"$set": {"products": recommend_list}},
                upsert=True)
            transactions.update_one(
                {"user_id": user},
                {"$set": {"trans": user_trans_dict}, "$inc": {"total": 1}},
                upsert=True)
            products.update_one({"product_id": product},
                                {"$inc": {"rank": 1}})
            user_product_event.delete_one(event_filter)
        except Exception as e:
            # Log the failure and drop the event so that it does not block
            # the queue on every pass.
            print(e)
            print("BUG AT", datetime.now().strftime("%H:%M:%S"),
                  "user:", user, "product_id:", product)
            user_product_event.delete_one(event_filter)