Python ContentBased示例，content_based.ContentBased Python示例

示例#1

0

显示文件

def main():
    """Warm up every recommender, build the shared lists, then serve forever.

    The three module-level caches (new items, top products and the
    recommendations for new users) are rebound here, so they are declared
    ``global``. The models are only read, which needs no declaration.
    """
    global RECOMMEND_FOR_NEW_USER, NEW_ITEM_LIST, TOP_PRODUCT

    # --- cold start of the three recommenders, in a fixed order ---
    print("COLD START CONTENT BASED")
    ContentBased.cold_start()

    print("COLD START COLLABORATIVE")
    collaborative_model.cold_start()

    print("COLD START ASSOCIATION")
    association_model.cold_start()

    # --- shared lists consumed while generating recommendations ---
    NEW_ITEM_LIST = NewProducts.update_new_item_list()
    TOP_PRODUCT = TopProducts.update_top_product()
    RECOMMEND_FOR_NEW_USER = update_recommend_new_user()

    # --- daily refresh job, registered for 02:00 ---
    schedule.every().day.at("02:00").do(job_shedule)

    # --- service loop: fire any due job, then process pending actions ---
    while True:
        schedule.run_pending()
        get_action_make_recommend()

示例#2

0

显示文件

def home():
    """Render the landing page or one of the two recommendation listings.

    Behaviour depends on the query string:

    * ``recommend=<title>`` - content based recommendations for that title.
      Any failure while building them is answered with a 404.
    * ``genres=<genre>`` (with optional ``offset``) - a page of top movies,
      where the genre ``"All"`` disables the genre filter.
    * neither - the plain home page.

    Returns:
        The rendered ``recommendations.html`` or ``homepage.html`` template.
    """
    if "recommend" in request.args:
        try:
            title = request.args["recommend"]
            rec = ContentBased()
            did_you_mean = False
            df = rec.recommend(title,
                               DEFAULT_LIMIT,
                               full_search=True,
                               keywords_and_desc=False,
                               critics=False)
            poster_paths = get_poster_paths(df["id"].tolist(),
                                            df["original_title"].tolist())
            # A non-empty changed_title that differs from the query means the
            # recommender substituted another title for the one requested.
            if rec.changed_title != title and rec.changed_title != str():
                did_you_mean = True
            else:
                rec.changed_title = title
            rec_title_meta = get_meta(rec.changed_title, None)
            rec_id = rec_title_meta[0]["id"]

            return render_template('recommendations.html',
                                   titles=df["original_title"].tolist(),
                                   images=poster_paths,
                                   votes=df["vote_average"].tolist(),
                                   m_id=df["id"].tolist(),
                                   rec_title=rec.changed_title,
                                   rec_id=rec_id,
                                   did_you_mean=did_you_mean)
        except Exception:
            # Deliberately broad: any lookup failure becomes "not found".
            # Unlike a bare ``except:`` this lets SystemExit and
            # KeyboardInterrupt propagate.
            abort(404)
    elif "genres" in request.args:
        genre = request.args["genres"]
        if genre == "All":
            genre = None
        # A missing or non-numeric offset falls back to the first page
        # instead of failing the request.
        offset = request.args.get("offset", default=0, type=int)

        gen_rec = Recommendation()
        gen_rec.filter_genres()
        df = gen_rec.top_movies(gen_rec.md,
                                percentile=0.85,
                                limit=DEFAULT_LIMIT,
                                offset=offset,
                                genre=genre)
        poster_paths = get_poster_paths(df["id"].tolist(),
                                        df["original_title"].tolist())

        return render_template('recommendations.html',
                               titles=df["original_title"].tolist(),
                               images=poster_paths,
                               votes=df["vote_average"].tolist(),
                               m_id=df["id"].tolist(),
                               rec_title=request.args["genres"],
                               offset=offset,
                               next_offset=offset + DEFAULT_LIMIT,
                               prev_offset=offset - DEFAULT_LIMIT,
                               rec_id=None,
                               did_you_mean=None)
    else:
        return render_template('homepage.html')

示例#3

0

显示文件

    def setUp(self):
        """Build the fixtures shared by the tests.

        Connects to the local MongoDB, loads every document of
        ``moviesDB.movies`` into a DataFrame, constructs a ``ContentBased``
        instance for one fixed document id and stores the result of asking
        it for recommendations.
        """
        # Chained assignments keep a short local alias for each object while
        # still binding the attributes in the original order.
        self.client = mongo = MongoClient('mongodb://localhost:27017')
        self.db = database = mongo['moviesDB']
        self.collection = movies = database['movies']

        documents = list(movies.find())
        self.test_df = frame = DataFrame(documents)

        feature_columns = ["Title", "Genre", "Director", "Actors"]
        self.valid_cb = recommender = ContentBased(
            "_id", "5f96e0b8639a72229e0fd042", frame, feature_columns)
        self.recs = recommender.get_recommendations(10, "Rating")

示例#4

0

显示文件

def job_shedule():
    """Refresh the shared recommendation lists and re-run two cold starts.

    Rebinds the three module-level lists and then calls ``cold_start`` on
    ``ContentBased`` and on ``association_model``. The collaborative model
    is not touched here.
    """
    # Only names that are rebound need a ``global`` declaration.
    global RECOMMEND_FOR_NEW_USER, NEW_ITEM_LIST, TOP_PRODUCT

    # Refresh order is kept exactly as written: new-user list first.
    RECOMMEND_FOR_NEW_USER = update_recommend_new_user()
    NEW_ITEM_LIST = NewProducts.update_new_item_list()
    TOP_PRODUCT = TopProducts.update_top_product()

    # Rebuild the models that expose a cold start.
    ContentBased.cold_start()
    association_model.cold_start()

示例#5

0

显示文件

class testContentBased(unittest.TestCase):
    """Tests for ``ContentBased`` run against the local ``moviesDB`` database.

    Requires a MongoDB server on ``localhost:27017`` whose ``movies``
    collection contains the document id used by the fixtures.
    """

    def setUp(self):
        """Load the movies collection and build a valid recommender."""
        self.client = MongoClient('mongodb://localhost:27017')
        # Registered right after the client exists so the connection is
        # released after every test, even when a later fixture step raises
        # (tearDown would not run in that case).
        self.addCleanup(self.client.close)
        self.db = self.client['moviesDB']
        self.collection = self.db['movies']
        self.test_df = DataFrame(list(self.collection.find()))

        self.valid_cb = ContentBased("_id", "5f96e0b8639a72229e0fd042",
                                     self.test_df,
                                     ["Title", "Genre", "Director", "Actors"])
        self.recs = self.valid_cb.get_recommendations(10, "Rating")

    def test_constructor(self):
        # A fully specified constructor call yields a ContentBased instance.
        self.assertIsInstance(self.valid_cb, ContentBased)
        # Missing (None) arguments are expected to raise TypeError.
        self.assertRaises(TypeError, ContentBased, "_id", None, None, None)
        self.assertRaises(TypeError, ContentBased, None,
                          "5f96e0b8639a72229e0fd042", None, None)
        self.assertRaises(TypeError, ContentBased, None, self.test_df, None,
                          None)
        self.assertRaises(TypeError, ContentBased, None, None, None, [])
        self.assertRaises(TypeError, ContentBased, None, None, None, None)
        # Since the constructor calls the sort_frame() function these tests
        # serve to test that as well.
        self.assertRaises(KeyError, ContentBased, "_id",
                          "5f96e0b8639a72229e0fd042", self.test_df,
                          ["Title", "Ge", "Director", "Actors"])
        self.assertRaises(Exception, ContentBased, "_id", "blah123",
                          self.test_df,
                          ["Title", "Genre", "Director", "Actors"])

    def test_get_recommendations(self):
        # Recommendations come back as a list of dicts.
        self.assertIsInstance(self.recs, list)
        self.assertIsInstance(self.recs[0], dict)
        # An unknown second argument is expected to raise KeyError.
        self.assertRaises(KeyError, self.valid_cb.get_recommendations, 10,
                          "Bad Value")

示例#6

0

显示文件

def movie_meta():
    """Render the detail page of one movie plus five related titles.

    Reads ``title`` and ``id`` from the query string. A request without
    ``title``, or any failure while collecting the data, is answered with
    a 404.

    Returns:
        The rendered ``meta.html`` template.
    """
    if "title" not in request.args:
        abort(404)

    try:
        title = request.args["title"]
        m_id = request.args["id"]
        df_meta = get_meta(title, m_id)
        poster_path = get_poster_paths([int(m_id)], [title])[title]

        rec = ContentBased()
        df_rec = rec.recommend(title,
                               5,
                               full_search=True,
                               keywords_and_desc=False,
                               critics=False)
        rec_poster_paths = get_poster_paths(
            df_rec["id"].tolist(),
            df_rec["original_title"].tolist(),
            small=True)

        # df_meta is indexed positionally: [0] holds the movie fields,
        # [1] the cast, [2] the director and [3] the IMDb id.
        return render_template(
            'meta.html',
            title=df_meta[0]["original_title"],
            genres=df_meta[0]["genres"],
            homepage=df_meta[0]["homepage"],
            overview=df_meta[0]["overview"],
            release=df_meta[0]["release_date"],
            production=df_meta[0]["production_companies"],
            runtime=df_meta[0]["runtime"],
            tagline=df_meta[0]["tagline"],
            vote_average=df_meta[0]["vote_average"],
            vote_count=df_meta[0]["vote_count"],
            cast=df_meta[1],
            director=df_meta[2],
            poster_path=poster_path,
            rec_posters=rec_poster_paths,
            rec_titles=df_rec["original_title"].tolist(),
            rec_m_ids=df_rec["id"].tolist(),
            imdb_id=df_meta[3])
    except Exception:
        # Deliberately broad: a missing id, an unknown title or a lookup
        # error all mean "not found". Unlike a bare ``except:`` this lets
        # SystemExit and KeyboardInterrupt propagate.
        abort(404)

示例#7

0

显示文件

文件： hybrid.py 项目： pncnmnp/Movie-Recommendation

    def get_recommendation(self,
                           movie,
                           review,
                           critics=False,
                           full_search=False,
                           use_pickle=True):
        """
            Build hybrid recommendations; self.LIMIT caps the number of rows.

            param: movie - title of the movie (as mentioned in DB)
                   review - rating of the movie on the scale of 1-5
                   critics - (True or False type) Critically acclaimed recommendations
                   full_search - True: Recommendations generated using keywords, cast, crew and genre
                                 False: Recommendations generated on basis of tagline and overview
                                 (as described by the original author)
                   use_pickle - forwarded unchanged to ContentBased.recommend

            return: pandas DataFrame object with attributes -
                    title, id, vote_average, vote_count, popularity, release_date

            Titles that occur more than once across the collaborative and
            content based results are taken first. If that yields fewer
            than LIMIT rows, the remainder is filled in a 2:1 ratio:
            content based results up to two thirds of the gap, then
            collaborative results until LIMIT is reached.
        """
        rec_content_obj = ContentBased()
        rec_coll_obj = CollaborativeFiltering()

        rec_content = rec_content_obj.recommend(movie, self.LIMIT, critics,
                                                full_search, use_pickle)
        rec_content = self.convert_literal_eval(
            rec_content.to_json(orient="records", lines=True))
        print("Content Filtering completed.....")

        rec_coll_obj.LIMIT = 1000
        rec_coll = rec_coll_obj.user_model({movie: review})
        rec_coll = self.convert_literal_eval(
            rec_coll.to_json(orient="records", lines=True))
        print("Collaborative Filtering completed.....")

        # Collaborative records carry the title under "title", content based
        # records under "original_title".
        movies_freq = Counter(
            [entry["title"] for entry in rec_coll] +
            [entry["original_title"]
             for entry in rec_content]).most_common(self.LIMIT)

        # accepting movies whose frequency is greater than 1 from collaborative and content based results
        total_movies_rec = [name for name, freq in movies_freq if freq > 1]
        movie_df = pd.DataFrame(columns=[
            "title",
            "id",
            "vote_average",
            "vote_count",
            "popularity",
            "release_date",
        ])
        index = 0

        for name in total_movies_rec:
            movie_json = self.get_movie_json(name, rec_coll, rec_content)
            movie_df.loc[index] = array(list(movie_json.values()))
            index += 1

        if len(total_movies_rec) < self.LIMIT:
            rec_content_cutoff = (
                (self.LIMIT - len(total_movies_rec)) * 2) // 3
            start_index = index
            # Each source is paired with its own title key. Previously the
            # key only switched to "title" when the content cutoff was hit,
            # so an exhausted content list left collaborative records being
            # looked up under "original_title".
            sources = (
                ("original_title", rec_content, True),
                ("title", rec_coll, False),
            )
            for title_key, records, is_content in sources:
                for record in records:
                    if record[title_key] not in movie_df["title"].tolist():
                        # Only the first six values of the record fill the row.
                        movie_df.loc[index] = array(list(record.values())[:6])
                        index += 1
                    if is_content and start_index + rec_content_cutoff == index:
                        # Content share of the gap is used up; move on to
                        # the collaborative results.
                        break
                    elif index == self.LIMIT:
                        break

                if index == self.LIMIT:
                    break

        print("Hybrid Filtering completed.....")
        return movie_df

示例#8

0

显示文件

def get_action_make_recommend():
    """Process every pending user action and store fresh recommendations.

    For each document in ``user_product_event`` the function:

    1. loads the user's stored transactions and adds the current product,
    2. builds a recommendation list from the top products, new products,
       content based, collaborative and association sources,
    3. upserts the list and the updated transactions, increments the
       product rank, and deletes the processed event.

    An event that fails in step 2 or 3 is reported on stdout and deleted
    as well, so it is not retried.
    """
    actions = user_product_event.find()
    for action in actions:
        # Details of the action being processed.
        user = action.get('user_id')
        product = action.get('product_id')
        event = action.get('event')

        try:
            user_trans_dict = transactions.find_one({'user_id': user})['trans']
            user_trans = list(user_trans_dict)  # products the user already has
            user_trans_dict[product] = event
        except Exception:
            # Best effort: a user without stored transactions (or any other
            # lookup failure) starts from an empty history. Unlike a bare
            # ``except:`` this does not swallow KeyboardInterrupt/SystemExit,
            # which matters because this runs inside an endless service loop.
            user_trans_dict = {product: event}
            user_trans = []
        user_trans.append(product)

        try:
            recommend_list = TopProducts.gennerate_recommend(
                TOP_PRODUCT, user_trans)
            new_products = NewProducts.gennerate_recommend(
                NEW_ITEM_LIST, user_trans)
            recommend_list = RecommendationSys.add_to_recommend_list(
                recommend_list, new_products, 5, 1)
            content_based_re = ContentBased.gennerate_recommend(
                product, user_trans)
            recommend_list = RecommendationSys.add_to_recommend_list(
                recommend_list, content_based_re, 1, 2)
            # The collaborative model learns from the action before it is
            # asked for recommendations.
            collaborative_model.online_learning(user, product, event)
            collaborative_re = collaborative_model.genarate_recommend(
                user, user_trans)
            recommend_list = RecommendationSys.add_to_recommend_list(
                recommend_list, collaborative_re, 3, 2)
            association_re = association_model.gennerate_recommend(
                product, user_trans)
            recommend_list = RecommendationSys.add_to_recommend_list(
                recommend_list, association_re, 0, 1)
            # update user(total+=1), user_recommend, update product(rank+=1)
            user_recommend.update_one({"user_id": user},
                                      {"$set": {
                                          "products": recommend_list
                                      }},
                                      upsert=True)
            transactions.update_one({"user_id": user}, {
                "$set": {
                    "trans": user_trans_dict
                },
                "$inc": {
                    "total": 1
                }
            },
                                    upsert=True)
            products.update_one({"product_id": product}, {"$inc": {"rank": 1}})
            user_product_event.delete_one({
                "user_id": user,
                "product_id": product
            })
        except Exception as e:
            print(e)
            # The original line was garbled into invalid syntax; it now
            # reports both the user and the product of the failed event.
            print("BUG AT",
                  datetime.now().strftime("%H:%M:%S"), "user:", user,
                  "product_id:", product)
            user_product_event.delete_one({
                "user_id": user,
                "product_id": product
            })
            continue