def _online_similar_user_model(product_category, review_score):
    """
    Trains a user-based KNNWithMeans model and finds the user most similar
    to a newly added user.

    Ratings are read from the ``user_rec_sys`` SQL table and passed, together
    with the arguments, to ``_add_new_user``, which returns the updated
    ratings and the raw id of the new user. The model is then fitted on the
    full updated dataset.

    Parameters
    ----------
    product_category: str.
        Forwarded to ``_add_new_user``.
    review_score: int.
        Forwarded to ``_add_new_user``.

    Returns
    -------
    trainset, new_user_raw_id, new_user_neighbor_raw_id
        The Surprise trainset the model was fitted on, the raw id of the new
        user, and the raw id of that user's single nearest neighbor.
    """
    user_rec_sys_data = pd.read_sql_table(
        "user_rec_sys", engine, index_col="index")
    updated_data, new_user_raw_id = _add_new_user(
        user_rec_sys_data, product_category, review_score)

    # Defining Data Object.
    # The rating scale spans the scores actually present in the data.
    scores = updated_data["review_score"]
    reader = Reader(rating_scale=(scores.min(), scores.max()))
    data = Dataset.load_from_df(updated_data, reader)
    trainset = data.build_full_trainset()

    # Training.
    # Seed both RNGs so that repeated calls behave reproducibly.
    random.seed(0)
    np.random.seed(0)
    sim_options = {"name": "msd", "user_based": True}
    algo = KNNWithMeans(sim_options=sim_options, verbose=False)
    algo.fit(trainset)

    # Find Similar User.
    new_user_inner_id = trainset.to_inner_uid(new_user_raw_id)
    neighbor_inner_ids = algo.get_neighbors(new_user_inner_id, k=1)
    # get_neighbors() works with inner ids; convert back to the raw id that
    # the return contract promises.
    new_user_neighbor_raw_id = trainset.to_raw_uid(neighbor_inner_ids[0])

    return trainset, new_user_raw_id, new_user_neighbor_raw_id
def model_training_and_evalution():
    """
    Train an item-based KNNWithMeans model and print the neighbors of a song.

    Ratings are loaded from a tab-separated ``user item rating`` file at a
    hard-coded path, the model is fitted on the full trainset with
    Pearson-baseline item similarities, and the names of the 10 nearest
    neighbors of one hard-coded item are printed.

    Every ``print`` call takes a single argument, so the output is the same
    under Python 2 and Python 3.
    """
    print("欢迎来到 训练阶段")
    file_path = os.path.expanduser(
        r'E:\JiangIntellijWorkingSpace\tools\music_recommendation\transform_playlist_song_rating.txt')
    reader = Reader(line_format='user item rating', sep='\t')
    music_data = Dataset.load_from_file(file_path, reader=reader)

    print("构建数据集")
    trainset = music_data.build_full_trainset()

    print("开始训练模型.....")
    sim_options = {'name': 'pearson_baseline', 'user_based': False}
    # sim_options must be passed by keyword: the first positional parameter
    # of KNNWithMeans is k, so a positional dict would be taken as k and the
    # similarity settings would silently fall back to the defaults.
    algo = KNNWithMeans(sim_options=sim_options)
    algo.fit(trainset)

    rid_to_name, name_to_rid = read_item_names()

    # Look up the query item by name, then translate raw id -> inner id.
    song_raw_id = name_to_rid[u'Over The Horizon-SAMSUNG GALAXY THEME']
    print(song_raw_id)
    song_inner_id = algo.trainset.to_inner_iid(song_raw_id)

    neighbor_inner_ids = algo.get_neighbors(song_inner_id, k=10)

    print('the 10 nearest neighbors of it are(为你推荐最相近的10首歌单):')
    for inner_id in neighbor_inner_ids:
        # inner id -> raw id -> display name
        print(rid_to_name[algo.trainset.to_raw_iid(inner_id)])
# Fit a KNNWithMeans model with default settings on the existing trainset.
# (KNNBaseline(sim_options=sim_options) was the alternative tried here.)
algo = KNNWithMeans()
algo.fit(trainset)

# Use the 40th playlist name in the name -> id mapping as the query.
current_playlist = list(name_id_dic.keys())[39]
print("歌单名称", current_playlist)

# Map the playlist name to its raw id.
playlist_id = name_id_dic[current_playlist]
print("歌单id", playlist_id)

# Translate the raw id into the trainset's inner user id (to_inner_uid).
playlist_inner_id = algo.trainset.to_inner_uid(playlist_id)
print("内部id", playlist_inner_id)

# Fetch the 10 nearest neighbors (returned as inner ids).
playlist_neighbors = algo.get_neighbors(playlist_inner_id, k=10)

# Map the neighbors back: inner id -> raw id (to_raw_uid) -> playlist name.
playlist_neighbors = (algo.trainset.to_raw_uid(inner_id)
                      for inner_id in playlist_neighbors)
playlist_neighbors = (id_name_dic[playlist_id]
                      for playlist_id in playlist_neighbors)

print("和歌单 《", current_playlist, "》 最接近的10个歌单为:\n")
for playlist in playlist_neighbors:
    print(playlist, algo.trainset.to_inner_uid(name_id_dic[playlist]))

# Rebuild the song id -> song name mapping from the pickled dictionary.
# NOTE: pickle can execute arbitrary code; only load trusted files.
with open("data/song.pkl", "rb") as song_file:
    song_id_name_dic = pickle.load(song_file, encoding='utf-8')
print("加载歌曲id到歌曲名的映射字典完成...")
""" 构造id字典 """ rid_to_name[line[0]] = line[1] name_to_rid[line[1]] = line[0] return rid_to_name, name_to_rid # 首先,用算法计算相互间的相似度 file_path = os.path.expanduser( r'E:\JiangIntellijWorkingSpace\tools\music_recommendation\ml-100k\u.data') # 告诉reader,文本的格式是怎么样的 reader = Reader(line_format='user item rating timestamp', sep='\t') data = Dataset.load_from_file(file_path, reader=reader) trainset = data.build_full_trainset() sim_options = {'name': 'pearson_baseline', 'user_based': False} algo = KNNWithMeans(sim_options) algo.train(trainset) # 获取电影名到电影id和电影id到电影名的映射 rid_to_name, name_to_rid = read_item_names() toy_story_raw_id = name_to_rid['Toy Story (1995)'] toy_story_inner_id = algo.trainset.to_inner_iid(toy_story_raw_id) toy_story_neighbors = algo.get_neighbors(toy_story_inner_id, k=10) toy_story_neighbors = (algo.trainset.to_raw_iid(inner_id) for inner_id in toy_story_neighbors) toy_story_neighbors = (rid_to_name[rid] for rid in toy_story_neighbors) print('the 10 nearest neighbors of it are(为你推荐最相近的10首电影):') for movie in toy_story_neighbors: print movie