Пример #1
0
def _online_similar_user_model(product_category, review_score):
    """
    Trains a user-based KNNWithMeans model and finds the user most similar
    to a newly added user.

    The stored ratings are read from the ``user_rec_sys`` SQL table, the new
    user's rating is appended via ``_add_new_user``, and the model is fitted
    on the full resulting dataset (no train/test split).

    Parameters
    ----------
        product_category: str.
            Forwarded to ``_add_new_user``.
        review_score: int.
            Forwarded to ``_add_new_user``.

    Returns
    -------
        trainset, new_user_raw_id, new_user_neighbor_raw_id

    Raises
    ------
        IndexError
            If the model returns no neighbor for the new user
            (presumably only when there is no other user in the data).
    """

    user_rec_sys_data = pd.read_sql_table("user_rec_sys",
                                          engine,
                                          index_col="index")

    updated_data, new_user_raw_id = _add_new_user(user_rec_sys_data,
                                                  product_category,
                                                  review_score)

    # Defining Data Object.
    # The rating scale is taken from the scores actually present in the data.
    # NOTE(review): Dataset.load_from_df expects the columns in
    # (user, item, rating) order - confirm _add_new_user preserves that.
    review_scores = updated_data["review_score"]
    reader = Reader(rating_scale=(review_scores.min(),
                                  review_scores.max()))
    data = Dataset.load_from_df(updated_data, reader)
    trainset = data.build_full_trainset()

    # Training.
    random.seed(0)
    np.random.seed(0)

    sim_options = {"name": "msd", "user_based": True}

    algo = KNNWithMeans(sim_options=sim_options, verbose=False)
    algo.fit(trainset)

    # Find Similar User.
    # get_neighbors works with the trainset's *inner* ids, so the new user is
    # mapped raw -> inner before the call and the neighbor is mapped
    # inner -> raw afterwards; callers receive raw ids only.
    new_user_inner_id = trainset.to_inner_uid(new_user_raw_id)
    neighbor_inner_ids = algo.get_neighbors(new_user_inner_id, k=1)
    new_user_neighbor_raw_id = trainset.to_raw_uid(neighbor_inner_ids[0])
    return trainset, new_user_raw_id, new_user_neighbor_raw_id
Пример #2
0
def model_training_and_evalution():
    """Train an item-based KNNWithMeans model and print 10 similar items.

    Loads tab-separated ``user item rating`` rows from a hard-coded file,
    fits the model on the full dataset, then looks up the item named
    'Over The Horizon-SAMSUNG GALAXY THEME' and prints the names of its
    10 nearest neighbors.

    Relies on ``read_item_names()`` returning a pair of dicts
    (raw id -> name, name -> raw id).

    Returns nothing; all results are written to stdout.
    """
    print("欢迎来到 训练阶段")
    file_path = os.path.expanduser(r'E:\JiangIntellijWorkingSpace\tools\music_recommendation\transform_playlist_song_rating.txt')
    reader = Reader(line_format='user item rating', sep='\t')
    music_data = Dataset.load_from_file(file_path, reader=reader)
    print("构建数据集")
    trainset = music_data.build_full_trainset()
    print("开始训练模型.....")
    sim_options = {'name': 'pearson_baseline', 'user_based': False}
    # sim_options must be passed by keyword: the first positional parameter
    # of KNNWithMeans is ``k``, so a positional dict would be taken as k and
    # the similarity configuration would be silently ignored.
    algo = KNNWithMeans(sim_options=sim_options)
    algo.fit(trainset)

    rid_to_name, name_to_rid = read_item_names()
    seed_raw_id = name_to_rid[u'Over The Horizon-SAMSUNG GALAXY THEME']
    print(seed_raw_id)

    # get_neighbors works on inner ids: map raw -> inner for the query and
    # inner -> raw -> display name for each result.
    seed_inner_id = algo.trainset.to_inner_iid(seed_raw_id)
    neighbor_inner_ids = algo.get_neighbors(seed_inner_id, k=10)
    print('the 10 nearest neighbors of it are(为你推荐最相近的10首歌单):')
    for inner_id in neighbor_inner_ids:
        print(rid_to_name[algo.trainset.to_raw_iid(inner_id)])
Пример #3
0
# Alternative model: algo = KNNBaseline(sim_options=sim_options)
algo = KNNWithMeans()
algo.fit(trainset)

current_playlist = list(name_id_dic)[39]
print("歌单名称", current_playlist)

# Look up the neighbors.
# Map the playlist name to its (raw) id.
playlist_id = name_id_dic[current_playlist]
print("歌单id", playlist_id)
# Convert the raw id to the trainset's inner user id (to_inner_uid);
# playlists play the role of "users" in this dataset.
playlist_inner_id = algo.trainset.to_inner_uid(playlist_id)
print("内部id", playlist_inner_id)

playlist_neighbors = algo.get_neighbors(playlist_inner_id, k=10)

# Turn the neighbor playlist ids back into playlist names:
# inner id -> raw id (to_raw_uid) -> name.
playlist_neighbors = (algo.trainset.to_raw_uid(inner_id)
                      for inner_id in playlist_neighbors)
playlist_neighbors = (id_name_dic[neighbor_id]
                      for neighbor_id in playlist_neighbors)

print("和歌单 《", current_playlist, "》 最接近的10个歌单为:\n")
for playlist in playlist_neighbors:
    print(playlist, algo.trainset.to_inner_uid(name_id_dic[playlist]))

# Rebuild the song-id -> song-name mapping dictionary.
# NOTE: pickle can execute arbitrary code while loading; only load
# data/song.pkl if it comes from a trusted source.
with open("data/song.pkl", "rb") as song_file:
    song_id_name_dic = pickle.load(song_file, encoding='utf-8')
print("加载歌曲id到歌曲名的映射字典完成...")
Пример #4
0
            """
            构造id字典
            """
            rid_to_name[line[0]] = line[1]
            name_to_rid[line[1]] = line[0]

    return rid_to_name, name_to_rid


# First, have the algorithm compute the pairwise item similarities.
file_path = os.path.expanduser(
    r'E:\JiangIntellijWorkingSpace\tools\music_recommendation\ml-100k\u.data')
# Tell the reader how each line of the text file is laid out.
reader = Reader(line_format='user item rating timestamp', sep='\t')
data = Dataset.load_from_file(file_path, reader=reader)
trainset = data.build_full_trainset()
sim_options = {'name': 'pearson_baseline', 'user_based': False}
# sim_options must be passed by keyword: the first positional parameter of
# KNNWithMeans is ``k``, so a positional dict would be taken as k and the
# item-based similarity configuration would be silently ignored.
algo = KNNWithMeans(sim_options=sim_options)
algo.fit(trainset)
# Get the movie-id -> movie-name and movie-name -> movie-id mappings.
rid_to_name, name_to_rid = read_item_names()
toy_story_raw_id = name_to_rid['Toy Story (1995)']
# get_neighbors works on inner ids: raw -> inner for the query,
# then inner -> raw -> name for each result.
toy_story_inner_id = algo.trainset.to_inner_iid(toy_story_raw_id)
toy_story_neighbors = algo.get_neighbors(toy_story_inner_id, k=10)
toy_story_neighbors = (algo.trainset.to_raw_iid(inner_id)
                       for inner_id in toy_story_neighbors)
toy_story_neighbors = (rid_to_name[rid] for rid in toy_story_neighbors)
print('the 10 nearest neighbors of it are(为你推荐最相近的10首电影):')
for movie in toy_story_neighbors:
    print(movie)