def TestUserCFMult():
    """Evaluate UserCF for several neighbourhood sizes, one process per K.

    Reads the ratings file, splits it for each seed, builds the user
    similarity with ``UserCF.UserSimilarityVersion3`` and then starts one
    ``Evaluation`` process per value in ``K``. A single ``WriteIntoD``
    process is handed the shared queue and the result frame. The elapsed
    wall-clock time is printed once every ``Evaluation`` process has exited.
    """
    start = time.time()
    file_path = '~/file/rs/dataset/ml-1m/ratings.dat'
    d_file = eva.readData(file_path, '::')
    M = 8  # number of groups handed to the split
    N = 10  # number of recommendations
    K = [5, 10, 20, 40, 80, 120]
    # Only seed 0 is evaluated; np.arange(M) was the commented-out alternative.
    seeds = [0]
    columns_list = ['Precision', 'Recall', 'Coverage', 'Popularity']
    d = pd.DataFrame(np.zeros([len(K), len(columns_list)]),
                     index=K,
                     columns=columns_list)
    for seed in seeds:
        train, test = eva.SplitData(d_file, M, seed)
        item_popularity = eva.ItemsPopularity(d_file, M, seed)
        W_user = UserCF.UserSimilarityVersion3(train)
        q = Queue()
        writers = []
        for k in K:
            pw = Process(target=Evaluation,
                         args=(q, k, train, test, item_popularity, W_user, N))
            pw.start()  # start a writer
            writers.append(pw)
        # NOTE(review): `d` is handed to a separate process, so updates made
        # there are presumably not reflected in this process's `d` -- confirm
        # how WriteIntoD persists the results.
        pr = Process(target=WriteIntoD, args=(q, d))
        pr.start()  # start the reader
        # Wait for every writer, not only the last one started; otherwise the
        # elapsed time below could be taken while evaluations are still
        # running. The reader is already running, so the queue can drain.
        for pw in writers:
            pw.join()
    end = time.time()
    print('Total Time: %.2fs' % (end - start))
    # NOTE(review): the original comment described this as force-stopping the
    # reader, but join() only waits for it -- confirm WriteIntoD returns on
    # its own, otherwise this call blocks.
    pr.join()
def TestRandomMostPopupar():
    """Evaluate the Random and MostPopular baselines and export the metrics.

    For each seed the data is split, both baselines are evaluated through
    ``eva.RandomResult`` and ``eva.MostPopularResult``, and their four metrics
    are accumulated. The per-seed mean is printed and written to
    ``Result-Random-Popular.xlsx``.
    """
    file_path = '~/file/rs/dataset/ml-1m/ratings.dat'
    d_file = eva.readData(file_path, '::')
    M = 8  # number of groups handed to the split
    N = 10  # number of recommendations
    # Only seed 0 is evaluated; np.arange(M) was the commented-out alternative.
    seeds = [0]
    columns_list = ['Precision', 'Recall', 'Coverage', 'Popularity']
    d = pd.DataFrame(np.zeros([2, len(columns_list)]),
                     index=['Random', 'MostPopular'],
                     columns=columns_list)
    for seed in seeds:
        train, test = eva.SplitData(d_file, M, seed)
        item_popularity = eva.ItemsPopularity(d_file, M, seed)

        precision, recall, coverage, popularity = eva.RandomResult(
            train, test, item_popularity, N)
        d.loc['Random', columns_list] += [precision, recall, coverage,
                                          popularity]

        precision, recall, coverage, popularity = eva.MostPopularResult(
            train, test, item_popularity, N)
        d.loc['MostPopular', columns_list] += [precision, recall, coverage,
                                               popularity]
    # Divide once after accumulating. Dividing by (index + 1) inside the loop
    # re-divides earlier contributions and is only a true mean for one seed.
    d /= len(seeds)
    print(d)
    # sheet_name is passed by keyword: positional use is deprecated in
    # recent pandas releases.
    d.to_excel('Result-Random-Popular.xlsx', sheet_name='Random-Popular')
def main():
    """Compare UserCF and ItemCF over several K values, export and plot.

    For each seed the data is split and both similarity structures are built.
    Each ``k`` in ``K`` is then evaluated once with the user similarity and
    once with the item similarity. The per-seed mean of the four metrics is
    written to ``Result-UserCF-ItemCF-K.xlsx`` and shown in a 2x2 grid with
    one panel per metric.
    """
    # Read the dataset.
    file_path = '~/file/rs/dataset/ml-1m/ratings.dat'
    d_file = eva.readData(file_path, '::')
    M = 8  # number of groups handed to the split
    N = 10  # number of recommendations
    K = [5, 10, 20, 40, 80, 120, 160]
    # Only seed 0 is evaluated; np.arange(M) was the commented-out alternative.
    seeds = [0]
    # Column order matters: the plotting code slices adjacent column pairs.
    columns_list = [
        'PrecisionUserCF', 'PrecisionItemCF', 'RecallUserCF', 'RecallItemCF',
        'CoverageUserCF', 'CoverageItemCF', 'PopularityUserCF',
        'PopularityItemCF'
    ]
    userCF_columns = [
        'PrecisionUserCF', 'RecallUserCF', 'CoverageUserCF', 'PopularityUserCF'
    ]
    itemCF_columns = [
        'PrecisionItemCF', 'RecallItemCF', 'CoverageItemCF', 'PopularityItemCF'
    ]
    d = pd.DataFrame(np.zeros([len(K), len(columns_list)]),
                     index=K,
                     columns=columns_list)
    for seed in seeds:
        train, test = eva.SplitData(d_file, M, seed)
        item_popularity = eva.ItemsPopularity(d_file, M, seed)
        W_user = UserCF.UserSimilarityVersion3(train)
        W_item = ItemCF.ItemSimilarityVersion2(train)
        for k in K:
            # The trailing 1 / 2 presumably selects the UserCF / ItemCF
            # recommender inside eva -- TODO confirm.
            precision, recall, coverage, popularity = eva.PrecisionAndRecallAndCoverageAndPopularity(
                train, test, item_popularity, k, W_user, N, 1)
            d.loc[k, userCF_columns] += [precision, recall, coverage,
                                         popularity]
            precision, recall, coverage, popularity = eva.PrecisionAndRecallAndCoverageAndPopularity(
                train, test, item_popularity, k, W_item, N, 2)
            d.loc[k, itemCF_columns] += [precision, recall, coverage,
                                         popularity]
    # Divide once after accumulating. Dividing by (index + 1) inside the loop
    # re-divides earlier contributions and is only a true mean for one seed.
    d /= len(seeds)
    # sheet_name is passed by keyword: positional use is deprecated in
    # recent pandas releases.
    d.to_excel('Result-UserCF-ItemCF-K.xlsx', sheet_name='UserCF-ItemCF-K')

    _, axes = plt.subplots(2, 2)
    titles = ['Precision', 'Recall', 'Coverage', 'Popularity']
    for pos, title in enumerate(titles):
        ax = axes[pos // 2][pos % 2]
        ax.set_title(title)
        # Columns 2*pos and 2*pos + 1 hold the UserCF / ItemCF pair.
        d.iloc[:, 2 * pos:2 * pos + 2].plot(ax=ax, style=['o-', 'o-'])
    plt.show()
def TestUserCF():
    """Evaluate UserCF over several K values, export and plot the metrics.

    For each seed the data is split, the user similarity is built, and each
    ``k`` in ``K`` is evaluated. The per-seed mean of the four metrics is
    written to ``Result-UserCF-K.xlsx`` and shown in a 2x2 grid with one
    panel per metric.
    """
    # Read the dataset.
    file_path = '~/file/rs/dataset/ml-1m/ratings.dat'
    d_file = eva.readData(file_path, '::')
    M = 8  # number of groups handed to the split
    N = 10  # number of recommendations
    K = [5, 10, 20, 40, 80, 120, 160]
    # Only seed 0 is evaluated; np.arange(M) was the commented-out alternative.
    seeds = [0]
    columns_list = ['Precision', 'Recall', 'Coverage', 'Popularity']
    d = pd.DataFrame(np.zeros([len(K), len(columns_list)]),
                     index=K,
                     columns=columns_list)
    for seed in seeds:
        train, test = eva.SplitData(d_file, M, seed)
        item_popularity = eva.ItemsPopularity(d_file, M, seed)
        W_user = UserCF.UserSimilarityVersion3(train)
        for k in K:
            print(k)
            precision, recall, coverage, popularity = eva.PrecisionAndRecallAndCoverageAndPopularity(
                train, test, item_popularity, k, W_user, N, 1)
            d.loc[k, columns_list] += [precision, recall, coverage, popularity]
    # Divide once after accumulating. Dividing by (index + 1) inside the loop
    # re-divides earlier contributions and is only a true mean for one seed.
    d /= len(seeds)
    # sheet_name is passed by keyword: positional use is deprecated in
    # recent pandas releases.
    d.to_excel('Result-UserCF-K.xlsx', sheet_name='UserCF-K')

    _, axes = plt.subplots(2, 2)
    for pos, title in enumerate(columns_list):
        ax = axes[pos // 2][pos % 2]
        ax.set_title(title)
        ax.plot(d.iloc[:, pos], 'o-', label=title.lower())
        # Every line carries a label, so draw a legend on each panel; a single
        # plt.legend() would only affect the current axes.
        ax.legend()
    plt.show()
def test_recommend():
    """Compare the original recommendation algorithm with the improved one.

    Builds one item similarity with ``ItemCF.ItemSimilarityVersion2`` and, for
    every ``k`` in ``K``, submits two ``Evaluation`` jobs to a 4-worker pool
    that differ only in their last argument (1 or 2). The results fill the
    ``-I`` and ``-II`` columns, are written to
    ``Result-ItemCF-Recommend-K.xlsx`` and plotted in a 2x2 grid.
    """
    file_path = '~/file/rs/dataset/ml-1m/ratings.dat'
    d_file = eva.readData(file_path, '::')
    M = 8  # number of groups handed to the split
    N = 10  # number of recommendations
    K = [5, 10, 20, 40, 80, 120, 160]
    # The third argument (0) is the seed.
    train, test, train_items_list, item_popularity, items_user = SplitData(
        d_file, M, 0)
    # ItemSimilarityVersion1 was the commented-out alternative here.
    W = ItemCF.ItemSimilarityVersion2(train, item_popularity, items_user)
    # Column order matters: the plotting code slices adjacent column pairs.
    columns_list = [
        'Precision-I', 'Precision-II', 'Recall-I', 'Recall-II', 'Coverage-I',
        'Coverage-II', 'Popularity-I', 'Popularity-II'
    ]
    I_columns = ['Precision-I', 'Recall-I', 'Coverage-I', 'Popularity-I']
    II_columns = ['Precision-II', 'Recall-II', 'Coverage-II', 'Popularity-II']
    d = pd.DataFrame(np.zeros([len(K), len(columns_list)]),
                     index=K,
                     columns=columns_list)

    # ItemCF
    p = Pool(4)
    resultI = {}
    resultII = {}
    for k in K:
        # The trailing 1 / 2 presumably selects recommender variant I / II
        # inside Evaluation -- TODO confirm.
        resultI[k] = p.apply_async(
            Evaluation, args=(k, train, test, item_popularity, W, N, 1))
        resultII[k] = p.apply_async(
            Evaluation, args=(k, train, test, item_popularity, W, N, 2))
    p.close()
    p.join()  # wait for all child processes to finish
    for k, v in resultI.items():
        d.loc[k, I_columns] += v.get()
    for k, v in resultII.items():
        d.loc[k, II_columns] += v.get()
    # sheet_name is passed by keyword: positional use is deprecated in
    # recent pandas releases.
    d.to_excel('Result-ItemCF-Recommend-K.xlsx', sheet_name='ItemCF-K')

    _, axes = plt.subplots(2, 2)
    titles = ['Precision', 'Recall', 'Coverage', 'Popularity']
    for pos, title in enumerate(titles):
        ax = axes[pos // 2][pos % 2]
        ax.set_title(title)
        # Columns 2*pos and 2*pos + 1 hold the -I / -II pair.
        d.iloc[:, 2 * pos:2 * pos + 2].plot(ax=ax, style=['o-', 'o-'])
    plt.legend()
    plt.show()
def TestItemCF_Norm():
    """Compare ItemCF with ItemCF-Norm.

    Builds two item similarities (``ItemCF.ItemSimilarityVersion1`` and
    ``ItemCF.ItemSimilarityNorm``) and, for every ``k`` in ``K``, submits one
    ``Evaluation`` job per similarity to a 4-worker pool. The results fill the
    ``-ItemCF`` and ``-Norm`` columns, the elapsed time is printed, and the
    table is written to ``Result-ItemCF-Norm-K.xlsx`` and plotted in a 2x2
    grid.
    """
    file_path = '~/file/rs/dataset/ml-1m/ratings.dat'
    start = time.time()
    d_file = eva.readData(file_path, '::')
    M = 8  # number of groups handed to the split
    N = 10  # number of recommendations
    K = [5, 10, 20, 40, 80, 120, 160]
    # The third argument (0) is the seed.
    train, test, train_items_list, item_popularity, items_user = SplitData(
        d_file, M, 0)
    W_ItemCF = ItemCF.ItemSimilarityVersion1(train, item_popularity,
                                             items_user)
    W_Norm = ItemCF.ItemSimilarityNorm(train, item_popularity, items_user)
    # Column order matters: the plotting code slices adjacent column pairs.
    columns_list = [
        'Precision-ItemCF', 'Precision-Norm', 'Recall-ItemCF', 'Recall-Norm',
        'Coverage-ItemCF', 'Coverage-Norm', 'Popularity-ItemCF',
        'Popularity-Norm'
    ]
    I_columns = [
        'Precision-ItemCF', 'Recall-ItemCF', 'Coverage-ItemCF',
        'Popularity-ItemCF'
    ]
    II_columns = [
        'Precision-Norm', 'Recall-Norm', 'Coverage-Norm', 'Popularity-Norm'
    ]
    d = pd.DataFrame(np.zeros([len(K), len(columns_list)]),
                     index=K,
                     columns=columns_list)

    # ItemCF
    p = Pool(4)
    resultItemCF = {}
    resultNorm = {}
    for k in K:
        resultItemCF[k] = p.apply_async(
            Evaluation, args=(k, train, test, item_popularity, W_ItemCF, N))
        resultNorm[k] = p.apply_async(
            Evaluation, args=(k, train, test, item_popularity, W_Norm, N))
    p.close()
    p.join()  # wait for all child processes to finish
    for k, v in resultItemCF.items():
        d.loc[k, I_columns] += v.get()
    for k, v in resultNorm.items():
        d.loc[k, II_columns] += v.get()
    end = time.time()
    print('total time: %.2fs' % (end - start))
    # sheet_name is passed by keyword: positional use is deprecated in
    # recent pandas releases.
    d.to_excel('Result-ItemCF-Norm-K.xlsx', sheet_name='ItemCF-K')

    _, axes = plt.subplots(2, 2)
    titles = ['Precision', 'Recall', 'Coverage', 'Popularity']
    for pos, title in enumerate(titles):
        ax = axes[pos // 2][pos % 2]
        ax.set_title(title)
        # Columns 2*pos and 2*pos + 1 hold the ItemCF / Norm pair.
        d.iloc[:, 2 * pos:2 * pos + 2].plot(ax=ax, style=['o-', 'o-'])
    plt.legend()
    plt.show()