def mf_bpr_rec(is_test):
    """Train and evaluate the MF-BPR recommender.

    Args:
        is_test: when True, evaluate on the held-out split and return MAP@5;
            otherwise write the submission CSV and return 0.
    """
    print('*** Test MF-BPR Recommender ***')

    ev = Evaluator(is_test=is_test)
    ev.split()
    rec = MFRec.MFRec()

    # Matrix-factorisation model trained with BPR on the train URM.
    # (Renamed from `MfRec`, which confusingly mirrored the MFRec module name.)
    mf_bpr = MFBPR.MFBPR(ev.get_URM_train(),
                         nnz=0.1,
                         n_factors=20,
                         learning_rate=0.01,
                         epochs=100,
                         user_regularization=0.1,
                         positive_item_regularization=0.1,
                         negative_item_regularization=0.1)

    rec.fit(ev.get_URM_train(), ev.get_target_playlists(),
            ev.get_target_tracks(), ev.num_playlists_to_test,
            mf_bpr, is_test)

    train_df = rec.recommend()

    if is_test:
        map5 = ev.map5(train_df)
        # Fixed label: this is the MF-BPR recommender and ev.map5 computes
        # MAP@5 (the original printed 'Hybrid MAP@10:').
        print('MF-BPR MAP@5:', map5)
        return map5
    else:
        print('Prediction saved!')
        # NOTE(review): the [:-19] slice assumes a fixed-length directory
        # name in the file's path — fragile; confirm against project layout.
        train_df.to_csv(os.path.dirname(os.path.realpath(__file__))[:-19]
                        + "/all/sub.csv", sep=',', index=False)
        return 0
def hybrid_rec(is_test):
    """Train and evaluate the Hybrid recommender.

    Blends three similarity matrices as
        alfa * ((1 - avg) * collab + avg * content) + (1 - alfa) * slimBPR
    with alfa=0.3, avg=0.3.

    Args:
        is_test: when True, evaluate on the held-out split and return MAP@5;
            otherwise write the submission CSV and return 0.
    """
    print('*** Test Hybrid Recommender ***')

    b = Builder()
    ev = Evaluator(is_test=is_test)
    ev.split()
    rec = HybridRec.HybridRec()

    S_UCM = b.get_S_UCM_KNN(b.get_UCM(ev.get_URM_train()), 600)
    S_ICM = b.build_S_ICM_knn(b.build_ICM(), 250)
    Slim = SlimBPR.SlimBPR(ev.get_URM_train(),
                           epochs=1,
                           learning_rate=0.01,
                           positive_item_regularization=1,
                           negative_item_regularization=1).get_S_SLIM_BPR(500)

    rec.fit(ev.get_URM_train(), ev.get_target_playlists(),
            ev.get_target_tracks(), ev.num_playlists_to_test,
            S_ICM, S_UCM, Slim, is_test, alfa=0.3, avg=0.3)

    # Tuning log (was a free-floating, no-op string literal; kept as comments):
    #   0.30, 0.30   alfa*((1-avg)*collab + avg*content) + (1-alfa)*slimBPR
    #   only collab:   knn=500 0.09080017548893707   knn=600 0.09085745115462485
    #   only content:  knn=250 0.05537121844924659   knn=300 0.055101704695727706
    #   only slim:     lr=0.01 epoch=1 0.09087007071213243
    #                  lr=0.001 epoch=8 0.09346656108877179
    #   content+collab: avg=0.20 0.   avg=0.30 0.09762916809334841
    #   all together:   alfa=0.40 0.10715025718387602   alfa=0.30 0.1082252839472891

    train_df = rec.recommend()

    if is_test:
        map5 = ev.map5(train_df)
        # Fixed label: ev.map5 computes MAP@5 (original printed 'MAP@10').
        print('Hybrid MAP@5:', map5)
        return map5
    else:
        print('Prediction saved!')
        train_df.to_csv(os.path.dirname(os.path.realpath(__file__))[:-19]
                        + "/all/sub.csv", sep=',', index=False)
        return 0
def mf_als_rec(is_test):
    """Train an implicit-feedback ALS model via Spark MLlib and evaluate it.

    Args:
        is_test: when True, evaluate on the held-out split and return MAP@5;
            otherwise write the submission CSV and return 0.
    """
    print('*** Test MF-ALS Recommender ***')

    conf = SparkConf().setAppName("MF-ALS Rec").setMaster("local")
    sc = SparkContext(conf=conf)

    b = Builder()
    ev = Evaluator(is_test=is_test)
    ev.split()
    # NOTE(review): UCM is assigned but never used below — kept because
    # Builder/get_UCM may have side effects; confirm and remove if not.
    UCM = b.get_UCM(ev.get_URM_train())
    target_playlists = ev.get_target_playlists()

    urm_train_indices = ev.get_URM_train().nonzero()

    print('Creating RDD of tuples')
    # Every non-zero (playlist, track) cell becomes an implicit rating of 1.
    rows, cols = urm_train_indices
    ratings_list = [Rating(rows[index], cols[index], 1)
                    for index in tqdm(range(rows.size))]
    ratings = sc.parallelize(ratings_list)

    model = ALS.trainImplicit(ratings, rank=10, iterations=5, alpha=0.01)

    print('Predicting...', flush=True)
    # Top-10 recommendations per user, restricted to the target playlists.
    all_predictions = model.recommendProductsForUsers(10)\
        .filter(lambda r: r[0] in target_playlists).collect()

    # Each entry: [playlist_id, [recommended track ids]].
    dataframe_list = [[user, [item.product for item in recs]]
                      for user, recs in tqdm(all_predictions)]
    # Sort by playlist id (replaces the original nested get_id helper).
    dataframe_list.sort(key=lambda row: row[0])

    train_df = pd.DataFrame(dataframe_list,
                            columns=['playlist_id', 'track_ids'])

    if is_test:
        map5 = ev.map5(train_df)
        # Fixed label: this is the MF-ALS recommender and ev.map5 computes
        # MAP@5 (the original printed 'Hybrid MAP@10:').
        print('MF-ALS MAP@5:', map5)
        return map5
    else:
        print('Prediction saved!')
        train_df.to_csv(os.path.dirname(os.path.realpath(__file__))[:-19]
                        + "/all/sub.csv", sep=',', index=False)
        return 0
def round_robin_rec(is_test, avg_mode):
    """Run the Round Robin recommender and evaluate or save its output.

    Combines content (ICM), collaborative (UCM) and SLIM-BPR similarities
    in "jump" mode with weights a=3, b=1, c=1.

    Args:
        is_test: evaluate on the held-out split when True, else save a CSV.
        avg_mode: use the averaged strategy instead of strict round-robin.
    """
    print('*** Test Round Robin Recommender ***')

    builder = Builder()
    evaluator = Evaluator(is_test=is_test)
    evaluator.split()
    recommender = RoundRobinRec.RoundRobinRec()

    S_ICM = builder.build_S_ICM_knn(builder.build_ICM(), 250)
    S_UCM = builder.get_S_UCM_KNN(builder.get_UCM(evaluator.get_URM_train()), 500)
    Slim = SlimBPR.SlimBPR(evaluator.get_URM_train()).get_S_SLIM_BPR(500)

    recommender.fit(evaluator.get_URM_train(),
                    evaluator.get_target_playlists(),
                    evaluator.get_target_tracks(),
                    evaluator.num_playlists_to_test,
                    S_ICM, S_UCM, Slim, is_test,
                    mode="jump", a=3, b=1, c=1)

    train_df = recommender.recommend_avg() if avg_mode else recommender.recommend_rr()

    if is_test:
        print('RoundRobin MAP@5:', evaluator.map5(train_df))
    else:
        print('Prediction saved!')
        destination = os.path.dirname(os.path.realpath(__file__))[:-19] + "/all/sub.csv"
        train_df.to_csv(destination, sep=',', index=False)
def collaborative_filtering(is_test):
    """Run the user-based collaborative filtering recommender.

    Args:
        is_test: evaluate on the held-out split when True, else save a CSV.
    """
    print('*** Test Collaborative Filtering Recommender ***')

    builder = Builder()
    evaluator = Evaluator(is_test=is_test)
    evaluator.split()
    recommender = CollaborativeFilteringRec.CollaborativeFilteringRec()

    # User-user similarity from the UCM built on the train URM, knn=500.
    S_UCM = builder.get_S_UCM_KNN(builder.get_UCM(evaluator.get_URM_train()), 500)

    recommender.fit(evaluator.get_URM_train(),
                    evaluator.get_target_playlists(),
                    evaluator.get_target_tracks(),
                    evaluator.num_playlists_to_test,
                    S_UCM, is_test)

    train_df = recommender.recommend()

    if is_test:
        print('CollaborativeFiltering MAP@5:', evaluator.map5(train_df))
    else:
        print('Prediction saved!')
        destination = os.path.dirname(os.path.realpath(__file__))[:-19] + "/all/subCollab.csv"
        train_df.to_csv(destination, sep=',', index=False)
def hybrid_rec(is_test):
    """Hybrid recommender (ICM + UCM + SLIM-BPR) with weights 0.20 / 0.74.

    NOTE(review): this redefines hybrid_rec from earlier in the file and
    shadows it at import time — confirm which version is intended.

    Fix: is_test is now threaded through to Evaluator and rec.fit; the
    original hard-coded Evaluator() and True, so the flag only affected
    the final evaluate-vs-save branch.
    """
    print('*** Test Hybrid Recommender ***')

    b = Builder()
    ev = Evaluator(is_test=is_test)
    ev.split()
    rec = HybridRec.HybridRec()

    S_ICM = b.build_S_ICM_knn(b.build_ICM(), 250)
    S_UCM = b.get_S_UCM_KNN(b.get_UCM(ev.get_URM_train()), 500)
    Slim = SlimBPR.SlimBPR(ev.get_URM_train()).get_S_SLIM_BPR(500)

    rec.fit(ev.get_URM_train(), ev.get_target_playlists(),
            ev.get_target_tracks(), ev.num_playlists_to_test,
            S_ICM, S_UCM, Slim, is_test, 0.20, 0.74)

    train_df = rec.recommend()

    if is_test:
        map5 = ev.map5(train_df)
        print('Hybrid MAP@5:', map5)
    else:
        print('Prediction saved!')
        train_df.to_csv('Hybrid.csv', sep=',', index=False)
def slim_BPR(is_test):
    """Train and evaluate the SLIM-BPR recommender.

    Fix: is_test is now passed to Evaluator (the original used the default
    constructor), so the split matches the evaluate-vs-save branch below.
    """
    print('*** Test Slim BPR Recommender ***')

    ev = Evaluator(is_test=is_test)
    ev.split()
    rec = SlimBPRRec.SlimBPRRec()

    # Positional hyper-parameters kept in the original order —
    # presumably learning rate 0.1, 1 epoch, regularizations 1.0/1.0,
    # knn=1000; verify against SlimBPRRec.fit's signature.
    rec.fit(ev.get_URM_train(), ev.get_target_playlists(),
            ev.get_target_tracks(), ev.num_playlists_to_test,
            0.1, 1, 1.0, 1.0, 1000, 1, is_test)

    train_df = rec.recommend()

    if is_test:
        map5 = ev.map5(train_df)
        print('SlimBPR MAP@5:', map5)
    else:
        print('Prediction saved!')
        train_df.to_csv('SlimBPR.csv', sep=',', index=False)
def item_based(is_test):
    """Train and evaluate the item-based content recommender.

    Fix: is_test is now passed to Evaluator (the original used the default
    constructor), so the split matches the evaluate-vs-save branch below.
    """
    print('*** Item Based Recommender ***')

    b = Builder()
    ev = Evaluator(is_test=is_test)
    ev.split()
    rec = ItemBasedRec.ItemBasedRec()

    # Item-item similarity from the content matrix, knn=250.
    S_ICM = b.build_S_ICM_knn(b.build_ICM(), 250)

    rec.fit(ev.get_URM_train(), ev.get_target_playlists(),
            ev.get_target_tracks(), ev.num_playlists_to_test,
            S_ICM, is_test)

    train_df = rec.recommend()

    if is_test:
        map5 = ev.map5(train_df)
        print('ItemBased MAP@5:', map5)
    else:
        print('Prediction saved!')
        train_df.to_csv('ItemBased.csv', sep=',', index=False)
def collaborative_filtering(is_test):
    """User-based collaborative filtering recommender.

    NOTE(review): this redefines collaborative_filtering from earlier in the
    file and shadows it at import time — confirm which version is intended.

    Fix: rec.fit now receives is_test instead of a hard-coded True, and
    Evaluator receives the flag so the split matches the final branch.

    NOTE(review): S_UCM is built from the full URM (b.get_URM()) rather than
    the train split — possible test leakage; the earlier definition uses
    ev.get_URM_train(). Left unchanged pending confirmation.
    """
    print('*** Test Collaborative Filtering Recommender ***')

    b = Builder()
    ev = Evaluator(is_test=is_test)
    ev.split()
    rec = CollaborativeFilteringRec.CollaborativeFilteringRec()

    S_UCM = b.get_S_UCM_KNN(b.get_UCM(b.get_URM()), 500)

    rec.fit(ev.get_URM_train(), ev.get_target_playlists(),
            ev.get_target_tracks(), ev.num_playlists_to_test,
            S_UCM, is_test)

    train_df = rec.recommend()

    if is_test:
        map5 = ev.map5(train_df)
        print('CollaborativeFiltering MAP@5:', map5)
    else:
        print('Prediction saved!')
        train_df.to_csv('CollaborativeFiltering.csv', sep=',', index=False)
def item_user_avg(is_test):
    """Weighted blend of item-based and user-based predictions (weight 0.80).

    Fix: is_test is now threaded through to Evaluator and rec.fit; the
    original hard-coded Evaluator() and True.

    NOTE(review): S_UCM is built from the full URM (b.get_URM()) rather than
    the train split — possible test leakage; confirm intent.
    """
    print('*** Test Item User Avg Recommender ***')

    b = Builder()
    ev = Evaluator(is_test=is_test)
    ev.split()
    rec = ItemUserAvgRec.ItemUserAvgRec()

    S_ICM = b.build_S_ICM_knn(b.build_ICM(), 250)
    S_UCM = b.get_S_UCM_KNN(b.get_UCM(b.get_URM()), 500)

    rec.fit(ev.get_URM_train(), ev.get_target_playlists(),
            ev.get_target_tracks(), ev.num_playlists_to_test,
            S_ICM, S_UCM, is_test, 0.80)

    train_df = rec.recommend()

    if is_test:
        map5 = ev.map5(train_df)
        print('ItemUserAvg MAP@5:', map5)
    else:
        print('Prediction saved!')
        train_df.to_csv('ItemUserAvg.csv', sep=',', index=False)
def SVD(is_test):
    """Train and evaluate the SVD recommender (k=10 factors, knn=250).

    Fix: the result label and output filename said 'SlimBPR' — a copy-paste
    slip from slim_BPR; both now say 'SVD'.
    """
    print('*** Test SVD Recommender ***')

    b = Builder()
    ev = Evaluator(is_test=is_test)
    ev.split()
    rec = SVDRec.SVDRec()

    rec.fit(ev.get_URM_train(), ev.get_target_playlists(),
            ev.get_target_tracks(), ev.num_playlists_to_test,
            b.build_ICM(), k=10, knn=250, is_test=is_test)

    train_df = rec.recommend()

    if is_test:
        map5 = ev.map5(train_df)
        print('SVD MAP@5:', map5)
    else:
        print('Prediction saved!')
        train_df.to_csv('SVD.csv', sep=',', index=False)
def hybrid_repo(is_test): b = Builder() ev = Evaluator() ev.split() ICM = b.build_ICM() URM_train, URM_test = train_test_holdout(b.get_URM(), train_perc=0.8) URM_train, URM_validation = train_test_holdout(URM_train, train_perc=0.9) from ParameterTuning.AbstractClassSearch import EvaluatorWrapper from Base.Evaluation.Evaluator import SequentialEvaluator evaluator_validation = SequentialEvaluator(URM_validation, cutoff_list=[5]) evaluator_test = SequentialEvaluator(URM_test, cutoff_list=[5, 10]) evaluator_validation = EvaluatorWrapper(evaluator_validation) evaluator_test = EvaluatorWrapper(evaluator_test) from KNN.ItemKNNCFRecommender import ItemKNNCFRecommender from ParameterTuning.BayesianSearch import BayesianSearch recommender_class = ItemKNNCFRecommender parameterSearch = BayesianSearch(recommender_class, evaluator_validation=evaluator_validation, evaluator_test=evaluator_test) from ParameterTuning.AbstractClassSearch import DictionaryKeys hyperparamethers_range_dictionary = {} hyperparamethers_range_dictionary["topK"] = [ 5, 10, 20, 50, 100, 150, 200, 300, 400, 500, 600, 700, 800 ] hyperparamethers_range_dictionary["shrink"] = [ 0, 10, 50, 100, 200, 300, 500, 1000 ] hyperparamethers_range_dictionary["similarity"] = ["cosine"] hyperparamethers_range_dictionary["normalize"] = [True, False] recommenderDictionary = { DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train], DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {}, DictionaryKeys.FIT_POSITIONAL_ARGS: dict(), DictionaryKeys.FIT_KEYWORD_ARGS: dict(), DictionaryKeys.FIT_RANGE_KEYWORD_ARGS: hyperparamethers_range_dictionary } output_root_path = "result_experiments/" import os # If directory does not exist, create if not os.path.exists(output_root_path): os.makedirs(output_root_path) output_root_path += recommender_class.RECOMMENDER_NAME n_cases = 2 metric_to_optimize = "MAP" best_parameters = parameterSearch.search(recommenderDictionary, n_cases=n_cases, output_root_path=output_root_path, metric=metric_to_optimize) 
itemKNNCF = ItemKNNCFRecommender(URM_train) itemKNNCF.fit(**best_parameters) from FW_Similarity.CFW_D_Similarity_Linalg import CFW_D_Similarity_Linalg n_cases = 2 metric_to_optimize = "MAP" best_parameters_ItemKNNCBF = parameterSearch.search( recommenderDictionary, n_cases=n_cases, output_root_path=output_root_path, metric=metric_to_optimize) itemKNNCBF = ItemKNNCBFRecommender(ICM, URM_train) itemKNNCBF.fit(**best_parameters_ItemKNNCBF) """ #_____________________________________________________________________ from ParameterTuning.BayesianSearch import BayesianSearch from ParameterTuning.AbstractClassSearch import DictionaryKeys from ParameterTuning.AbstractClassSearch import EvaluatorWrapper evaluator_validation_tuning = EvaluatorWrapper(evaluator_validation) evaluator_test_tuning = EvaluatorWrapper(evaluator_test) recommender_class = CFW_D_Similarity_Linalg parameterSearch = BayesianSearch(recommender_class, evaluator_validation=evaluator_validation_tuning, evaluator_test=evaluator_test_tuning) hyperparamethers_range_dictionary = {} hyperparamethers_range_dictionary["topK"] = [5, 10, 20, 50, 100, 150, 200, 300, 400, 500, 600, 700, 800] hyperparamethers_range_dictionary["add_zeros_quota"] = range(0, 1) hyperparamethers_range_dictionary["normalize_similarity"] = [True, False] recommenderDictionary = {DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train, ICM, itemKNNCF.W_sparse], DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {}, DictionaryKeys.FIT_POSITIONAL_ARGS: dict(), DictionaryKeys.FIT_KEYWORD_ARGS: dict(), DictionaryKeys.FIT_RANGE_KEYWORD_ARGS: hyperparamethers_range_dictionary} output_root_path = "result_experiments/" import os # If directory does not exist, create if not os.path.exists(output_root_path): os.makedirs(output_root_path) output_root_path += recommender_class.RECOMMENDER_NAME n_cases = 2 metric_to_optimize = "MAP" best_parameters_CFW_D = parameterSearch.search(recommenderDictionary, n_cases=n_cases, output_root_path=output_root_path, 
metric=metric_to_optimize) CFW_weithing = CFW_D_Similarity_Linalg(URM_train, ICM, itemKNNCF.W_sparse) CFW_weithing.fit(**best_parameters_CFW_D) #___________________________________________________________________________________________- """ from GraphBased.P3alphaRecommender import P3alphaRecommender P3alpha = P3alphaRecommender(URM_train) P3alpha.fit() from MatrixFactorization.PureSVD import PureSVDRecommender #pureSVD = PureSVDRecommender(URM_train) #pureSVD.fit() rec = HybridRec.HybridRec() S_UCM = b.get_S_UCM_KNN(b.get_UCM(ev.get_URM_train()), 600) S_ICM = b.build_S_ICM_knn(b.build_ICM(), 250) rec.fit(ev.get_URM_train(), ev.get_target_playlists(), ev.get_target_tracks(), ev.num_playlists_to_test, itemKNNCBF.W_sparse, itemKNNCF.W_sparse, P3alpha.W_sparse, is_test=True, alfa=0.7, avg=0.3) train_df = rec.recommend() if is_test: map5 = ev.map5(train_df) print('Hybrid MAP@10:', map5) return map5 else: print('Prediction saved!') train_df.to_csv(os.path.dirname(os.path.realpath(__file__))[:-19] + "/all/sub.csv", sep=',', index=False) return 0 #hybridrecommender = ItemKNNSimilarityHybridRecommender(URM_train, itemKNNCF.W_sparse, P3alpha.W_sparse) #hybridrecommender.fit(alpha=0.5) #print(evaluator_validation.evaluateRecommender(hybridrecommender)) """