def __init__(self, URM_train, ICM_train):
    """Create the hybrid together with its two item-KNN components.

    The base-class initializer receives ``URM_train`` as given; afterwards
    ``self.URM_train`` is overwritten with a copy passed through
    ``check_matrix(..., 'csr')``.  Neither component is fitted here.

    Args:
        URM_train: user-rating matrix handed to the base class and to both
            component recommenders.
        ICM_train: item-content matrix, stored on the instance and handed to
            the content-based component.
    """
    super(ScoresHybridKNNCFKNNCBF, self).__init__(URM_train)

    # Work on a private copy so the caller's matrix object is not shared.
    urm_copy = URM_train.copy()
    self.URM_train = check_matrix(urm_copy, 'csr')
    self.ICM_train = ICM_train

    # Components are built from the caller's URM_train, not from the copy.
    collaborative = ItemKNNCFRecommender.ItemKNNCFRecommender(URM_train)
    self.itemKNNCF = collaborative
    content_based = ItemKNNCBFRecommender.ItemKNNCBFRecommender(
        URM_train, ICM_train)
    self.itemKNNCBF = content_based
def __init__(self, URM_train, ICM_train):
    """Create the hybrid together with its P3alpha and item-KNN-CBF parts.

    The base-class initializer receives ``URM_train`` as given; afterwards
    ``self.URM_train`` is overwritten with a copy passed through
    ``check_matrix(..., 'csr')``.  Neither component is fitted here.

    Args:
        URM_train: user-rating matrix handed to the base class and to both
            component recommenders.
        ICM_train: item-content matrix, stored on the instance and handed to
            the content-based component.
    """
    super(ScoresHybridSpecializedV2Mid, self).__init__(URM_train)

    # Work on a private copy so the caller's matrix object is not shared.
    urm_copy = URM_train.copy()
    self.URM_train = check_matrix(urm_copy, 'csr')
    self.ICM_train = ICM_train

    # Components are built from the caller's URM_train, not from the copy.
    graph_based = P3alphaRecommender.P3alphaRecommender(URM_train)
    self.P3alpha = graph_based
    content_based = ItemKNNCBFRecommender.ItemKNNCBFRecommender(
        URM_train, ICM_train)
    self.itemKNNCBF = content_based
"validation_metric": "MAP", } # MAP 0.07 Kaggle "topK": 131, "shrink": 2, "similarity": "cosine", "normalize": true} #recommender = UserKNNCFRecommender.UserKNNCFRecommender(URM_train) #recommender.fit(**{"topK": 131, "shrink": 2, "similarity": "cosine", "normalize": True}) '''itemKNNCF = ItemKNNCFRecommender.ItemKNNCFRecommender(URM_train) itemKNNCF.fit(**{"topK": 1000, "shrink": 732, "similarity": "cosine", "normalize": True, "feature_weighting": "TF-IDF"}) itemKNNCF2 = ItemKNNCFRecommender.ItemKNNCFRecommender(URM_ICM_train) itemKNNCF2.fit(**{"topK": 1000, "shrink": 732, "similarity": "cosine", "normalize": True, "feature_weighting": "TF-IDF"}) itemKNNCF3 = ItemKNNCFRecommender.ItemKNNCFRecommender(URM_ICM_train2.T) itemKNNCF3.fit(**{"topK": 1000, "shrink": 732, "similarity": "cosine", "normalize": True, "feature_weighting": "TF-IDF"})''' itemKNNCBF = ItemKNNCBFRecommender.ItemKNNCBFRecommender( URM_train, URM_train.T) itemKNNCBF.fit(topK=700, shrink=200, similarity='jaccard', normalize=True, feature_weighting="TF-IDF") itemKNNCBF2 = ItemKNNCBFRecommender.ItemKNNCBFRecommender( URM_train, URM_ICM_train.T) itemKNNCBF2.fit(topK=700, shrink=200, similarity='jaccard', normalize=True, feature_weighting="TF-IDF") itemKNNCBF3 = ItemKNNCBFRecommender.ItemKNNCBFRecommender( URM_train, URM_ICM_train2) itemKNNCBF3.fit(topK=700,
def _load_or_fit(recommender, args, seed, tp, folder='SavedModels\\'):
    """Load a cached model for (args, seed, tp) or fit and cache a new one.

    The cache file name is built from the recommender's ``RECOMMENDER_NAME``,
    ``toFileName(args)``, the seed and the train percentage, so a model is
    only reused for the exact same split and hyper-parameters.

    Args:
        recommender: object exposing ``RECOMMENDER_NAME``, ``load_model``,
            ``fit`` and ``save_model``.
        args: keyword arguments passed to ``fit`` when no cache is loaded.
        seed: RNG seed of the current fold (part of the cache key).
        tp: train percentage of the URM split (part of the cache key).
        folder: cache directory passed verbatim to ``load_model`` and
            ``save_model``.  NOTE(review): the default uses a Windows path
            separator — confirm before running on another OS.

    Returns:
        The same ``recommender`` instance, loaded or freshly fitted.
    """
    file_name = (recommender.RECOMMENDER_NAME + toFileName(args) + ",s=" +
                 str(seed) + ",tp=" + str(tp))
    try:
        recommender.load_model(folder, file_name + ".zip")
    except Exception:
        # Deliberately not a bare ``except``: Ctrl-C / SystemExit must abort
        # the run instead of silently triggering an expensive refit.
        print("Saved model not found. Fitting a new one...")
        recommender.fit(**args)
        recommender.save_model(folder, file_name)
    return recommender


def crossval(URM_all, ICM_all, target_ids, k):
    """Run one hold-out fold and return MAP@10 of five hybrid recommenders.

    The fold is seeded with ``1234 + k``.  Base models are loaded from the
    ``SavedModels`` cache when available and fitted (then cached) otherwise.

    Args:
        URM_all: full user-rating matrix; 75% is used for training.
        ICM_all: full item-content matrix, used unsplit by the content-based
            models.
        target_ids: unused here (only needed by the disabled CSV export).
        k: fold index, used to derive the seed.

    Returns:
        ``[MAP(hyb), MAP(hyb2), MAP(hyb3), MAP(hyb5), MAP(hyb6)]`` at
        cutoff 10.
    """
    seed = 1234 + k
    np.random.seed(seed)
    tp = 0.75

    URM_train, URM_test = train_test_holdout(URM_all, train_perc=tp)
    # NOTE(review): the ICM split result is never used below.  The call is
    # kept because it presumably draws from the global NumPy RNG, so removing
    # it could change later random steps — confirm before deleting.
    train_test_holdout(ICM_all, train_perc=0.95)

    evaluator_validation = EvaluatorHoldout(URM_test, cutoff_list=[10],
                                            exclude_seen=True)

    p3alpha = _load_or_fit(
        P3alphaRecommender.P3alphaRecommender(URM_train),
        {
            "topK": 991,
            "alpha": 0.4705816992313091,
            "normalize_similarity": False,
        },
        seed, tp)

    # The next three models are not part of any hybrid evaluated below; the
    # calls are kept so their cache files keep being produced.
    _load_or_fit(
        RP3betaRecommender.RP3betaRecommender(URM_train),
        {
            "topK": 991,
            "alpha": 0.4705816992313091,
            "beta": 0.37,
            "normalize_similarity": False,
        },
        seed, tp)
    _load_or_fit(
        ItemKNNCFRecommender.ItemKNNCFRecommender(URM_train),
        {
            "topK": 1000,
            "shrink": 732,
            "similarity": "cosine",
            "normalize": True,
            "feature_weighting": "TF-IDF",
        },
        seed, tp)
    _load_or_fit(
        UserKNNCFRecommender.UserKNNCFRecommender(URM_train),
        {
            "topK": 131,
            "shrink": 2,
            "similarity": "cosine",
            "normalize": True,
        },
        seed, tp)

    itemKNNCBF = _load_or_fit(
        ItemKNNCBFRecommender.ItemKNNCBFRecommender(URM_train, ICM_all),
        {
            "topK": 700,
            "shrink": 100,
            "similarity": 'jaccard',
            "normalize": True,
            "feature_weighting": "TF-IDF",
        },
        seed, tp)

    # SLIM BPR and CFW were tried here and dropped (BPR hit memory errors and
    # performed poorly).

    pureSVD = PureSVDRecommender.PureSVDRecommender(URM_train)
    pureSVD.fit(num_factors=1000)

    hyb = ItemKNNScoresHybridRecommender.ItemKNNScoresHybridRecommender(
        URM_train, p3alpha, itemKNNCBF)
    hyb.fit(alpha=0.5)

    # Kaggle MAP 0.084 rp3beta, itemKNNCBF
    hyb2 = ItemKNNScoresHybridRecommender.ItemKNNScoresHybridRecommender(
        URM_train, pureSVD, itemKNNCBF)
    hyb2.fit(alpha=0.5)

    # Kaggle MAP 0.08667
    hyb3 = ItemKNNScoresHybridRecommender.ItemKNNScoresHybridRecommender(
        URM_train, hyb, hyb2)
    hyb3.fit(alpha=0.5)

    # Kaggle MAP 0.08856
    # Full values: "alpha_P": 0.4108657561671193, "alpha": 0.6290871066510789
    hyb5 = _load_or_fit(
        ScoresHybridP3alphaKNNCBF.ScoresHybridP3alphaKNNCBF(
            URM_train, ICM_all),
        {
            "topK_P": 903,
            "alpha_P": 0.41086575,
            "normalize_similarity_P": False,
            "topK": 448,
            "shrink": 20,
            "similarity": "tversky",
            "normalize": True,
            "alpha": 0.6290871,
            "feature_weighting": "TF-IDF",
        },
        seed, tp)

    # Kaggle MAP 0.086 :(
    # Full values: "alpha_P": 0.5081918012150626, "alpha": 0.44740093610861603
    hyb6 = _load_or_fit(
        ScoresHybridRP3betaKNNCBF.ScoresHybridRP3betaKNNCBF(
            URM_train, ICM_all),
        {
            "topK_P": 623,
            "alpha_P": 0.5081918,
            "normalize_similarity_P": False,
            "topK": 1000,
            "shrink": 1000,
            "similarity": "tversky",
            "normalize": True,
            "alpha": 0.4474009,
            "beta_P": 0.0,
            "feature_weighting": "TF-IDF",
        },
        seed, tp)

    # Result index 0 is the per-cutoff metrics dict; 10 is the only cutoff.
    return [
        evaluator_validation.evaluateRecommender(model)[0][10]["MAP"]
        for model in (hyb, hyb2, hyb3, hyb5, hyb6)
    ]
"topK": 1000, "shrink": 732, "similarity": "cosine", "normalize": True, "feature_weighting": "TF-IDF" }) userKNNCF = UserKNNCFRecommender.UserKNNCFRecommender(URM_train) userKNNCF.fit(**{ "topK": 131, "shrink": 2, "similarity": "cosine", "normalize": True }) itemKNNCBF = ItemKNNCBFRecommender.ItemKNNCBFRecommender( URM_train, ICM_train) itemKNNCBF.fit(topK=700, shrink=200, similarity='jaccard', normalize=True, feature_weighting="TF-IDF") hyb = ItemKNNScoresHybridRecommender.ItemKNNScoresHybridRecommender( URM_train, itemKNNCBF, userKNNCF) hyb.fit(alpha=0.5) # Kaggle MAP 0.081 hyb2 = ItemKNNScoresHybridRecommender.ItemKNNScoresHybridRecommender( URM_train, hyb, itemKNNCF) hyb2.fit(alpha=0.5)
def crossval(URM_all, ICM_all, target_ids, k):
    """Run one hold-out fold and return MAP@10 of six hybrid recommenders.

    The fold is seeded with ``1234 + k`` so that a given ``k`` always yields
    the same split.  (Previously the seed was computed but never passed to
    ``np.random.seed``, which made every run draw a fresh, unrepeatable
    split.)

    Args:
        URM_all: full user-rating matrix; 90% is used for training.
        ICM_all: full item-content matrix, used unsplit by the content-based
            models.
        target_ids: unused here (only needed by the disabled CSV export).
        k: fold index, used to derive the seed.

    Returns:
        ``[MAP(hyb), MAP(hyb2), MAP(hyb3), MAP(hyb5), MAP(hyb6), MAP(hyb7)]``
        at cutoff 10.
    """
    seed = 1234 + k
    np.random.seed(seed)

    URM_train, URM_test = train_test_holdout(URM_all, train_perc=0.90)
    # NOTE(review): the ICM split result is never used below.  The call is
    # kept because it presumably draws from the global NumPy RNG, so removing
    # it could change later random steps — confirm before deleting.
    train_test_holdout(ICM_all, train_perc=0.95)

    evaluator_validation = EvaluatorHoldout(URM_test, cutoff_list=[10],
                                            exclude_seen=True)

    # NOTE(review): p3alpha, userKNNCF and itemKNNCBF2 are fitted but not used
    # by any hybrid evaluated below (their hybrids are currently disabled).
    # They are kept so re-enabling an experiment is a one-line change.
    p3alpha = P3alphaRecommender.P3alphaRecommender(URM_train)
    p3alpha.fit(topK=991, alpha=0.4705816992313091,
                normalize_similarity=False)

    itemKNNCF = ItemKNNCFRecommender.ItemKNNCFRecommender(URM_train)
    itemKNNCF.fit(topK=1000, shrink=732, similarity="cosine", normalize=True,
                  feature_weighting="TF-IDF")

    userKNNCF = UserKNNCFRecommender.UserKNNCFRecommender(URM_train)
    userKNNCF.fit(topK=131, shrink=2, similarity="cosine", normalize=True)

    itemKNNCBF = ItemKNNCBFRecommender.ItemKNNCBFRecommender(
        URM_train, ICM_all)
    itemKNNCBF.fit(topK=700, shrink=100, similarity='jaccard', normalize=True,
                   feature_weighting="TF-IDF")

    itemKNNCBF2 = ItemKNNCBFRecommender.ItemKNNCBFRecommender(
        URM_train, ICM_all)
    itemKNNCBF2.fit(topK=200, shrink=15, similarity='jaccard', normalize=True,
                    feature_weighting="TF-IDF")

    # SLIM BPR and CFW were tried here and dropped (BPR hit memory errors and
    # performed poorly).

    pureSVD = PureSVDRecommender.PureSVDRecommender(URM_train)
    pureSVD.fit(num_factors=340)

    hyb = ItemKNNScoresHybridRecommender.ItemKNNScoresHybridRecommender(
        URM_train, itemKNNCBF, pureSVD)
    hyb.fit(alpha=0.5)

    # Kaggle MAP 0.084 rp3beta, itemKNNCBF
    # hyb2 merges the two item-item similarity matrices instead of the scores.
    hyb2 = ItemKNNSimilarityHybridRecommender.ItemKNNSimilarityHybridRecommender(
        URM_train, itemKNNCBF.W_sparse, itemKNNCF.W_sparse)
    hyb2.fit(topK=1600)

    # Kaggle MAP 0.08667
    hyb3 = ItemKNNScoresHybridRecommender.ItemKNNScoresHybridRecommender(
        URM_train, hyb, hyb2)
    hyb3.fit(alpha=0.5)

    # Kaggle MAP 0.08856
    hyb5 = ScoresHybridP3alphaKNNCBF.ScoresHybridP3alphaKNNCBF(
        URM_train, ICM_all)
    hyb5.fit(**{
        "topK_P": 903,
        "alpha_P": 0.4108657561671193,
        "normalize_similarity_P": False,
        "topK": 448,
        "shrink": 20,
        "similarity": "tversky",
        "normalize": True,
        "alpha": 0.6290871066510789,
        "feature_weighting": "TF-IDF",
    })

    # Kaggle MAP 0.086 :(
    # An RP3beta-based variant of hyb6 was also tried and is disabled.
    hyb6 = ScoresHybridP3alphaKNNCBF.ScoresHybridP3alphaKNNCBF(
        URM_train, ICM_all)
    hyb6.fit(**{
        "topK_P": 756,
        "alpha_P": 0.5292654015790155,
        "normalize_similarity_P": False,
        "topK": 1000,
        "shrink": 47,
        "similarity": "tversky",
        "normalize": False,
        "alpha": 0.5207647439152092,
        "feature_weighting": "none",
    })

    # No fit() call is made on the ranking hybrid before evaluation.
    hyb7 = RankingHybrid.RankingHybrid(URM_train, hyb6, hyb3)

    # Result index 0 is the per-cutoff metrics dict; 10 is the only cutoff.
    return [
        evaluator_validation.evaluateRecommender(model)[0][10]["MAP"]
        for model in (hyb, hyb2, hyb3, hyb5, hyb6, hyb7)
    ]