def read_data_split_and_search():
    """
    This function provides a simple example of how to tune the parameters of a given algorithm.

    The BayesianSearch object will save:
        - A .txt file with all the cases explored and the recommendation quality
        - A _best_model file which contains the trained model and can be loaded with recommender.load_model()
        - A _best_parameter file which contains a dictionary with all the fit parameters, it can be passed to recommender.fit(**_best_parameter)
        - A _best_result_validation file which contains a dictionary with the results of the best solution on the validation set
        - A _best_result_test file which contains a dictionary with the results, on the test set, of the best solution chosen using the validation set
    """
    parser = DataParser()
    URM_all = parser.get_URM_all()
    ICM_obj = parser.get_ICM_all()

    URM_train, URM_test = split_train_in_two_percentage_global_sample(URM_all, train_percentage=0.80)
    URM_train, URM_validation = split_train_in_two_percentage_global_sample(URM_train, train_percentage=0.85)

    """
    26-10-2020 > OPTIMIZATION ON THE RANGE [200, +INF)
    Optimizations already done.
    RECOMMENDERS I'M CONSIDERING (the fastest up to now):
        > PureSVD
        > ItemKNNCBF
        > ItemKNNCF
        > UserKNNCF
        > P3alpha
        > RP3beta
    """
    f_range = (200, -1)  # -1 means no upper bound
    URM_validation = parser.filter_URM_test_by_range(URM_train, URM_validation, f_range)
    URM_test = parser.filter_URM_test_by_range(URM_train, URM_test, f_range)

    output_folder_path = "result_experiments_v2/" + "range_" + str(f_range[0]) + "-" + str(f_range[1]) + "/"

    # If directory does not exist, create it
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    """
    collaborative_algorithm_list = [
        # EASE_R_Recommender
        PipeHybrid001,
        # Random,
        # TopPop,
        # P3alphaRecommender,
        # RP3betaRecommender,
        # ItemKNNCFRecommender,
        # UserKNNCFRecommender,
        # MatrixFactorization_BPR_Cython,
        # MatrixFactorization_FunkSVD_Cython,
        # PureSVDRecommender,
        # NMFRecommender,
        # PureSVDItemRecommender,
        # SLIM_BPR_Cython,
        # SLIMElasticNetRecommender,
        # IALSRecommender,
        # MF_MSE_PyTorch,
        # MergedHybrid000,
    ]

    content_algorithm_list = [
        # ItemKNNCBFRecommender
    ]
    """

    algorithm_in_sequence = [(ItemKNNCFRecommender, 'CF'),
                             (UserKNNCFRecommender, 'CF'),
                             (P3alphaRecommender, 'CF'),
                             (RP3betaRecommender, 'CF'),
                             (PureSVDRecommender, 'CF'),
                             (ItemKNNCBFRecommender, 'CBF')]

    from Base.Evaluation.Evaluator import EvaluatorHoldout

    evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[10])
    evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])

    for algo, algo_type in algorithm_in_sequence:
        print(f"OPTIMIZING {algo.RECOMMENDER_NAME} - {algo_type}")

        if algo_type == 'CF':
            collaborative_algorithm_list = [algo]

            runParameterSearch_Collaborative_partial = partial(
                runParameterSearch_Collaborative,
                URM_train=URM_train,
                ICM_train=ICM_obj,
                metric_to_optimize="MAP",
                n_cases=50,
                n_random_starts=int(50 * 0.3),
                evaluator_validation_earlystopping=evaluator_validation,
                evaluator_validation=evaluator_validation,
                evaluator_test=evaluator_test,
                output_folder_path=output_folder_path,
                allow_weighting=False,  # NOTE: feature weighting is disabled here, double-check before running
                parallelizeKNN=False)

            pool = multiprocessing.Pool(processes=int(multiprocessing.cpu_count()), maxtasksperchild=1)
            pool.map(runParameterSearch_Collaborative_partial, collaborative_algorithm_list)

        elif algo_type == 'CBF':
            content_algorithm_list = [algo]

            runParameterSearch_Content_partial = partial(
                runParameterSearch_Content,
                URM_train=URM_train,
                ICM_object=ICM_obj,
                ICM_name='BookFeatures',
                n_cases=50,
                n_random_starts=int(50 * 0.3),
                evaluator_validation=evaluator_validation,
                evaluator_test=evaluator_test,
                metric_to_optimize="MAP",
                output_folder_path=output_folder_path,
                parallelizeKNN=False,
                allow_weighting=True,
                # similarity_type_list=['cosine']
            )

            pool = multiprocessing.Pool(processes=int(multiprocessing.cpu_count()), maxtasksperchild=1)
            pool.map(runParameterSearch_Content_partial, content_algorithm_list)
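# The f_range filtering above restricts evaluation to users in a given band of
# train-profile lengths. A minimal sketch of what parser.filter_URM_test_by_range
# is assumed to do (keep only the test interactions of users whose number of
# train interactions lies in [low, high], with high == -1 meaning no upper
# bound); the actual DataParser method may differ in details.
import numpy as np
import scipy.sparse as sps

def filter_URM_test_by_range_sketch(URM_train, URM_test, f_range):
    low, high = f_range
    URM_train = sps.csr_matrix(URM_train)
    URM_test = sps.csr_matrix(URM_test).copy()
    profile_length = np.ediff1d(URM_train.indptr)  # train interactions per user
    in_range = profile_length >= low
    if high != -1:
        in_range &= profile_length <= high
    # Zero out the test rows of users outside the range, then prune them.
    for user_id in np.where(~in_range)[0]:
        URM_test.data[URM_test.indptr[user_id]:URM_test.indptr[user_id + 1]] = 0.0
    URM_test.eliminate_zeros()
    return URM_test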
def read_data_split_and_search():
    """
    This function provides a simple example of how to tune the parameters of a given algorithm.

    The BayesianSearch object will save:
        - A .txt file with all the cases explored and the recommendation quality
        - A _best_model file which contains the trained model and can be loaded with recommender.load_model()
        - A _best_parameter file which contains a dictionary with all the fit parameters, it can be passed to recommender.fit(**_best_parameter)
        - A _best_result_validation file which contains a dictionary with the results of the best solution on the validation set
        - A _best_result_test file which contains a dictionary with the results, on the test set, of the best solution chosen using the validation set
    """
    seed = 1205
    parser = DataParser()
    URM_all = parser.get_URM_all()
    ICM_obj = parser.get_ICM_all()

    # SPLIT TO GET TEST PARTITION
    URM_train, URM_test = split_train_in_two_percentage_global_sample(URM_all, train_percentage=0.90, seed=seed)

    # SPLIT TO GET THE HYBRID VALID PARTITION
    URM_train, URM_valid_hybrid = split_train_in_two_percentage_global_sample(URM_train, train_percentage=0.85, seed=seed)

    # SPLIT TO GET THE sub_rec VALID PARTITION
    URM_train, URM_valid_sub = split_train_in_two_percentage_global_sample(URM_train, train_percentage=0.85, seed=seed)

    output_folder_path = "result_experiments_v3/seed_" + str(seed) + '/'

    # If directory does not exist, create it
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    collaborative_algorithm_list = [
        # EASE_R_Recommender
        # PipeHybrid001,
        # Random,
        # TopPop,
        # P3alphaRecommender,
        # RP3betaRecommender,
        # ItemKNNCFRecommender,
        # UserKNNCFRecommender,
        # MatrixFactorization_BPR_Cython,
        # MatrixFactorization_FunkSVD_Cython,
        # PureSVDRecommender,
        # NMFRecommender,
        # PureSVDItemRecommender,
        # SLIM_BPR_Cython,
        SLIMElasticNetRecommender
        # IALSRecommender
        # MF_MSE_PyTorch
        # MergedHybrid000
        # LinearHybrid002
    ]

    content_algorithm_list = [
        # ItemKNNCBFRecommender
    ]

    from Base.Evaluation.Evaluator import EvaluatorHoldout

    evaluator_valid_sub = EvaluatorHoldout(URM_valid_sub, cutoff_list=[10])
    evaluator_valid_hybrid = EvaluatorHoldout(URM_valid_hybrid, cutoff_list=[10])
    evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])

    """
    # TODO: set the RIGHT evaluators here!
    runParameterSearch_Content_partial = partial(runParameterSearch_Content,
                                                 URM_train=URM_train,
                                                 ICM_object=ICM_obj,
                                                 ICM_name='1BookFeatures',
                                                 n_cases=50,
                                                 n_random_starts=20,
                                                 evaluator_validation=evaluator_valid_sub,
                                                 evaluator_test=evaluator_valid_hybrid,
                                                 metric_to_optimize="MAP",
                                                 output_folder_path=output_folder_path,
                                                 parallelizeKNN=False,
                                                 allow_weighting=True,
                                                 # similarity_type_list=['cosine']
                                                 )

    pool = multiprocessing.Pool(processes=int(multiprocessing.cpu_count()), maxtasksperchild=1)
    pool.map(runParameterSearch_Content_partial, content_algorithm_list)
    """

    # TODO: set the RIGHT evaluators here!
    runParameterSearch_Collaborative_partial = partial(
        runParameterSearch_Collaborative,
        URM_train=URM_train,
        ICM_train=ICM_obj,
        metric_to_optimize="MAP",
        n_cases=50,
        n_random_starts=20,
        evaluator_validation_earlystopping=evaluator_valid_sub,
        evaluator_validation=evaluator_valid_sub,
        evaluator_test=evaluator_valid_hybrid,
        output_folder_path=output_folder_path,
        allow_weighting=False,
        # similarity_type_list=["cosine", 'jaccard'],
        parallelizeKNN=False)

    pool = multiprocessing.Pool(processes=int(multiprocessing.cpu_count()), maxtasksperchild=1)
    pool.map(runParameterSearch_Collaborative_partial, collaborative_algorithm_list)
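# Per the docstring above, each search dumps a _best_parameter dictionary that
# can be passed straight to fit(). A minimal sketch of reloading it, assuming
# the file is a plain pickled dict (the framework may actually use its own
# DataIO serialization); the file name below is illustrative.
import pickle

def refit_with_best_parameters(URM_final, output_folder_path,
                               file_name="SLIMElasticNetRecommender_best_parameter"):
    with open(output_folder_path + file_name, "rb") as file:
        best_parameters = pickle.load(file)
    # Refit on the final URM (e.g. train + validation) with the tuned parameters.
    recommender = SLIMElasticNetRecommender(URM_final)
    recommender.fit(**best_parameters)
    return recommender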
                                             user_id_array=user_id_array,
                                             cutoff=cutoff,
                                             remove_seen_flag=remove_seen_flag,
                                             items_to_compute=items_to_compute,
                                             remove_top_pop_flag=remove_top_pop_flag,
                                             remove_custom_items_flag=remove_custom_items_flag,
                                             return_scores=return_scores)

        return round_robin_merging([ranking_list1, ranking_list2])


if __name__ == '__main__':
    seed = 1205
    parser = DataParser('../data')
    URM_all = parser.get_URM_all()
    ICM_all = parser.get_ICM_all()

    URM_train, URM_test = split_train_in_two_percentage_global_sample(URM_all, train_percentage=0.85, seed=seed)

    recommender = ListMerged001(URM_train)
    recommender.fit()

    result_dict = evaluator(recommender, URM_test, cutoff=10)
    print("Round robin:")
    print(result_dict)

    result_dict = evaluator(recommender.get_recommenders()[0], URM_test, cutoff=10)
    print("RP3beta:")
    print(result_dict)
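# A minimal sketch of the round-robin merge used above, assuming each input is
# one recommender's ranked item list for a single user; the repo's actual
# round_robin_merging may differ.
def round_robin_merging_sketch(ranking_lists, cutoff=10):
    merged, seen = [], set()
    # Take one item from each list in turn, skipping duplicates,
    # until the merged ranking reaches the cutoff.
    for position in range(max(len(ranking) for ranking in ranking_lists)):
        for ranking in ranking_lists:
            if position < len(ranking) and ranking[position] not in seen:
                merged.append(ranking[position])
                seen.add(ranking[position])
                if len(merged) == cutoff:
                    return merged
    return merged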
def read_data_split_and_search():
    """
    This function provides a simple example of how to tune the parameters of a given algorithm.

    The BayesianSearch object will save:
        - A .txt file with all the cases explored and the recommendation quality
        - A _best_model file which contains the trained model and can be loaded with recommender.load_model()
        - A _best_parameter file which contains a dictionary with all the fit parameters, it can be passed to recommender.fit(**_best_parameter)
        - A _best_result_validation file which contains a dictionary with the results of the best solution on the validation set
        - A _best_result_test file which contains a dictionary with the results, on the test set, of the best solution chosen using the validation set
    """
    seed = 1205
    parser = DataParser()
    URM_all = parser.get_URM_all()
    ICM_obj = parser.get_ICM_all()

    # SPLIT TO GET TEST PARTITION
    URM_train, URM_test = split_train_in_two_percentage_global_sample(URM_all, train_percentage=0.90, seed=seed)

    # SPLIT TO GET THE HYBRID VALID PARTITION
    URM_train, URM_valid_hybrid = split_train_in_two_percentage_global_sample(URM_train, train_percentage=0.85, seed=seed)

    # SPLIT TO GET THE sub_rec VALID PARTITION
    URM_train_bis, URM_valid_sub = split_train_in_two_percentage_global_sample(URM_train, train_percentage=0.85, seed=seed)

    collaborative_algorithm_list = [
        # EASE_R_Recommender
        # PipeHybrid001,
        # Random,
        # TopPop,
        # P3alphaRecommender,
        # RP3betaRecommender,
        # ItemKNNCFRecommender,
        # UserKNNCFRecommender,
        # MatrixFactorization_BPR_Cython,
        # MatrixFactorization_FunkSVD_Cython,
        # PureSVDRecommender,
        # NMFRecommender,
        # PureSVDItemRecommender,
        # SLIM_BPR_Cython,
        # SLIMElasticNetRecommender,
        # IALSRecommender,
        # MF_MSE_PyTorch,
        # MergedHybrid000,
        # LinearHybrid002,
        HybridCombinationSearch
    ]

    content_algorithm_list = [
        # ItemKNNCBFRecommender
    ]

    from Base.Evaluation.Evaluator import EvaluatorHoldout

    evaluator_valid_sub = EvaluatorHoldout(URM_valid_sub, cutoff_list=[10])
    evaluator_valid_hybrid = EvaluatorHoldout(URM_valid_hybrid, cutoff_list=[10])
    evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])

    """
    # TODO: set the RIGHT evaluators here!
    runParameterSearch_Content_partial = partial(runParameterSearch_Content,
                                                 URM_train=URM_train,
                                                 ICM_object=ICM_obj,
                                                 ICM_name='1BookFeatures',
                                                 n_cases=50,
                                                 n_random_starts=20,
                                                 evaluator_validation=evaluator_valid_sub,
                                                 evaluator_test=evaluator_valid_hybrid,
                                                 metric_to_optimize="MAP",
                                                 output_folder_path=output_folder_path,
                                                 parallelizeKNN=False,
                                                 allow_weighting=True,
                                                 # similarity_type_list=['cosine']
                                                 )

    pool = multiprocessing.Pool(processes=int(multiprocessing.cpu_count()), maxtasksperchild=1)
    pool.map(runParameterSearch_Content_partial, content_algorithm_list)
    """

    print("Rp3beta training...")
    rp3b = RP3betaRecommender(URM_train, verbose=False)
    rp3b_params = {'topK': 1000, 'alpha': 0.38192761611274967, 'beta': 0.0, 'normalize_similarity': False}
    rp3b.fit(**rp3b_params)
    print("Done")

    print("P3alpha training...")
    p3a = P3alphaRecommender(URM_train, verbose=False)
    p3a_params = {'topK': 131, 'alpha': 0.33660811631883863, 'normalize_similarity': False}
    p3a.fit(**p3a_params)
    print("Done")

    print("ItemKnnCF training...")
    icf = ItemKNNCFRecommender(URM_train, verbose=False)
    icf_params = {'topK': 100, 'shrink': 1000, 'similarity': 'asymmetric', 'normalize': True,
                  'asymmetric_alpha': 0.0}
    icf.fit(**icf_params)
    print("Done")

    print("UserKnnCF training...")
    ucf = UserKNNCFRecommender(URM_train, verbose=False)
    ucf_params = {'topK': 190, 'shrink': 0, 'similarity': 'cosine', 'normalize': True}
    ucf.fit(**ucf_params)
    print("Done")

    print("ItemKnnCBF training...")
    icb = ItemKNNCBFRecommender(URM_train, ICM_obj, verbose=False)
    icb_params = {'topK': 205, 'shrink': 1000, 'similarity': 'cosine', 'normalize': True,
                  'feature_weighting': 'BM25'}
    icb.fit(**icb_params)
    print("Done")

    print("SlimBPR training...")
    sbpr = SLIM_BPR_Cython(URM_train, verbose=False)
    sbpr_params = {'topK': 979, 'epochs': 130, 'symmetric': False, 'sgd_mode': 'adam',
                   'lambda_i': 0.004947329669424629, 'lambda_j': 1.1534760845071758e-05,
                   'learning_rate': 0.0001}
    sbpr.fit(**sbpr_params)
    print("Done")

    print("SlimElasticNet training...")
    sen = SLIMElasticNetRecommender(URM_train, verbose=False)
    sen_params = {'topK': 992, 'l1_ratio': 0.004065081925341167, 'alpha': 0.003725005053334143}
    sen.fit(**sen_params)
    print("Done")

    list_recommender = [rp3b, p3a, icf, ucf, icb, sen, sbpr]
    list_already_seen = [rp3b, p3a, icf, ucf, icb]
    combinations_already_seen = list(combinations(list_already_seen, 3))

    for rec_perm in combinations(list_recommender, 3):

        if rec_perm not in combinations_already_seen:

            recommender_names = '_'.join([r.RECOMMENDER_NAME for r in rec_perm])
            output_folder_path = "result_experiments_v3/seed_" + str(seed) + '/' + recommender_names + '/'

            # If directory does not exist, create it
            if not os.path.exists(output_folder_path):
                os.makedirs(output_folder_path)

            # TODO: set the RIGHT evaluators here!
            runParameterSearch_Collaborative_partial = partial(
                runParameterSearch_Collaborative,
                URM_train=URM_train,
                ICM_train=ICM_obj,
                metric_to_optimize="MAP",
                n_cases=50,
                n_random_starts=20,
                evaluator_validation_earlystopping=evaluator_valid_hybrid,
                evaluator_validation=evaluator_valid_hybrid,
                evaluator_test=evaluator_test,
                output_folder_path=output_folder_path,
                allow_weighting=False,
                # similarity_type_list=["cosine", 'jaccard'],
                parallelizeKNN=False,
                list_rec=rec_perm)

            pool = multiprocessing.Pool(processes=int(multiprocessing.cpu_count()), maxtasksperchild=1)
            pool.map(runParameterSearch_Collaborative_partial, collaborative_algorithm_list)
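# The "rec_perm not in combinations_already_seen" check above compares ordered
# tuples, so a reordered triple would not be skipped. A minimal order-insensitive
# alternative (not part of the repo), assuming each recommender exposes
# RECOMMENDER_NAME:
from itertools import combinations

def unseen_triples(list_recommender, list_already_seen):
    already_seen = {frozenset(r.RECOMMENDER_NAME for r in triple)
                    for triple in combinations(list_already_seen, 3)}
    for rec_perm in combinations(list_recommender, 3):
        key = frozenset(r.RECOMMENDER_NAME for r in rec_perm)
        if key not in already_seen:
            already_seen.add(key)
            yield rec_perm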
def read_data_split_and_search():
    """
    This function provides a simple example of how to tune the parameters of a given algorithm.

    The BayesianSearch object will save:
        - A .txt file with all the cases explored and the recommendation quality
        - A _best_model file which contains the trained model and can be loaded with recommender.load_model()
        - A _best_parameter file which contains a dictionary with all the fit parameters, it can be passed to recommender.fit(**_best_parameter)
        - A _best_result_validation file which contains a dictionary with the results of the best solution on the validation set
        - A _best_result_test file which contains a dictionary with the results, on the test set, of the best solution chosen using the validation set
    """
    parser = DataParser()
    seed = 1666
    URM_all = parser.get_URM_all()
    ICM_obj = parser.get_ICM_all()

    URM_train, URM_test = split_train_in_two_percentage_global_sample(URM_all, train_percentage=0.85, seed=seed)
    URM_train, URM_validation = split_train_in_two_percentage_global_sample(URM_train, train_percentage=0.85, seed=seed)

    k = 5  # number of cross-validation folds
    output_folder_path = "result_experiments_CV/"

    # If directory does not exist, create it
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    collaborative = True

    content_algorithm_list = [
        # ItemKNNCBFRecommender
    ]

    collaborative_algorithm_list = [
        # Random,
        # TopPop,
        # P3alphaRecommender,
        # RP3betaRecommender,
        ItemKNNCFRecommender,
        # UserKNNCFRecommender,
        # MatrixFactorization_BPR_Cython,
        # MatrixFactorization_FunkSVD_Cython,
        # PureSVDRecommender,
        # SLIM_BPR_Cython,
        # SLIMElasticNetRecommender,
        # IALSRecommender,
    ]

    from Base.Evaluation.Evaluator import EvaluatorHoldout

    evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[5])
    evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])

    if not collaborative:
        runParameterSearch_Content_partial = partial(
            runParameterSearch_Content,
            URM_train=URM_train,
            ICM_object=ICM_obj,
            ICM_name='1BookFeatures',
            n_cases=50,
            n_random_starts=20,
            metric_to_optimize="MAP",
            output_folder_path=output_folder_path,
            parallelizeKNN=False,
            allow_weighting=True,
            # similarity_type_list=['cosine'],
            k=k,
            seed=seed)

        pool = multiprocessing.Pool(processes=int(multiprocessing.cpu_count()), maxtasksperchild=1)
        pool.map(runParameterSearch_Content_partial, content_algorithm_list)

    else:
        runParameterSearch_Collaborative_partial = partial(
            runParameterSearch_Collaborative,
            URM_train=URM_train,
            metric_to_optimize="MAP",
            n_cases=50,
            n_random_starts=20,
            # evaluator_test=evaluator_test,
            output_folder_path=output_folder_path,
            similarity_type_list=["cosine"],
            parallelizeKNN=False,
            allow_weighting=False,
            k=k,
            seed=seed)

        pool = multiprocessing.Pool(processes=int(multiprocessing.cpu_count()), maxtasksperchild=1)
        pool.map(runParameterSearch_Collaborative_partial, collaborative_algorithm_list)
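# The k and seed arguments above suggest the search performs k-fold validation
# internally. A minimal sketch of building k independent holdout folds with the
# repo's splitter; the real implementation inside runParameterSearch_* may differ.
def make_k_holdout_folds(URM_all, k=5, train_percentage=0.85, seed=1666):
    folds = []
    for fold_index in range(k):
        # A different seed per fold yields k independent holdout splits.
        URM_train_fold, URM_valid_fold = split_train_in_two_percentage_global_sample(
            URM_all, train_percentage=train_percentage, seed=seed + fold_index)
        folds.append((URM_train_fold, URM_valid_fold))
    return folds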
def read_data_split_and_search():
    """
    This function provides a simple example of how to tune the parameters of a given algorithm.

    The BayesianSearch object will save:
        - A .txt file with all the cases explored and the recommendation quality
        - A _best_model file which contains the trained model and can be loaded with recommender.load_model()
        - A _best_parameter file which contains a dictionary with all the fit parameters, it can be passed to recommender.fit(**_best_parameter)
        - A _best_result_validation file which contains a dictionary with the results of the best solution on the validation set
        - A _best_result_test file which contains a dictionary with the results, on the test set, of the best solution chosen using the validation set
    """
    seed = 1205
    parser = DataParser()
    URM_all = parser.get_URM_all()
    ICM_obj = parser.get_ICM_all()

    # SPLIT TO GET TEST PARTITION
    URM_train, URM_test = split_train_in_two_percentage_global_sample(URM_all, train_percentage=0.90, seed=seed)

    # SPLIT TO GET THE HYBRID VALID PARTITION
    URM_train, URM_valid_hybrid = split_train_in_two_percentage_global_sample(URM_train, train_percentage=0.85, seed=seed)

    # Keep only validation users with at least 3 train interactions
    URM_valid_hybrid = parser.filter_URM_test_by_range(URM_train, URM_valid_hybrid, (3, -1))

    collaborative_algorithm_list = [
        # EASE_R_Recommender
        # PipeHybrid001,
        # Random,
        # TopPop,
        # P3alphaRecommender,
        # RP3betaRecommender,
        # ItemKNNCFRecommender,
        # UserKNNCFRecommender,
        # MatrixFactorization_BPR_Cython,
        # MatrixFactorization_FunkSVD_Cython,
        # PureSVDRecommender,
        # NMFRecommender,
        # PureSVDItemRecommender,
        # SLIM_BPR_Cython,
        # SLIMElasticNetRecommender,
        # IALSRecommender,
        # MF_MSE_PyTorch,
        # MergedHybrid000,
        # LinearHybrid002,
        HybridCombinationSearch
    ]

    content_algorithm_list = [
        # ItemKNNCBFRecommender
    ]

    from Base.Evaluation.Evaluator import EvaluatorHoldout

    evaluator_valid_hybrid = EvaluatorHoldout(URM_valid_hybrid, cutoff_list=[10])
    evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])

    """
    earlystopping_keywargs = {"validation_every_n": 5,
                              "stop_on_validation": True,
                              "evaluator_object": evaluator_valid_hybrid,
                              "lower_validations_allowed": 5,
                              "validation_metric": 'MAP',
                              }

    print('IALS training...')
    ials = IALSRecommender(URM_train, verbose=False)
    ials_params = {'num_factors': 83, 'confidence_scaling': 'linear', 'alpha': 28.4278070726612,
                   'epsilon': 1.0234211788885077, 'reg': 0.0027328110246575004, 'epochs': 20}
    ials.fit(**ials_params, **earlystopping_keywargs)
    print("Done")

    print("PureSVD training...")
    psvd = PureSVDRecommender(URM_train, verbose=False)
    psvd_params = {'num_factors': 711}
    psvd.fit(**psvd_params)
    print("Done")
    """

    print("Rp3beta training...")
    rp3b = RP3betaRecommender(URM_train, verbose=False)
    rp3b_params = {'topK': 753, 'alpha': 0.3873710051288722, 'beta': 0.0, 'normalize_similarity': False}
    rp3b.fit(**rp3b_params)
    print("Done")

    print("P3alpha training...")
    p3a = P3alphaRecommender(URM_train, verbose=False)
    p3a_params = {'topK': 438, 'alpha': 0.41923120471415165, 'normalize_similarity': False}
    p3a.fit(**p3a_params)
    print("Done")

    print("ItemKnnCF training...")
    icf = ItemKNNCFRecommender(URM_train, verbose=False)
    icf_params = {'topK': 565, 'shrink': 554, 'similarity': 'tversky', 'normalize': True,
                  'tversky_alpha': 1.9109121434662428, 'tversky_beta': 1.7823834698905734}
    icf.fit(**icf_params)
    print("Done")

    print("UserKnnCF training...")
    ucf = UserKNNCFRecommender(URM_train, verbose=False)
    ucf_params = {'topK': 190, 'shrink': 0, 'similarity': 'cosine', 'normalize': True}
    ucf.fit(**ucf_params)
    print("Done")

    print("ItemKnnCBF training...")
    icb = ItemKNNCBFRecommender(URM_train, ICM_obj, verbose=False)
    icb_params = {'topK': 205, 'shrink': 1000, 'similarity': 'cosine', 'normalize': True,
                  'feature_weighting': 'BM25'}
    icb.fit(**icb_params)
    print("Done")

    """
    print("SlimElasticNet training...")
    sen = SLIMElasticNetRecommender(URM_train, verbose=False)
    sen_params = {'topK': 954, 'l1_ratio': 3.87446082207643e-05, 'alpha': 0.07562657698792305}
    sen.fit(**sen_params)
    print("Done")
    """

    list_recommender = [icb, icf, ucf, p3a, rp3b]
    list_already_seen = []
    combinations_already_seen = list(combinations(list_already_seen, 3))

    for rec_perm in combinations(list_recommender, 3):

        if rec_perm not in combinations_already_seen:

            recommender_names = '_'.join([r.RECOMMENDER_NAME for r in rec_perm])
            output_folder_path = "result_experiments_v3/seed_" + str(seed) + '_3--1' + '/' + recommender_names + '/'

            # If directory does not exist, create it
            if not os.path.exists(output_folder_path):
                os.makedirs(output_folder_path)

            # TODO: set the RIGHT evaluators here!
            runParameterSearch_Collaborative_partial = partial(
                runParameterSearch_Collaborative,
                URM_train=URM_train,
                ICM_train=ICM_obj,
                metric_to_optimize="MAP",
                n_cases=50,
                n_random_starts=20,
                evaluator_validation_earlystopping=evaluator_valid_hybrid,
                evaluator_validation=evaluator_valid_hybrid,
                evaluator_test=evaluator_test,
                output_folder_path=output_folder_path,
                allow_weighting=False,
                # similarity_type_list=["cosine", 'jaccard'],
                parallelizeKNN=False,
                list_rec=rec_perm)

            pool = multiprocessing.Pool(processes=int(multiprocessing.cpu_count()), maxtasksperchild=1)
            pool.map(runParameterSearch_Collaborative_partial, collaborative_algorithm_list)
def read_data_split_and_search():
    """
    This function provides a simple example of how to tune the parameters of a given algorithm.

    The BayesianSearch object will save:
        - A .txt file with all the cases explored and the recommendation quality
        - A _best_model file which contains the trained model and can be loaded with recommender.load_model()
        - A _best_parameter file which contains a dictionary with all the fit parameters, it can be passed to recommender.fit(**_best_parameter)
        - A _best_result_validation file which contains a dictionary with the results of the best solution on the validation set
        - A _best_result_test file which contains a dictionary with the results, on the test set, of the best solution chosen using the validation set
    """
    seed = 1205
    parser = DataParser()
    URM_all = parser.get_URM_all()
    ICM_obj = parser.get_ICM_all()

    # SPLIT TO GET TEST PARTITION
    URM_train, URM_test = split_train_in_two_percentage_global_sample(URM_all, train_percentage=0.90, seed=seed)

    # SPLIT TO GET THE HYBRID VALID PARTITION
    URM_train, URM_valid_hybrid = split_train_in_two_percentage_global_sample(URM_train, train_percentage=0.85, seed=seed)

    collaborative_algorithm_list = [
        # EASE_R_Recommender
        # PipeHybrid001,
        # Random,
        # TopPop,
        # P3alphaRecommender,
        # RP3betaRecommender,
        # ItemKNNCFRecommender,
        # UserKNNCFRecommender,
        # MatrixFactorization_BPR_Cython,
        # MatrixFactorization_FunkSVD_Cython,
        # PureSVDRecommender,
        # NMFRecommender,
        # PureSVDItemRecommender,
        # SLIM_BPR_Cython,
        # SLIMElasticNetRecommender,
        # IALSRecommender,
        # MF_MSE_PyTorch,
        # MergedHybrid000,
        # LinearHybrid002,
        HybridCombinationSearch
    ]

    content_algorithm_list = [
        # ItemKNNCBFRecommender
    ]

    from Base.Evaluation.Evaluator import EvaluatorHoldout

    evaluator_valid_hybrid = EvaluatorHoldout(URM_valid_hybrid, cutoff_list=[10])
    evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])

    """
    earlystopping_keywargs = {"validation_every_n": 5,
                              "stop_on_validation": True,
                              "evaluator_object": evaluator_valid_hybrid,
                              "lower_validations_allowed": 5,
                              "validation_metric": 'MAP',
                              }

    print('IALS training...')
    ials = IALSRecommender(URM_train, verbose=False)
    ials_params = {'num_factors': 83, 'confidence_scaling': 'linear', 'alpha': 28.4278070726612,
                   'epsilon': 1.0234211788885077, 'reg': 0.0027328110246575004, 'epochs': 20}
    ials.fit(**ials_params, **earlystopping_keywargs)
    print("Done")

    print("PureSVD training...")
    psvd = PureSVDRecommender(URM_train, verbose=False)
    psvd_params = {'num_factors': 711}
    psvd.fit(**psvd_params)
    print("Done")
    """

    rp3b = RP3betaRecommender(URM_train, verbose=False)
    rp3b_params = {'topK': 1000, 'alpha': 0.38192761611274967, 'beta': 0.0, 'normalize_similarity': False}
    try:
        rp3b.load_model(f'stored_recommenders/seed_{str(seed)}_hybrid_search/',
                        f'{rp3b.RECOMMENDER_NAME}_for_second_search')
        print(f"{rp3b.RECOMMENDER_NAME} loaded.")
    except Exception:
        print(f"Fitting {rp3b.RECOMMENDER_NAME} ...")
        rp3b.fit(**rp3b_params)
        print("done.")
        rp3b.save_model(f'stored_recommenders/seed_{str(seed)}_hybrid_search/',
                        f'{rp3b.RECOMMENDER_NAME}_for_second_search')

    p3a = P3alphaRecommender(URM_train, verbose=False)
    p3a_params = {'topK': 131, 'alpha': 0.33660811631883863, 'normalize_similarity': False}
    try:
        p3a.load_model(f'stored_recommenders/seed_{str(seed)}_hybrid_search/',
                       f'{p3a.RECOMMENDER_NAME}_for_second_search')
        print(f"{p3a.RECOMMENDER_NAME} loaded.")
    except Exception:
        print(f"Fitting {p3a.RECOMMENDER_NAME} ...")
        p3a.fit(**p3a_params)
        print("done.")
        p3a.save_model(f'stored_recommenders/seed_{str(seed)}_hybrid_search/',
                       f'{p3a.RECOMMENDER_NAME}_for_second_search')

    icf = ItemKNNCFRecommender(URM_train, verbose=False)
    icf_params = {'topK': 55, 'shrink': 1000, 'similarity': 'asymmetric', 'normalize': True,
                  'asymmetric_alpha': 0.0}
    try:
        icf.load_model(f'stored_recommenders/seed_{str(seed)}_hybrid_search/',
                       f'{icf.RECOMMENDER_NAME}_for_second_search')
        print(f"{icf.RECOMMENDER_NAME} loaded.")
    except Exception:
        print(f"Fitting {icf.RECOMMENDER_NAME} ...")
        icf.fit(**icf_params)
        print("done.")
        icf.save_model(f'stored_recommenders/seed_{str(seed)}_hybrid_search/',
                       f'{icf.RECOMMENDER_NAME}_for_second_search')

    ucf = UserKNNCFRecommender(URM_train, verbose=False)
    ucf_params = {'topK': 190, 'shrink': 0, 'similarity': 'cosine', 'normalize': True}
    try:
        ucf.load_model(f'stored_recommenders/seed_{str(seed)}_hybrid_search/',
                       f'{ucf.RECOMMENDER_NAME}_for_second_search')
        print(f"{ucf.RECOMMENDER_NAME} loaded.")
    except Exception:
        print(f"Fitting {ucf.RECOMMENDER_NAME} ...")
        ucf.fit(**ucf_params)
        print("done.")
        ucf.save_model(f'stored_recommenders/seed_{str(seed)}_hybrid_search/',
                       f'{ucf.RECOMMENDER_NAME}_for_second_search')

    icb = ItemKNNCBFRecommender(URM_train, ICM_obj, verbose=False)
    icb_params = {'topK': 65, 'shrink': 0, 'similarity': 'dice', 'normalize': True}
    try:
        icb.load_model(f'stored_recommenders/seed_{str(seed)}_hybrid_search/',
                       f'{icb.RECOMMENDER_NAME}_for_second_search')
        print(f"{icb.RECOMMENDER_NAME} loaded.")
    except Exception:
        print(f"Fitting {icb.RECOMMENDER_NAME} ...")
        icb.fit(**icb_params)
        print("done.")
        icb.save_model(f'stored_recommenders/seed_{str(seed)}_hybrid_search/',
                       f'{icb.RECOMMENDER_NAME}_for_second_search')

    sen = SLIMElasticNetRecommender(URM_train, verbose=False)
    sen_params = {'topK': 992, 'l1_ratio': 0.004065081925341167, 'alpha': 0.003725005053334143}
    try:
        sen.load_model(f'stored_recommenders/seed_{str(seed)}_hybrid_search/',
                       f'{sen.RECOMMENDER_NAME}_for_second_search')
        print(f"{sen.RECOMMENDER_NAME} loaded.")
    except Exception:
        print(f"Fitting {sen.RECOMMENDER_NAME} ...")
        sen.fit(**sen_params)
        print("done.")
        sen.save_model(f'stored_recommenders/seed_{str(seed)}_hybrid_search/',
                       f'{sen.RECOMMENDER_NAME}_for_second_search')

    print("\nStart.")

    list_recommender = [icb, icf, ucf, p3a, rp3b, sen]
    list_already_seen = []
    combinations_already_seen = []
    """
    (icb, icf, p3a), (icb, icf, rp3b), (icb, icf, sen), (icb, p3a, rp3b),
    (icb, p3a, sen), (icb, rp3b, sen), (icf, p3a, rp3b), (icf, p3a, sen)
    """

    for rec_perm in combinations(list_recommender, 3):

        if rec_perm not in combinations_already_seen:

            recommender_names = '_'.join([r.RECOMMENDER_NAME for r in rec_perm])
            output_folder_path = "result_experiments_v3/seed_" + str(seed) + '/linear_combination/' + recommender_names + '/'
            print(f"\nTESTING THE COMBO {recommender_names}")

            # If directory does not exist, create it
            if not os.path.exists(output_folder_path):
                os.makedirs(output_folder_path)

            # TODO: set the RIGHT evaluators here!
            runParameterSearch_Collaborative_partial = partial(
                runParameterSearch_Collaborative,
                URM_train=URM_train,
                ICM_train=ICM_obj,
                metric_to_optimize="MAP",
                n_cases=50,
                n_random_starts=20,
                evaluator_validation_earlystopping=evaluator_valid_hybrid,
                evaluator_validation=evaluator_valid_hybrid,
                # evaluator_test=evaluator_test,
                output_folder_path=output_folder_path,
                allow_weighting=False,
                # similarity_type_list=["cosine", 'jaccard'],
                parallelizeKNN=False,
                list_rec=rec_perm)

            pool = multiprocessing.Pool(processes=int(multiprocessing.cpu_count()), maxtasksperchild=1)
            pool.map(runParameterSearch_Collaborative_partial, collaborative_algorithm_list)
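# The try/except block above is repeated verbatim for all six recommenders; a
# minimal helper that factors it out (not part of the repo, the paths mirror
# the ones used above):
def load_or_fit(recommender, fit_params, seed,
                folder_template='stored_recommenders/seed_{seed}_hybrid_search/',
                file_suffix='_for_second_search'):
    folder = folder_template.format(seed=seed)
    file_name = recommender.RECOMMENDER_NAME + file_suffix
    try:
        # Reuse a previously fitted model if one was stored for this seed.
        recommender.load_model(folder, file_name)
        print(f"{recommender.RECOMMENDER_NAME} loaded.")
    except Exception:
        print(f"Fitting {recommender.RECOMMENDER_NAME} ...")
        recommender.fit(**fit_params)
        recommender.save_model(folder, file_name)
        print("done.")
    return recommender

# Example usage:
# rp3b = load_or_fit(RP3betaRecommender(URM_train, verbose=False), rp3b_params, seed)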
def read_data_split_and_search():
    """
    This function provides a simple example of how to tune the parameters of a given algorithm.

    The BayesianSearch object will save:
        - A .txt file with all the cases explored and the recommendation quality
        - A _best_model file which contains the trained model and can be loaded with recommender.load_model()
        - A _best_parameter file which contains a dictionary with all the fit parameters, it can be passed to recommender.fit(**_best_parameter)
        - A _best_result_validation file which contains a dictionary with the results of the best solution on the validation set
        - A _best_result_test file which contains a dictionary with the results, on the test set, of the best solution chosen using the validation set
    """
    parser = DataParser()
    seed = 1666
    URM_all = parser.get_URM_all()
    ICM_obj = parser.get_ICM_all()

    # URM_train, URM_test = split_train_in_two_percentage_global_sample(URM_all, train_percentage=0.85, seed=seed)
    # URM_train, URM_validation = split_train_in_two_percentage_global_sample(URM_train, train_percentage=0.85, seed=seed)

    k = 5  # number of cross-validation folds
    output_folder_path = "result_experiments_CV/"

    collaborative_algorithm_list = [
        HybridCombinationSearchCV
        # HybridSuperLinear
    ]

    from Base.Evaluation.Evaluator import EvaluatorHoldout

    # Each entry is a (recommender_class, fit_parameters) pair.
    icb = (ItemKNNCBFRecommender,
           {'topK': 164, 'shrink': 8, 'similarity': 'jaccard', 'normalize': True})
    icbsup = (SpecialItemKNNCBFRecommender,
              {'topK': 1000, 'shrink': 1000, 'similarity': 'cosine', 'normalize': True,
               'feature_weighting': 'BM25'})
    icbcf = (ItemKNN_CBF_CF,
             {'topK': 1000, 'shrink': 1000, 'similarity': 'asymmetric', 'normalize': True,
              'asymmetric_alpha': 0.241892724784089, 'feature_weighting': 'TF-IDF', 'icm_weight': 1.0})
    icf = (ItemKNNCFRecommender,
           {'topK': 1000, 'shrink': 1000, 'similarity': 'cosine', 'normalize': True,
            'feature_weighting': 'TF-IDF'})
    ucf = (UserKNNCFRecommender,
           {'topK': 163, 'shrink': 846, 'similarity': 'cosine', 'normalize': True,
            'feature_weighting': 'TF-IDF'})
    rp3b = (RP3betaRecommender,
            {'topK': 926, 'alpha': 0.4300109351916609, 'beta': 0.01807360750913967,
             'normalize_similarity': False})
    p3a = (P3alphaRecommender,
           {'topK': 575, 'alpha': 0.48009885897470206, 'normalize_similarity': False})
    sbpr = (SLIM_BPR_Cython,
            {'topK': 1000, 'epochs': 130, 'symmetric': False, 'sgd_mode': 'adam',
             'lambda_i': 1e-05, 'lambda_j': 1e-05, 'learning_rate': 0.0001})
    sslim = (SSLIMElasticNet,
             {'beta': 0.567288665094892, 'topK': 1000, 'l1_ratio': 1e-05, 'alpha': 0.001})

    combo_algorithm_list = [icb, icbsup, icbcf, icf, ucf, rp3b, p3a, sbpr, sslim]

    list_already_seen = []
    combinations_already_seen = list(combinations(list_already_seen, 3))
    """
    (icb, icf, p3a), (icb, icf, rp3b), (icb, icf, sen), (icb, p3a, rp3b),
    (icb, p3a, sen), (icb, rp3b, sen), (icf, p3a, rp3b), (icf, p3a, sen)
    """

    combination_to_be_done = list(combinations(combo_algorithm_list, 3))

    for rec_perm in combination_to_be_done:

        if rec_perm not in combinations_already_seen:

            recommender_names = '_'.join([r[0].RECOMMENDER_NAME for r in rec_perm])
            output_folder_path = "result_experiments_CV2/seed_" + str(seed) + '/linear/' + recommender_names + '/'
            print(f"\nTESTING THE COMBO {recommender_names}")

            # Use either the plain or the "special" CBF recommender in a combo, never both
            if ((icb in rec_perm) or (icbsup in rec_perm)) and not ((icb in rec_perm) and (icbsup in rec_perm)):

                # If directory does not exist, create it
                if not os.path.exists(output_folder_path):
                    os.makedirs(output_folder_path)

                runParameterSearch_Collaborative_partial = partial(
                    runParameterSearch_Collaborative,
                    URM_train=URM_all,
                    ICM_train=ICM_obj,
                    metric_to_optimize="MAP",
                    n_cases=50,
                    n_random_starts=20,
                    output_folder_path=output_folder_path,
                    parallelizeKNN=False,
                    allow_weighting=False,
                    k=k,
                    seed=seed,
                    list_rec=rec_perm,
                    level='hybrid_search')

                pool = multiprocessing.Pool(processes=int(multiprocessing.cpu_count()), maxtasksperchild=1)
                pool.map(runParameterSearch_Collaborative_partial, collaborative_algorithm_list)
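# The 'linear' output folder suggests HybridCombinationSearchCV tunes the
# weights of a linear combination of the three recommenders' scores. A minimal
# sketch of that combination, assuming each base recommender exposes
# _compute_item_score(user_id_array) as in this framework's BaseRecommender;
# the actual hybrid may normalize or rescale scores differently.
import numpy as np

def linear_combination_scores(recommenders, weights, user_id_array):
    item_scores = weights[0] * recommenders[0]._compute_item_score(user_id_array)
    for recommender, weight in zip(recommenders[1:], weights[1:]):
        # Weighted sum of the raw item scores of each base recommender.
        item_scores = item_scores + weight * recommender._compute_item_score(user_id_array)
    return item_scores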