def runParameterSearch_SpectralCF(recommender_class, URM_train, earlystopping_parameters, output_file_name_root, n_cases = 35, evaluator_validation= None, evaluator_test=None, metric_to_optimize = "RECALL", output_folder_path ="result_experiments/"):
    """
    Run a Bayesian hyperparameter search (scikit-optimize) for the SpectralCF recommender wrapper.

    :param recommender_class: recommender class to tune; only SpectralCF_RecommenderWrapper is supported
    :param URM_train: training URM, passed to the recommender constructor as positional argument
    :param earlystopping_parameters: dict of early-stopping keyword arguments forwarded to fit()
    :param output_file_name_root: root name for the files written by the search
    :param n_cases: number of hyperparameter configurations to evaluate
    :param evaluator_validation: evaluator used for model selection during the search
    :param evaluator_test: evaluator used for the final evaluation of the best model
    :param metric_to_optimize: name of the metric the search optimizes
    :param output_folder_path: folder where results are saved (created if missing)
    :raises ValueError: if recommender_class is not SpectralCF_RecommenderWrapper
    """

    # If directory does not exist, create it (exist_ok avoids a check-then-create race)
    os.makedirs(output_folder_path, exist_ok = True)

    parameterSearch = SearchBayesianSkopt(recommender_class,
                                          evaluator_validation = evaluator_validation,
                                          evaluator_test = evaluator_test)

    ##########################################################################################################

    if recommender_class is not SpectralCF_RecommenderWrapper:
        # Fail fast with a clear message: the original code silently fell through and
        # crashed with a NameError on the undefined hyperparameter dictionary.
        raise ValueError("runParameterSearch_SpectralCF: unsupported recommender_class '{}', "
                         "only SpectralCF_RecommenderWrapper is supported".format(recommender_class))

    hyperparameters_range_dictionary = {}
    hyperparameters_range_dictionary["batch_size"] = Categorical([1024])
    hyperparameters_range_dictionary["embedding_size"] = Categorical([4, 8, 16, 32])
    # log-uniform priors: decay and learning rate span several orders of magnitude
    hyperparameters_range_dictionary["decay"] = Real(low = 1e-5, high = 1e-1, prior = 'log-uniform')
    hyperparameters_range_dictionary["learning_rate"] = Real(low = 1e-5, high = 1e-2, prior = 'log-uniform')
    hyperparameters_range_dictionary["k"] = Integer(low = 1, high = 6)

    recommender_parameters = SearchInputRecommenderParameters(
        CONSTRUCTOR_POSITIONAL_ARGS = [URM_train],
        CONSTRUCTOR_KEYWORD_ARGS = {},
        FIT_POSITIONAL_ARGS = [],
        FIT_KEYWORD_ARGS = earlystopping_parameters
    )

    #########################################################################################################

    parameterSearch.search(recommender_parameters,
                           parameter_search_space = hyperparameters_range_dictionary,
                           n_cases = n_cases,
                           output_folder_path = output_folder_path,
                           output_file_name_root = output_file_name_root,
                           metric_to_optimize = metric_to_optimize)
def read_data_split_and_search_MCRec(dataset_name):
    """
    Load a dataset split, tune the collaborative/content/hybrid baselines and
    (for movielens100k) fit MCRec with the article hyperparameters, then print
    the LaTeX result tables.

    :param dataset_name: one of "movielens100k", "yelp", "lastfm"
    :raises ValueError: if dataset_name is not recognized
    """

    from Conferences.KDD.MCRec_our_interface.Movielens100K.Movielens100KReader import Movielens100KReader
    from Conferences.KDD.MCRec_our_interface.LastFM.LastFMReader import LastFMReader
    from Conferences.KDD.MCRec_our_interface.Yelp.YelpReader import YelpReader

    if dataset_name == "movielens100k":
        dataset = Movielens100KReader()
    elif dataset_name == "yelp":
        dataset = YelpReader()
    elif dataset_name == "lastfm":
        dataset = LastFMReader()
    else:
        # Fail fast: the original code left 'dataset' unbound and crashed later with a NameError
        raise ValueError("read_data_split_and_search_MCRec: unknown dataset_name '{}'".format(dataset_name))

    output_folder_path = "result_experiments/{}/{}_{}/".format(
        CONFERENCE_NAME, ALGORITHM_NAME, dataset_name)

    URM_train = dataset.URM_train.copy()
    URM_validation = dataset.URM_validation.copy()
    URM_test = dataset.URM_test.copy()
    URM_test_negative = dataset.URM_test_negative.copy()

    # Ensure IMPLICIT data
    assert_implicit_data([URM_train, URM_validation, URM_test, URM_test_negative])
    assert_disjoint_matrices([URM_train, URM_validation, URM_test, URM_test_negative])

    # If directory does not exist, create it (exist_ok avoids a check-then-create race)
    os.makedirs(output_folder_path, exist_ok=True)

    algorithm_dataset_string = "{}_{}_".format(ALGORITHM_NAME, dataset_name)

    plot_popularity_bias([URM_train + URM_validation, URM_test],
                         ["URM train", "URM test"],
                         output_folder_path + algorithm_dataset_string + "popularity_plot")

    save_popularity_statistics([URM_train + URM_validation, URM_test],
                               ["URM train", "URM test"],
                               output_folder_path + algorithm_dataset_string + "popularity_statistics")

    from Base.Evaluation.Evaluator import EvaluatorNegativeItemSample

    if dataset_name == "movielens100k":
        # Since the original data reader is used, the content of URM_validation are seen
        # items, therefore the evaluator must not exclude them
        URM_train += URM_validation
        evaluator_validation = EvaluatorNegativeItemSample(URM_validation, URM_test_negative,
                                                           cutoff_list=[10], exclude_seen=False)
    else:
        evaluator_validation = EvaluatorNegativeItemSample(URM_validation, URM_test_negative,
                                                           cutoff_list=[10])

    evaluator_test = EvaluatorNegativeItemSample(URM_test, URM_test_negative, cutoff_list=[10])

    collaborative_algorithm_list = [
        Random,
        TopPop,
        UserKNNCFRecommender,
        ItemKNNCFRecommender,
        P3alphaRecommender,
        RP3betaRecommender,
        PureSVDRecommender
    ]

    metric_to_optimize = "PRECISION"

    runParameterSearch_Collaborative_partial = partial(
        runParameterSearch_Collaborative,
        URM_train=URM_train,
        metric_to_optimize=metric_to_optimize,
        evaluator_validation_earlystopping=evaluator_validation,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test,
        output_folder_path=output_folder_path,
        parallelizeKNN=False,
        n_cases=35)

    for recommender_class in collaborative_algorithm_list:
        try:
            runParameterSearch_Collaborative_partial(recommender_class)
        except Exception as e:
            print("On recommender {} Exception {}".format(recommender_class, str(e)))
            traceback.print_exc()

    ################################################################################################
    ###### Content Baselines

    ICM_dictionary = dataset.ICM_dict
    ICM_name_list = ICM_dictionary.keys()

    for ICM_name in ICM_name_list:
        try:
            ICM_object = ICM_dictionary[ICM_name]

            runParameterSearch_Content(
                ItemKNNCBFRecommender,
                URM_train=URM_train,
                metric_to_optimize=metric_to_optimize,
                evaluator_validation=evaluator_validation,
                evaluator_test=evaluator_test,
                output_folder_path=output_folder_path,
                parallelizeKNN=False,
                ICM_name=ICM_name,
                ICM_object=ICM_object.copy(),
                n_cases=35)
        except Exception as e:
            print("On CBF recommender for ICM {} Exception {}".format(ICM_name, str(e)))
            traceback.print_exc()

    ################################################################################################
    ###### Hybrid

    for ICM_name in ICM_name_list:
        try:
            ICM_object = ICM_dictionary[ICM_name]

            runParameterSearch_Hybrid(
                ItemKNN_CFCBF_Hybrid_Recommender,
                URM_train=URM_train,
                metric_to_optimize=metric_to_optimize,
                evaluator_validation=evaluator_validation,
                evaluator_test=evaluator_test,
                output_folder_path=output_folder_path,
                parallelizeKNN=False,
                ICM_name=ICM_name,
                ICM_object=ICM_object,
                allow_weighting=True,
                n_cases=35)
        except Exception as e:
            print("On recommender {} Exception {}".format(ItemKNN_CFCBF_Hybrid_Recommender, str(e)))
            traceback.print_exc()

    ################################################################################################
    ###### MCRec (only the movielens100k variant of the wrapper is available)

    if dataset_name == "movielens100k":

        MCRec_article_parameters = {
            "epochs": 100,
            "latent_dim": 128,
            "reg_latent": 0,
            "layers": [512, 256, 128, 64],
            # NOTE: "reg_layes" matches the wrapper's parameter name — do not "fix" the spelling here
            "reg_layes": [0, 0, 0, 0],
            "learning_rate": 1e-3,
            "batch_size": 256,
            "num_negatives": 4,
        }

        MCRec_earlystopping_parameters = {
            "validation_every_n": 5,
            "stop_on_validation": True,
            "evaluator_object": evaluator_validation,
            "lower_validations_allowed": 5,
            "validation_metric": metric_to_optimize
        }

        parameterSearch = SearchSingleCase(
            MCRecML100k_RecommenderWrapper,
            evaluator_validation=evaluator_validation,
            evaluator_test=evaluator_test)

        recommender_parameters = SearchInputRecommenderParameters(
            CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
            FIT_KEYWORD_ARGS=MCRec_earlystopping_parameters)

        parameterSearch.search(
            recommender_parameters,
            fit_parameters_values=MCRec_article_parameters,
            output_folder_path=output_folder_path,
            output_file_name_root=MCRecML100k_RecommenderWrapper.RECOMMENDER_NAME)

    # Number of users with at least one interaction in the respective URM
    n_validation_users = np.sum(np.ediff1d(URM_validation.indptr) >= 1)
    n_test_users = np.sum(np.ediff1d(URM_test.indptr) >= 1)

    ICM_names_to_report_list = ["ICM_genre"]

    print_time_statistics_latex_table(
        result_folder_path=output_folder_path,
        dataset_name=dataset_name,
        results_file_prefix_name=ALGORITHM_NAME,
        other_algorithm_list=[MCRecML100k_RecommenderWrapper],
        ICM_names_to_report_list=ICM_names_to_report_list,
        n_validation_users=n_validation_users,
        n_test_users=n_test_users,
        n_decimals=2)

    print_results_latex_table(
        result_folder_path=output_folder_path,
        results_file_prefix_name=ALGORITHM_NAME,
        dataset_name=dataset_name,
        metrics_to_report_list=["PRECISION", "RECALL", "NDCG"],
        cutoffs_to_report_list=[10],
        ICM_names_to_report_list=ICM_names_to_report_list,
        other_algorithm_list=[MCRecML100k_RecommenderWrapper])
def read_data_split_and_search_CMN(dataset_name):
    """
    Load a dataset split, tune the collaborative baselines and fit CMN with the
    article hyperparameters, then print the LaTeX result tables.

    :param dataset_name: one of "citeulike", "epinions", "pinterest"
    :raises ValueError: if dataset_name is not recognized
    """

    from Conferences.SIGIR.CMN_our_interface.CiteULike.CiteULikeReader import CiteULikeReader
    from Conferences.SIGIR.CMN_our_interface.Pinterest.PinterestICCVReader import PinterestICCVReader
    from Conferences.SIGIR.CMN_our_interface.Epinions.EpinionsReader import EpinionsReader

    if dataset_name == "citeulike":
        dataset = CiteULikeReader()
    elif dataset_name == "epinions":
        dataset = EpinionsReader()
    elif dataset_name == "pinterest":
        dataset = PinterestICCVReader()
    else:
        # Fail fast: the original code left 'dataset' unbound and crashed later with a NameError
        raise ValueError("read_data_split_and_search_CMN: unknown dataset_name '{}'".format(dataset_name))

    output_folder_path = "result_experiments/{}/{}_{}/".format(CONFERENCE_NAME, ALGORITHM_NAME, dataset_name)

    URM_train = dataset.URM_train.copy()
    URM_validation = dataset.URM_validation.copy()
    URM_test = dataset.URM_test.copy()
    URM_test_negative = dataset.URM_test_negative.copy()

    # If directory does not exist, create it (exist_ok avoids a check-then-create race)
    os.makedirs(output_folder_path, exist_ok=True)

    collaborative_algorithm_list = [
        Random,
        TopPop,
        UserKNNCFRecommender,
        ItemKNNCFRecommender,
        P3alphaRecommender,
        RP3betaRecommender,
    ]

    metric_to_optimize = "HIT_RATE"

    # Ensure IMPLICIT data and DISJOINT sets.
    # The negative-sample matrix overlaps differently depending on the dataset,
    # hence the per-dataset disjointness checks.
    assert_implicit_data([URM_train, URM_validation, URM_test, URM_test_negative])

    if dataset_name == "citeulike":
        assert_disjoint_matrices([URM_train, URM_validation, URM_test])
        assert_disjoint_matrices([URM_test, URM_test_negative])
    elif dataset_name == "pinterest":
        assert_disjoint_matrices([URM_train, URM_validation, URM_test])
        assert_disjoint_matrices([URM_train, URM_validation, URM_test_negative])
    else:
        assert_disjoint_matrices([URM_train, URM_validation, URM_test, URM_test_negative])

    algorithm_dataset_string = "{}_{}_".format(ALGORITHM_NAME, dataset_name)

    plot_popularity_bias([URM_train + URM_validation, URM_test],
                         ["URM train", "URM test"],
                         output_folder_path + algorithm_dataset_string + "popularity_plot")

    save_popularity_statistics([URM_train + URM_validation, URM_test],
                               ["URM train", "URM test"],
                               output_folder_path + algorithm_dataset_string + "popularity_statistics")

    from Base.Evaluation.Evaluator import EvaluatorNegativeItemSample

    evaluator_validation = EvaluatorNegativeItemSample(URM_validation, URM_test_negative, cutoff_list=[5])
    evaluator_test = EvaluatorNegativeItemSample(URM_test, URM_test_negative, cutoff_list=[5, 10])

    runParameterSearch_Collaborative_partial = partial(
        runParameterSearch_Collaborative,
        URM_train=URM_train,
        metric_to_optimize=metric_to_optimize,
        evaluator_validation_earlystopping=evaluator_validation,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test,
        output_folder_path=output_folder_path,
        parallelizeKNN=False,
        allow_weighting=True,
        n_cases=35)

    for recommender_class in collaborative_algorithm_list:
        try:
            runParameterSearch_Collaborative_partial(recommender_class)
        except Exception as e:
            print("On recommender {} Exception {}".format(recommender_class, str(e)))
            traceback.print_exc()

    ################################################################################################
    ###### CMN

    try:
        temp_file_folder = output_folder_path + "{}_log/".format(ALGORITHM_NAME)

        CMN_article_parameters = {
            "epochs": 100,
            "epochs_gmf": 100,
            "hops": 3,
            "neg_samples": 4,
            "reg_l2_cmn": 1e-1,
            "reg_l2_gmf": 1e-4,
            "pretrain": True,
            "learning_rate": 1e-3,
            "verbose": False,
            "temp_file_folder": temp_file_folder
        }

        # Batch and embedding sizes follow the article's per-dataset setup
        if dataset_name == "citeulike":
            CMN_article_parameters["batch_size"] = 128
            CMN_article_parameters["embed_size"] = 50
        elif dataset_name == "epinions":
            CMN_article_parameters["batch_size"] = 128
            CMN_article_parameters["embed_size"] = 40
        elif dataset_name == "pinterest":
            CMN_article_parameters["batch_size"] = 256
            CMN_article_parameters["embed_size"] = 50

        CMN_earlystopping_parameters = {
            "validation_every_n": 5,
            "stop_on_validation": True,
            "evaluator_object": evaluator_validation,
            "lower_validations_allowed": 5,
            "validation_metric": metric_to_optimize
        }

        parameterSearch = SearchSingleCase(CMN_RecommenderWrapper,
                                           evaluator_validation=evaluator_validation,
                                           evaluator_test=evaluator_test)

        recommender_parameters = SearchInputRecommenderParameters(
            CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
            FIT_KEYWORD_ARGS=CMN_earlystopping_parameters)

        parameterSearch.search(recommender_parameters,
                               fit_parameters_values=CMN_article_parameters,
                               output_folder_path=output_folder_path,
                               output_file_name_root=CMN_RecommenderWrapper.RECOMMENDER_NAME)

    except Exception as e:
        print("On recommender {} Exception {}".format(CMN_RecommenderWrapper, str(e)))
        traceback.print_exc()

    # Number of users with at least one interaction in the respective URM
    n_validation_users = np.sum(np.ediff1d(URM_validation.indptr) >= 1)
    n_test_users = np.sum(np.ediff1d(URM_test.indptr) >= 1)

    print_time_statistics_latex_table(result_folder_path=output_folder_path,
                                      dataset_name=dataset_name,
                                      results_file_prefix_name=ALGORITHM_NAME,
                                      other_algorithm_list=[CMN_RecommenderWrapper],
                                      ICM_names_to_report_list=[],
                                      n_validation_users=n_validation_users,
                                      n_test_users=n_test_users,
                                      n_decimals=2)

    print_results_latex_table(result_folder_path=output_folder_path,
                              results_file_prefix_name=ALGORITHM_NAME,
                              dataset_name=dataset_name,
                              metrics_to_report_list=["HIT_RATE", "NDCG"],
                              cutoffs_to_report_list=[5, 10],
                              ICM_names_to_report_list=[],
                              other_algorithm_list=[CMN_RecommenderWrapper])
def read_data_split_and_search_SpectralCF(dataset_name,
                                          cold_start=False,
                                          cold_items=None,
                                          isKNN_multiprocess=True,
                                          isKNN_tune=True,
                                          isSpectralCF_train_default=True,
                                          isSpectralCF_tune=True,
                                          print_results=True):
    """
    Load a dataset split and run the SpectralCF experiment pipeline:
    optionally tune the KNN baselines, train SpectralCF with the article
    defaults or tune it, and print the LaTeX result tables.

    :param dataset_name: one of "movielens1m_original", "movielens1m_ours", "hetrec", "amazon_instant_video"
    :param cold_start: run the cold-start variant (only supported by "movielens1m_ours")
    :param cold_items: number of cold items for the cold-start split
    :param isKNN_multiprocess: tune KNN baselines in a multiprocessing pool instead of sequentially
    :param isKNN_tune: whether to tune the KNN baselines at all
    :param isSpectralCF_train_default: train SpectralCF with the article hyperparameters
    :param isSpectralCF_tune: tune SpectralCF (only if isSpectralCF_train_default is False)
    :param print_results: whether to print the LaTeX tables at the end
    :raises ValueError: if dataset_name is not recognized
    """

    if dataset_name == "movielens1m_original":
        assert (cold_start is not True)
        dataset = Movielens1MReader(type="original")
    elif dataset_name == "movielens1m_ours":
        dataset = Movielens1MReader(type="ours", cold_start=cold_start, cold_items=cold_items)
    elif dataset_name == "hetrec":
        assert (cold_start is not True)
        dataset = MovielensHetrec2011Reader()
    elif dataset_name == "amazon_instant_video":
        assert (cold_start is not True)
        dataset = AmazonInstantVideoReader()
    else:
        # Fail fast: the original code left 'dataset' unbound and crashed later with a NameError
        raise ValueError("read_data_split_and_search_SpectralCF: unknown dataset_name '{}'".format(dataset_name))

    if not cold_start:
        output_folder_path = "result_experiments/{}/{}_{}/".format(
            CONFERENCE_NAME, ALGORITHM_NAME, dataset_name)
    else:
        output_folder_path = "result_experiments/{}/{}_cold_{}_{}/".format(
            CONFERENCE_NAME, ALGORITHM_NAME, cold_items, dataset_name)

    URM_train = dataset.URM_train.copy()
    URM_validation = dataset.URM_validation.copy()
    URM_test = dataset.URM_test.copy()

    # Ensure IMPLICIT data and DISJOINT sets
    assert_implicit_data([URM_train, URM_validation, URM_test])
    assert_disjoint_matrices([URM_train, URM_validation, URM_test])

    # If directory does not exist, create it (exist_ok avoids a check-then-create race)
    os.makedirs(output_folder_path, exist_ok=True)

    algorithm_dataset_string = "{}_{}_".format(ALGORITHM_NAME, dataset_name)

    plot_popularity_bias([URM_train + URM_validation, URM_test],
                         ["URM train", "URM test"],
                         output_folder_path + algorithm_dataset_string + "popularity_plot")

    save_popularity_statistics([URM_train + URM_validation, URM_test],
                               ["URM train", "URM test"],
                               output_folder_path + algorithm_dataset_string + "popularity_statistics")

    metric_to_optimize = "RECALL"

    from Base.Evaluation.Evaluator import EvaluatorHoldout

    if not cold_start:
        cutoff_list_validation = [50]
        cutoff_list_test = [20, 30, 40, 50, 60, 70, 80, 90, 100]
    else:
        cutoff_list_validation = [20]
        cutoff_list_test = [20]

    evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=cutoff_list_validation)
    evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=cutoff_list_test)

    ################################################################################################
    ###### KNN CF

    if isKNN_tune:
        collaborative_algorithm_list = [
            Random,
            TopPop,
            UserKNNCFRecommender,
            ItemKNNCFRecommender,
            P3alphaRecommender,
            RP3betaRecommender,
        ]

        runParameterSearch_Collaborative_partial = partial(
            runParameterSearch_Collaborative,
            URM_train=URM_train,
            metric_to_optimize=metric_to_optimize,
            evaluator_validation_earlystopping=evaluator_validation,
            evaluator_validation=evaluator_validation,
            evaluator_test=evaluator_test,
            output_folder_path=output_folder_path,
            parallelizeKNN=False,
            allow_weighting=True,
            n_cases=35)

        if isKNN_multiprocess:
            pool = multiprocessing.Pool(processes=int(multiprocessing.cpu_count()), maxtasksperchild=1)
            resultList = pool.map(runParameterSearch_Collaborative_partial, collaborative_algorithm_list)

            pool.close()
            pool.join()
        else:
            for recommender_class in collaborative_algorithm_list:
                try:
                    runParameterSearch_Collaborative_partial(recommender_class)
                except Exception as e:
                    print("On recommender {} Exception {}".format(recommender_class, str(e)))
                    traceback.print_exc()

    ################################################################################################
    ###### SpectralCF

    if isSpectralCF_train_default:
        # Single run with the article's hyperparameters
        try:
            spectralCF_article_parameters = {
                "epochs": 1000,
                "batch_size": 1024,
                "embedding_size": 16,
                "decay": 0.001,
                "k": 3,
                "learning_rate": 1e-3,
            }

            spectralCF_earlystopping_parameters = {
                "validation_every_n": 5,
                "stop_on_validation": True,
                "lower_validations_allowed": 20,
                "evaluator_object": evaluator_validation,
                "validation_metric": metric_to_optimize,
                "epochs_min": 400,
            }

            parameterSearch = SearchSingleCase(SpectralCF_RecommenderWrapper,
                                               evaluator_validation=evaluator_validation,
                                               evaluator_test=evaluator_test)

            recommender_parameters = SearchInputRecommenderParameters(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                FIT_KEYWORD_ARGS=spectralCF_earlystopping_parameters)

            parameterSearch.search(recommender_parameters,
                                   fit_parameters_values=spectralCF_article_parameters,
                                   output_folder_path=output_folder_path,
                                   output_file_name_root=SpectralCF_RecommenderWrapper.RECOMMENDER_NAME + "_article_default")

        except Exception as e:
            print("On recommender {} Exception {}".format(SpectralCF_RecommenderWrapper, str(e)))
            traceback.print_exc()

    elif isSpectralCF_tune:
        # Bayesian hyperparameter search
        try:
            spectralCF_earlystopping_parameters = {
                "validation_every_n": 5,
                "stop_on_validation": True,
                "lower_validations_allowed": 20,
                "evaluator_object": evaluator_validation,
                "validation_metric": metric_to_optimize,
                "epochs_min": 400,
                "epochs": 2000
            }

            runParameterSearch_SpectralCF(SpectralCF_RecommenderWrapper,
                                          URM_train=URM_train,
                                          earlystopping_parameters=spectralCF_earlystopping_parameters,
                                          metric_to_optimize=metric_to_optimize,
                                          evaluator_validation=evaluator_validation,
                                          evaluator_test=evaluator_test,
                                          output_folder_path=output_folder_path,
                                          n_cases=35,
                                          output_file_name_root=SpectralCF_RecommenderWrapper.RECOMMENDER_NAME)

        except Exception as e:
            print("On recommender {} Exception {}".format(SpectralCF_RecommenderWrapper, str(e)))
            traceback.print_exc()

    ################################################################################################
    ###### print results

    if print_results:
        # Number of users with at least one interaction in the respective URM
        n_validation_users = np.sum(np.ediff1d(URM_validation.indptr) >= 1)
        n_test_users = np.sum(np.ediff1d(URM_test.indptr) >= 1)

        if not cold_start:
            results_file_root_name = ALGORITHM_NAME
        else:
            results_file_root_name = "{}_cold_{}".format(ALGORITHM_NAME, cold_items)

        print_time_statistics_latex_table(result_folder_path=output_folder_path,
                                          dataset_name=dataset_name,
                                          results_file_prefix_name=results_file_root_name,
                                          other_algorithm_list=[SpectralCF_RecommenderWrapper],
                                          n_validation_users=n_validation_users,
                                          n_test_users=n_test_users,
                                          n_decimals=2)

        if cold_start:
            cutoffs_to_report_list = [20]
        else:
            cutoffs_to_report_list = [20, 40, 60, 80, 100]

        print_results_latex_table(result_folder_path=output_folder_path,
                                  results_file_prefix_name=results_file_root_name,
                                  dataset_name=dataset_name,
                                  metrics_to_report_list=["RECALL", "MAP"],
                                  cutoffs_to_report_list=cutoffs_to_report_list,
                                  other_algorithm_list=[SpectralCF_RecommenderWrapper])
def read_data_split_and_search_NeuCF(dataset_name):
    """
    Load a dataset split, tune the collaborative baselines and fit NeuMF with the
    article hyperparameters, then print the LaTeX result tables.

    :param dataset_name: one of "movielens1m", "pinterest"
    :raises ValueError: if dataset_name is not recognized
    """

    from Conferences.WWW.NeuMF_our_interface.Movielens1M.Movielens1MReader import Movielens1MReader
    from Conferences.WWW.NeuMF_our_interface.Pinterest.PinterestICCVReader import PinterestICCVReader

    if dataset_name == "movielens1m":
        dataset = Movielens1MReader()
    elif dataset_name == "pinterest":
        dataset = PinterestICCVReader()
    else:
        # Fail fast: the original code left 'dataset' unbound and crashed later with a NameError
        raise ValueError("read_data_split_and_search_NeuCF: unknown dataset_name '{}'".format(dataset_name))

    output_folder_path = "result_experiments/{}/{}_{}/".format(
        CONFERENCE_NAME, ALGORITHM_NAME, dataset_name)

    URM_train = dataset.URM_train.copy()
    URM_validation = dataset.URM_validation.copy()
    URM_test = dataset.URM_test.copy()
    URM_test_negative = dataset.URM_test_negative.copy()

    # Ensure IMPLICIT data and DISJOINT sets
    assert_implicit_data([URM_train, URM_validation, URM_test, URM_test_negative])
    assert_disjoint_matrices([URM_train, URM_validation, URM_test])
    assert_disjoint_matrices([URM_train, URM_validation, URM_test_negative])

    # If directory does not exist, create it (exist_ok avoids a check-then-create race)
    os.makedirs(output_folder_path, exist_ok=True)

    algorithm_dataset_string = "{}_{}_".format(ALGORITHM_NAME, dataset_name)

    plot_popularity_bias([URM_train + URM_validation, URM_test],
                         ["URM train", "URM test"],
                         output_folder_path + algorithm_dataset_string + "popularity_plot")

    save_popularity_statistics([URM_train + URM_validation, URM_test],
                               ["URM train", "URM test"],
                               output_folder_path + algorithm_dataset_string + "popularity_statistics")

    collaborative_algorithm_list = [
        Random,
        TopPop,
        UserKNNCFRecommender,
        ItemKNNCFRecommender,
        P3alphaRecommender,
        RP3betaRecommender,
        SLIMElasticNetRecommender
    ]

    metric_to_optimize = "HIT_RATE"

    from Base.Evaluation.Evaluator import EvaluatorNegativeItemSample

    evaluator_validation = EvaluatorNegativeItemSample(URM_validation, URM_test_negative, cutoff_list=[10])
    evaluator_test = EvaluatorNegativeItemSample(
        URM_test, URM_test_negative, cutoff_list=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10])

    runParameterSearch_Collaborative_partial = partial(
        runParameterSearch_Collaborative,
        URM_train=URM_train,
        metric_to_optimize=metric_to_optimize,
        evaluator_validation_earlystopping=evaluator_validation,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test,
        output_folder_path=output_folder_path,
        parallelizeKNN=False,
        allow_weighting=True,
        n_cases=35)

    for recommender_class in collaborative_algorithm_list:
        try:
            runParameterSearch_Collaborative_partial(recommender_class)
        except Exception as e:
            print("On recommender {} Exception {}".format(recommender_class, str(e)))
            traceback.print_exc()

    ################################################################################################
    ###### NeuMF

    try:
        # Embedding size follows the article's per-dataset setup
        if dataset_name == "movielens1m":
            num_factors = 64
        elif dataset_name == "pinterest":
            num_factors = 16

        neuMF_article_parameters = {
            "epochs": 100,
            "epochs_gmf": 100,
            "epochs_mlp": 100,
            "batch_size": 256,
            "num_factors": num_factors,
            "layers": [num_factors * 4, num_factors * 2, num_factors],
            "reg_mf": 0.0,
            "reg_layers": [0, 0, 0],
            "num_negatives": 4,
            "learning_rate": 1e-3,
            "learning_rate_pretrain": 1e-3,
            "learner": "sgd",
            "learner_pretrain": "adam",
            "pretrain": True
        }

        neuMF_earlystopping_parameters = {
            "validation_every_n": 5,
            "stop_on_validation": True,
            "evaluator_object": evaluator_validation,
            "lower_validations_allowed": 5,
            "validation_metric": metric_to_optimize
        }

        parameterSearch = SearchSingleCase(
            NeuMF_RecommenderWrapper,
            evaluator_validation=evaluator_validation,
            evaluator_test=evaluator_test)

        recommender_parameters = SearchInputRecommenderParameters(
            CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
            FIT_KEYWORD_ARGS=neuMF_earlystopping_parameters)

        parameterSearch.search(
            recommender_parameters,
            fit_parameters_values=neuMF_article_parameters,
            output_folder_path=output_folder_path,
            output_file_name_root=NeuMF_RecommenderWrapper.RECOMMENDER_NAME)

    except Exception as e:
        print("On recommender {} Exception {}".format(NeuMF_RecommenderWrapper, str(e)))
        traceback.print_exc()

    # Number of users with at least one interaction in the respective URM
    n_validation_users = np.sum(np.ediff1d(URM_validation.indptr) >= 1)
    n_test_users = np.sum(np.ediff1d(URM_test.indptr) >= 1)

    print_time_statistics_latex_table(
        result_folder_path=output_folder_path,
        dataset_name=dataset_name,
        results_file_prefix_name=ALGORITHM_NAME,
        other_algorithm_list=[NeuMF_RecommenderWrapper],
        n_validation_users=n_validation_users,
        n_test_users=n_test_users,
        n_decimals=2)

    print_results_latex_table(result_folder_path=output_folder_path,
                              results_file_prefix_name=ALGORITHM_NAME,
                              dataset_name=dataset_name,
                              metrics_to_report_list=["HIT_RATE", "NDCG"],
                              cutoffs_to_report_list=[1, 5, 10],
                              other_algorithm_list=[NeuMF_RecommenderWrapper])
def read_data_split_and_search_CollaborativeVAE(dataset_variant, train_interactions):
    """
    Load the CiteULike split, tune the collaborative/content/hybrid baselines
    and fit CollaborativeVAE with the article hyperparameters, then print the
    LaTeX result tables.

    :param dataset_variant: CiteULike dataset variant identifier
    :param train_interactions: number of training interactions per user in the split
    """

    from Conferences.KDD.CollaborativeVAE_our_interface.Citeulike.CiteulikeReader import CiteulikeReader

    dataset = CiteulikeReader(dataset_variant=dataset_variant, train_interactions=train_interactions)

    output_folder_path = "result_experiments/{}/{}_citeulike_{}_{}/".format(
        CONFERENCE_NAME, ALGORITHM_NAME, dataset_variant, train_interactions)

    URM_train = dataset.URM_train.copy()
    URM_validation = dataset.URM_validation.copy()
    URM_test = dataset.URM_test.copy()

    # Ensure IMPLICIT data
    assert_implicit_data([URM_train, URM_validation, URM_test])
    assert_disjoint_matrices([URM_train, URM_validation, URM_test])

    # If directory does not exist, create
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    collaborative_algorithm_list = [
        Random,
        TopPop,
        UserKNNCFRecommender,
        ItemKNNCFRecommender,
        P3alphaRecommender,
        RP3betaRecommender,
    ]

    metric_to_optimize = "RECALL"

    from Base.Evaluation.Evaluator import EvaluatorHoldout

    evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[150])
    evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[50, 100, 150, 200, 250, 300])

    runParameterSearch_Collaborative_partial = partial(
        runParameterSearch_Collaborative,
        URM_train=URM_train,
        metric_to_optimize=metric_to_optimize,
        evaluator_validation_earlystopping=evaluator_validation,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test,
        output_folder_path=output_folder_path,
        parallelizeKNN=False,
        allow_weighting=True,
        n_cases=35)

    # Tune each collaborative baseline sequentially; a failure in one must not stop the others
    for recommender_class in collaborative_algorithm_list:
        try:
            runParameterSearch_Collaborative_partial(recommender_class)
        except Exception as exc:
            print("On recommender {} Exception {}".format(recommender_class, str(exc)))
            traceback.print_exc()

    ################################################################################################
    ###### Content Baselines

    ICM_title_abstract = dataset.ICM_title_abstract.copy()

    try:
        runParameterSearch_Content(ItemKNNCBFRecommender,
                                   URM_train=URM_train,
                                   metric_to_optimize=metric_to_optimize,
                                   evaluator_validation=evaluator_validation,
                                   evaluator_test=evaluator_test,
                                   output_folder_path=output_folder_path,
                                   parallelizeKNN=False,
                                   ICM_name="ICM_title_abstract",
                                   ICM_object=ICM_title_abstract,
                                   allow_weighting=False,
                                   n_cases=35)
    except Exception as exc:
        print("On recommender {} Exception {}".format(ItemKNNCBFRecommender, str(exc)))
        traceback.print_exc()

    ################################################################################################
    ###### Hybrid

    try:
        runParameterSearch_Hybrid(ItemKNN_CFCBF_Hybrid_Recommender,
                                  URM_train=URM_train,
                                  metric_to_optimize=metric_to_optimize,
                                  evaluator_validation=evaluator_validation,
                                  evaluator_test=evaluator_test,
                                  output_folder_path=output_folder_path,
                                  parallelizeKNN=False,
                                  ICM_name="ICM_title_abstract",
                                  ICM_object=ICM_title_abstract,
                                  allow_weighting=True,
                                  n_cases=35)
    except Exception as exc:
        print("On recommender {} Exception {}".format(ItemKNN_CFCBF_Hybrid_Recommender, str(exc)))
        traceback.print_exc()

    ################################################################################################
    ###### CollaborativeVAE

    try:
        temp_file_folder = output_folder_path + "{}_log/".format(ALGORITHM_NAME)

        # Article hyperparameters (single configuration, no search)
        cvae_recommender_article_parameters = {
            "epochs": 200,
            "learning_rate_vae": 1e-2,
            "learning_rate_cvae": 1e-3,
            "num_factors": 50,
            "dimensions_vae": [200, 100],
            "epochs_vae": [50, 50],
            "batch_size": 128,
            "lambda_u": 0.1,
            "lambda_v": 10,
            "lambda_r": 1,
            "a": 1,
            "b": 0.01,
            "M": 300,
            "temp_file_folder": temp_file_folder
        }

        cvae_earlystopping_parameters = {
            "validation_every_n": 5,
            "stop_on_validation": True,
            "evaluator_object": evaluator_validation,
            "lower_validations_allowed": 5,
            "validation_metric": metric_to_optimize
        }

        single_case_search = SearchSingleCase(
            CollaborativeVAE_RecommenderWrapper,
            evaluator_validation=evaluator_validation,
            evaluator_test=evaluator_test)

        cvae_input_parameters = SearchInputRecommenderParameters(
            CONSTRUCTOR_POSITIONAL_ARGS=[URM_train, ICM_title_abstract],
            FIT_KEYWORD_ARGS=cvae_earlystopping_parameters)

        single_case_search.search(
            cvae_input_parameters,
            fit_parameters_values=cvae_recommender_article_parameters,
            output_folder_path=output_folder_path,
            output_file_name_root=CollaborativeVAE_RecommenderWrapper.RECOMMENDER_NAME)

    except Exception as exc:
        print("On recommender {} Exception {}".format(CollaborativeVAE_RecommenderWrapper, str(exc)))
        traceback.print_exc()

    # Count users with at least one interaction in validation and test
    n_validation_users = np.sum(np.ediff1d(URM_validation.indptr) >= 1)
    n_test_users = np.sum(np.ediff1d(URM_test.indptr) >= 1)

    ICM_names_to_report_list = ["ICM_title_abstract"]
    dataset_name = "{}_{}".format(dataset_variant, train_interactions)

    print_time_statistics_latex_table(
        result_folder_path=output_folder_path,
        dataset_name=dataset_name,
        results_file_prefix_name=ALGORITHM_NAME,
        other_algorithm_list=[CollaborativeVAE_RecommenderWrapper],
        ICM_names_to_report_list=ICM_names_to_report_list,
        n_validation_users=n_validation_users,
        n_test_users=n_test_users,
        n_decimals=2)

    print_results_latex_table(
        result_folder_path=output_folder_path,
        results_file_prefix_name=ALGORITHM_NAME,
        dataset_name=dataset_name,
        metrics_to_report_list=["RECALL"],
        cutoffs_to_report_list=[50, 100, 150, 200, 250, 300],
        ICM_names_to_report_list=ICM_names_to_report_list,
        other_algorithm_list=[CollaborativeVAE_RecommenderWrapper])
def read_data_split_and_search_MultiVAE(dataset_name):
    """Run the full MultiVAE reproducibility experiment on one dataset.

    Loads the cold-user split for *dataset_name*, tunes the collaborative
    baselines with Bayesian search, evaluates the MultiVAE wrapper with the
    article's hyperparameters, and prints LaTeX result/time tables.

    :param dataset_name: "movielens20m" or "netflixPrize".
    """

    from Conferences.WWW.MultiVAE_our_interface.Movielens20M.Movielens20MReader import Movielens20MReader
    from Conferences.WWW.MultiVAE_our_interface.NetflixPrize.NetflixPrizeReader import NetflixPrizeReader

    split_type = "cold_user"

    # NOTE(review): an unrecognized dataset_name leaves `dataset` unbound and
    # raises NameError below — confirm callers only pass the two names handled here.
    if dataset_name == "movielens20m":
        dataset = Movielens20MReader(split_type=split_type)

    elif dataset_name == "netflixPrize":
        dataset = NetflixPrizeReader()

    output_folder_path = "result_experiments/{}/{}_{}_{}/".format(
        CONFERENCE_NAME, ALGORITHM_NAME, dataset_name, split_type)

    # If directory does not exist, create
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    metric_to_optimize = "NDCG"

    # NOTE(review): split_type is hard-coded to "cold_user" above, so this branch
    # always runs; URM_* and the evaluators used later are only defined inside it.
    if split_type == "cold_user":

        collaborative_algorithm_list = [
            Random,
            TopPop,
            ItemKNNCFRecommender,
            P3alphaRecommender,
            RP3betaRecommender,
        ]

        URM_train = dataset.URM_train.copy()
        URM_train_all = dataset.URM_train_all.copy()
        URM_validation = dataset.URM_validation.copy()
        URM_test = dataset.URM_test.copy()

        # Ensure IMPLICIT data and DISJOINT sets
        assert_implicit_data(
            [URM_train, URM_train_all, URM_validation, URM_test])

        assert_disjoint_matrices([URM_train, URM_validation, URM_test])
        assert_disjoint_matrices([URM_train_all, URM_validation, URM_test])

        from Base.Evaluation.Evaluator import EvaluatorHoldout

        evaluator_validation = EvaluatorHoldout(URM_validation,
                                                cutoff_list=[100])
        evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[20, 50, 100])

        # Wrap both evaluators so recommendations are computed from the
        # cold-user profiles in URM_train_all.
        evaluator_validation = EvaluatorUserSubsetWrapper(
            evaluator_validation, URM_train_all)
        evaluator_test = EvaluatorUserSubsetWrapper(evaluator_test,
                                                    URM_train_all)

    # Pre-bind the shared search arguments; only the recommender class varies.
    runParameterSearch_Collaborative_partial = partial(
        runParameterSearch_Collaborative,
        URM_train=URM_train,
        metric_to_optimize=metric_to_optimize,
        evaluator_validation_earlystopping=evaluator_validation,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test,
        output_folder_path=output_folder_path,
        parallelizeKNN=False,
        allow_weighting=True,
        n_cases=35)

    # pool = multiprocessing.Pool(processes=int(multiprocessing.cpu_count()), maxtasksperchild=1)
    # pool.map(runParameterSearch_Collaborative_partial, collaborative_algorithm_list)
    #
    # pool.close()
    # pool.join()

    # Run the baselines sequentially; one failure must not abort the others.
    for recommender_class in collaborative_algorithm_list:

        try:
            runParameterSearch_Collaborative_partial(recommender_class)
        except Exception as e:
            print("On recommender {} Exception {}".format(
                recommender_class, str(e)))
            traceback.print_exc()

    ################################################################################################
    ###### MultiVAE

    try:
        output_root_path_MultiVAE = output_folder_path + "{}_log/".format(
            ALGORITHM_NAME)

        # NOTE(review): `epochs` stays unbound for any other dataset_name —
        # the NameError would be caught by the except below.
        if dataset_name == "movielens20m":
            epochs = 100

        elif dataset_name == "netflixPrize":
            epochs = 200

        # Hyperparameters reported in the original article.
        multiVAE_article_parameters = {
            "epochs": epochs,
            "batch_size": 500,
            "total_anneal_steps": 200000,
            "p_dims": None,
        }

        multiVAE_earlystopping_parameters = {
            "validation_every_n": 5,
            "stop_on_validation": True,
            "evaluator_object": evaluator_validation,
            "lower_validations_allowed": 5,
            "validation_metric": metric_to_optimize,
            "temp_file_folder": output_root_path_MultiVAE
        }

        # Single-configuration "search": evaluate the article parameters once.
        parameterSearch = SearchSingleCase(
            MultiVAE_RecommenderWrapper,
            evaluator_validation=evaluator_validation,
            evaluator_test=evaluator_test)

        recommender_parameters = SearchInputRecommenderParameters(
            CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
            FIT_KEYWORD_ARGS=multiVAE_earlystopping_parameters)

        parameterSearch.search(
            recommender_parameters,
            fit_parameters_values=multiVAE_article_parameters,
            output_folder_path=output_folder_path,
            output_file_name_root=MultiVAE_RecommenderWrapper.RECOMMENDER_NAME)

    except Exception as e:
        print("On recommender {} Exception {}".format(
            MultiVAE_RecommenderWrapper, str(e)))
        traceback.print_exc()

    # Count users with at least one interaction in each evaluation matrix.
    n_validation_users = np.sum(np.ediff1d(URM_validation.indptr) >= 1)
    n_test_users = np.sum(np.ediff1d(URM_test.indptr) >= 1)

    print_time_statistics_latex_table(
        result_folder_path=output_folder_path,
        dataset_name=dataset_name,
        results_file_prefix_name=ALGORITHM_NAME,
        other_algorithm_list=[MultiVAE_RecommenderWrapper],
        n_validation_users=n_validation_users,
        n_test_users=n_test_users,
        n_decimals=2)

    print_results_latex_table(
        result_folder_path=output_folder_path,
        results_file_prefix_name=ALGORITHM_NAME,
        dataset_name=dataset_name,
        metrics_to_report_list=["RECALL", "NDCG"],
        cutoffs_to_report_list=[20, 50, 100],
        other_algorithm_list=[MultiVAE_RecommenderWrapper])
def read_data_split_and_search_CMN(dataset_name):
    """Run the full CMN reproducibility experiment on one dataset.

    Loads the dataset split, normalizes item popularity and configures the
    popularity-aware loss/metric parameters, tunes the collaborative
    baselines, evaluates the CMN wrapper with the article's hyperparameters,
    and prints LaTeX result/time tables.

    :param dataset_name: "citeulike", "epinions" or "pinterest".
    """

    from Conferences.SIGIR.CMN_our_interface.CiteULike.CiteULikeReader import CiteULikeReader
    from Conferences.SIGIR.CMN_our_interface.Pinterest.PinterestICCVReader import PinterestICCVReader
    from Conferences.SIGIR.CMN_our_interface.Epinions.EpinionsReader import EpinionsReader

    # NOTE(review): an unrecognized dataset_name leaves `dataset` unbound and
    # raises NameError below — confirm callers only pass the three names handled here.
    if dataset_name == "citeulike":
        dataset = CiteULikeReader()

    elif dataset_name == "epinions":
        dataset = EpinionsReader()

    elif dataset_name == "pinterest":
        dataset = PinterestICCVReader()

    output_folder_path = "result_experiments/{}/{}_{}/".format(
        CONFERENCE_NAME, ALGORITHM_NAME, dataset_name)

    URM_train = dataset.URM_train.copy()
    URM_validation = dataset.URM_validation.copy()
    URM_test = dataset.URM_test.copy()
    URM_test_negative = dataset.URM_test_negative.copy()

    # Debug switches: test_mode skips baseline tuning and shrinks the cutoff
    # list; limit restricts all matrices to the first p users.
    test_mode = False
    limit = False
    if limit:
        p = 700
        URM_train = URM_train[:p, :]
        URM_validation = URM_validation[:p, :]
        URM_test = URM_test[:p, :]
        URM_test_negative = URM_test_negative[:p, :]

    # Sample of the debug output inspected for user 3 (kept for reference):
    '''
    user: 3
    is_relevant_current_cutoff: [ True  True  True False False]
    recommended_items_current_cutoff: [  65   86   68 3671 1341]
    Warning! is_relevant_current_cutoff.sum()>1: 3
    relevant_items: [65 68 81 86]
    relevant_items_rating: [1. 1. 1. 1.]
    items_to_compute: [  42   62   65   68   81   86  148  218  559  662  776  792 1164 1341
     1418 1491 1593 1603 1617 1697 2140 2251 2446 2517 2566 2643 2719 2769
     2771 3081 3133 3161 3188 3268 3409 3666 3671 3845 3864 3897 3984 4272
     4327 4329 4431 4519 4565 4568 4718 4812 4915 5096 5128 5137 5141 5184
     5217 5241 5371 5394 5415 5492 5521 5775 5798 5830 5831 5931 6005 6281
     6375 6558 6638 6644 6661 6705 6881 6898 6939 6970 7010 7018 7147 7224
     7327 7404 7453 7466 7475 7561 7764 8064 8102 8222 8368 8530 8957 9101
     9322 9368 9619 9782]
    '''

    # Debug dump of user 3's rows in every split.
    print('USER 3')
    print('test ', URM_test[3])
    print('train ', URM_train[3])
    print('valid ', URM_validation[3])
    print('neg ', URM_test_negative[3])

    # NOTE: during execution an HR > 1 had been observed. The reason is that it
    # was computed on the validation set (which contains several preferred items
    # per user, not just one).
    # In the end HR will be less than or equal to one because it is computed on
    # the test set.

    # Min-max normalize item popularity to [0, 1].
    popularity = get_popularity(URM_train)

    min_value = np.min(popularity)
    max_value = np.max(popularity)
    gap = max_value - min_value
    popularity = (popularity - min_value) / gap

    print('Luciano > min:', min_value)
    print('Luciano > max:', max_value)
    print('Luciano > normalized popularity:', popularity)

    # Configure the popularity-aware loss/metric globals used by the wrappers.
    set_parameters(popularity=popularity,
                   loss_alpha=200,
                   loss_beta=0.02,
                   loss_scale=1,
                   loss_percentile=get_percentile(popularity, 45),
                   metrics_alpha=100,
                   metrics_beta=0.03,
                   metrics_gamma=5,
                   metrics_scale=1 / 15,
                   metrics_percentile=0.45,
                   new_loss=False)

    # If directory does not exist, create
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    collaborative_algorithm_list = [
        Random,
        TopPop,
        UserKNNCFRecommender,
        ItemKNNCFRecommender,
        P3alphaRecommender,
        RP3betaRecommender,
    ]

    # metric_to_optimize = "WEIGHTED_HIT_RATE"
    metric_to_optimize = "HIT_RATE"
    # metric_to_optimize = "CUSTOM_HIT_RATE"
    print('metric_to_optimize:', metric_to_optimize)

    # Ensure IMPLICIT data and DISJOINT sets
    assert_implicit_data(
        [URM_train, URM_validation, URM_test, URM_test_negative])

    # Disjointness requirements differ per dataset: the negative samples may
    # legitimately overlap some splits depending on how they were generated.
    if dataset_name == "citeulike":
        assert_disjoint_matrices([URM_train, URM_validation, URM_test])
        assert_disjoint_matrices([URM_test, URM_test_negative])

    elif dataset_name == "pinterest":
        assert_disjoint_matrices([URM_train, URM_validation, URM_test])
        assert_disjoint_matrices(
            [URM_train, URM_validation, URM_test_negative])

    else:
        assert_disjoint_matrices(
            [URM_train, URM_validation, URM_test, URM_test_negative])

    algorithm_dataset_string = "{}_{}_".format(ALGORITHM_NAME, dataset_name)

    plot_popularity_bias([URM_train + URM_validation, URM_test],
                         ["URM train", "URM test"],
                         output_folder_path + algorithm_dataset_string +
                         "popularity_plot")

    save_popularity_statistics([URM_train + URM_validation, URM_test],
                               ["URM train", "URM test"],
                               output_folder_path + algorithm_dataset_string +
                               "popularity_statistics")

    from Base.Evaluation.Evaluator import EvaluatorNegativeItemSample

    evaluator_validation = EvaluatorNegativeItemSample(URM_validation,
                                                       URM_test_negative,
                                                       cutoff_list=[5])
    if not test_mode:
        evaluator_test = EvaluatorNegativeItemSample(URM_test,
                                                     URM_test_negative,
                                                     cutoff_list=[5, 10])
    else:
        evaluator_test = EvaluatorNegativeItemSample(URM_test,
                                                     URM_test_negative,
                                                     cutoff_list=[5])

    # Pre-bind the shared search arguments; only the recommender class varies.
    runParameterSearch_Collaborative_partial = partial(
        runParameterSearch_Collaborative,
        URM_train=URM_train,
        metric_to_optimize=metric_to_optimize,
        evaluator_validation_earlystopping=evaluator_validation,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test,
        output_folder_path=output_folder_path,
        parallelizeKNN=False,
        allow_weighting=True,
        n_cases=35)

    # pool = multiprocessing.Pool(processes=int(multiprocessing.cpu_count()), maxtasksperchild=1)
    # resultList = pool.map(runParameterSearch_Collaborative_partial, collaborative_algorithm_list)
    #
    # pool.close()
    # pool.join()

    # Tune the baselines sequentially; one failure must not abort the others.
    for recommender_class in collaborative_algorithm_list:

        try:
            if not test_mode:
                runParameterSearch_Collaborative_partial(recommender_class)
            else:
                print('skipping', recommender_class)
        except Exception as e:
            print("On recommender {} Exception {}".format(
                recommender_class, str(e)))
            traceback.print_exc()

    ################################################################################################
    ###### CMN

    '''
    Parameters from original paper:
    {
      "batch_size": 128,
      "decay_rate": 0.9,
      "embed_size": 50,
      "filename": "data/pinterest.npz",
      "grad_clip": 5.0,
      "hops": 2,
      "item_count": "9916",
      "l2": 0.1,
      "learning_rate": 0.001,
      "logdir": "result/004/",
      "max_neighbors": 1586,
      "neg_count": 4,
      "optimizer": "rmsprop",
      "optimizer_params": "{'momentum': 0.9, 'decay': 0.9}",
      "pretrain": "pretrain/pinterest_e50.npz",
      "save_directory": "result/004/",
      "tol": 1e-05,
      "user_count": "55187"
    }
    '''

    try:
        temp_file_folder = output_folder_path + "{}_log/".format(
            ALGORITHM_NAME)

        # Hyperparameters reported in the original article; batch/embedding
        # sizes are dataset-specific and set right below.
        CMN_article_parameters = {
            "epochs": 100,
            "epochs_gmf": 100,
            "hops": 3,
            "neg_samples": 4,
            "reg_l2_cmn": 1e-1,
            "reg_l2_gmf": 1e-4,
            "pretrain": True,
            "learning_rate": 1e-3,
            "verbose": False,
            "temp_file_folder": temp_file_folder
        }

        if dataset_name == "citeulike":
            CMN_article_parameters["batch_size"] = 128
            CMN_article_parameters["embed_size"] = 50

        elif dataset_name == "epinions":
            CMN_article_parameters["batch_size"] = 128
            CMN_article_parameters["embed_size"] = 40

        elif dataset_name == "pinterest":
            CMN_article_parameters["batch_size"] = 128
            # CMN_article_parameters["batch_size"] = 256
            CMN_article_parameters["embed_size"] = 50

        CMN_earlystopping_parameters = {
            "validation_every_n": 5,
            "stop_on_validation": True,
            "evaluator_object": evaluator_validation,
            "lower_validations_allowed": 5,
            "validation_metric": metric_to_optimize
        }

        # Single-configuration "search": evaluate the article parameters once.
        parameterSearch = SearchSingleCase(
            CMN_RecommenderWrapper,
            evaluator_validation=evaluator_validation,
            evaluator_test=evaluator_test)

        recommender_parameters = SearchInputRecommenderParameters(
            CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
            FIT_KEYWORD_ARGS=CMN_earlystopping_parameters)

        parameterSearch.search(
            recommender_parameters,
            fit_parameters_values=CMN_article_parameters,
            output_folder_path=output_folder_path,
            output_file_name_root=CMN_RecommenderWrapper.RECOMMENDER_NAME)

    except Exception as e:
        print("On recommender {} Exception {}".format(CMN_RecommenderWrapper,
                                                      str(e)))
        traceback.print_exc()

    # Count users with at least one interaction in each evaluation matrix.
    n_validation_users = np.sum(np.ediff1d(URM_validation.indptr) >= 1)
    n_test_users = np.sum(np.ediff1d(URM_test.indptr) >= 1)

    print_time_statistics_latex_table(
        result_folder_path=output_folder_path,
        dataset_name=dataset_name,
        results_file_prefix_name=ALGORITHM_NAME,
        other_algorithm_list=[CMN_RecommenderWrapper],
        ICM_names_to_report_list=[],
        n_validation_users=n_validation_users,
        n_test_users=n_test_users,
        n_decimals=2)

    if not test_mode:
        print_results_latex_table(
            result_folder_path=output_folder_path,
            results_file_prefix_name=ALGORITHM_NAME,
            dataset_name=dataset_name,
            metrics_to_report_list=["HIT_RATE", "NDCG"],
            cutoffs_to_report_list=[5, 10],
            ICM_names_to_report_list=[],
            other_algorithm_list=[CMN_RecommenderWrapper])
    else:
        print_results_latex_table(
            result_folder_path=output_folder_path,
            results_file_prefix_name=ALGORITHM_NAME,
            dataset_name=dataset_name,
            metrics_to_report_list=["HIT_RATE", "NDCG"],
            cutoffs_to_report_list=[5],
            ICM_names_to_report_list=[],
            other_algorithm_list=[CMN_RecommenderWrapper])
def runParameterSearch_Hybrid(recommender_class, URM_train, ICM_object, ICM_name, n_cases=30,
                              evaluator_validation=None, evaluator_test=None,
                              metric_to_optimize="PRECISION",
                              output_folder_path="result_experiments/",
                              parallelizeKNN=False, allow_weighting=True,
                              similarity_type_list=None):
    """Run a Bayesian hyperparameter search for a hybrid CF+CBF recommender.

    Only ItemKNN_CFCBF_Hybrid_Recommender is handled: for each similarity type
    the ICM_weight hyperparameter is optimized with SearchBayesianSkopt and the
    results are written to *output_folder_path*.

    :param recommender_class: recommender class to tune (expected to be
        ItemKNN_CFCBF_Hybrid_Recommender; other classes are a no-op).
    :param URM_train: user-rating matrix used for training.
    :param ICM_object: item-content matrix passed to the recommender constructor.
    :param ICM_name: label appended to the output file name.
    :param n_cases: number of hyperparameter configurations to evaluate.
    :param evaluator_validation: evaluator used during the search.
    :param evaluator_test: evaluator used on the best configuration.
    :param metric_to_optimize: metric maximized by the search.
    :param output_folder_path: directory for result files (created if missing).
    :param parallelizeKNN: if True, run one process per similarity type.
    :param allow_weighting: forwarded to the similarity-type runner.
    :param similarity_type_list: similarity types to try; defaults to the
        standard five KNN similarities when None.
    """

    # If directory does not exist, create
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    ##########################################################################################################

    # One result file per (recommender, ICM) combination.
    output_file_name_root = recommender_class.RECOMMENDER_NAME + "_{}".format(
        ICM_name)

    parameterSearch = SearchBayesianSkopt(
        recommender_class,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test)

    if recommender_class is ItemKNN_CFCBF_Hybrid_Recommender:

        if similarity_type_list is None:
            similarity_type_list = [
                'cosine', 'jaccard', "asymmetric", "dice", "tversky"
            ]

        hyperparameters_range_dictionary = {}
        # Relative weight of the content matrix w.r.t. the URM, explored on a
        # log scale since plausible values span several orders of magnitude.
        hyperparameters_range_dictionary["ICM_weight"] = Real(
            low=1e-2, high=1e2, prior='log-uniform')

        recommender_parameters = SearchInputRecommenderParameters(
            CONSTRUCTOR_POSITIONAL_ARGS=[ICM_object, URM_train],
            CONSTRUCTOR_KEYWORD_ARGS={},
            FIT_POSITIONAL_ARGS=[],
            FIT_KEYWORD_ARGS={})

        # Pre-bind everything except the similarity type, which varies per run.
        run_KNNCFRecommender_on_similarity_type_partial = partial(
            run_KNNRecommender_on_similarity_type,
            parameter_search_space=hyperparameters_range_dictionary,
            recommender_parameters=recommender_parameters,
            parameterSearch=parameterSearch,
            n_cases=n_cases,
            output_folder_path=output_folder_path,
            output_file_name_root=output_file_name_root,
            metric_to_optimize=metric_to_optimize,
            allow_weighting=allow_weighting)

        if parallelizeKNN:
            pool = multiprocessing.Pool(processes=multiprocessing.cpu_count(),
                                        maxtasksperchild=1)
            # pool.map's return value is unused: the runner writes its own output files.
            pool.map(run_KNNCFRecommender_on_similarity_type_partial,
                     similarity_type_list)

            pool.close()
            pool.join()

        else:
            for similarity_type in similarity_type_list:
                run_KNNCFRecommender_on_similarity_type_partial(
                    similarity_type)

        return
def runParameterSearch_Collaborative(recommender_class, URM_train, metric_to_optimize="PRECISION",
                                     evaluator_validation=None, evaluator_test=None,
                                     evaluator_validation_earlystopping=None,
                                     output_folder_path="result_experiments/",
                                     parallelizeKNN=True, n_cases=35,
                                     allow_weighting=True, similarity_type_list=None):
    """Run a hyperparameter search for one collaborative recommender class.

    Dispatches on *recommender_class*: parameterless models get a single
    evaluation, KNN models get one search per similarity type, and graph/
    factorization models get a Bayesian search over their own hyperparameter
    ranges. All exceptions are caught, printed, and appended to ErrorLog.txt.

    :param recommender_class: recommender class to tune.
    :param URM_train: user-rating matrix used for training.
    :param metric_to_optimize: metric maximized by the search.
    :param evaluator_validation: evaluator used during the search.
    :param evaluator_test: evaluator used on the best configuration.
    :param evaluator_validation_earlystopping: accepted for interface
        consistency with callers; not referenced in this body.
    :param output_folder_path: directory for result files (created if missing).
    :param parallelizeKNN: if True, run one process per KNN similarity type.
    :param n_cases: number of hyperparameter configurations to evaluate.
    :param allow_weighting: forwarded to the KNN similarity-type runner.
    :param similarity_type_list: similarity types for KNN models; defaults to
        the standard five when None.
    """

    # If directory does not exist, create
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    try:

        output_file_name_root = recommender_class.RECOMMENDER_NAME

        parameterSearch = SearchBayesianSkopt(
            recommender_class,
            evaluator_validation=evaluator_validation,
            evaluator_test=evaluator_test)

        if recommender_class in [TopPop, GlobalEffects, Random]:
            """
            TopPop, GlobalEffects and Random have no parameters therefore only one evaluation is needed
            """

            # Replace the Bayesian searcher: a single evaluation suffices.
            parameterSearch = SearchSingleCase(
                recommender_class,
                evaluator_validation=evaluator_validation,
                evaluator_test=evaluator_test)

            recommender_parameters = SearchInputRecommenderParameters(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

            parameterSearch.search(recommender_parameters,
                                   fit_parameters_values={},
                                   output_folder_path=output_folder_path,
                                   output_file_name_root=output_file_name_root)

            return

        ##########################################################################################################

        if recommender_class in [ItemKNNCFRecommender, UserKNNCFRecommender]:

            if similarity_type_list is None:
                similarity_type_list = [
                    'cosine', 'jaccard', "asymmetric", "dice", "tversky"
                ]

            recommender_parameters = SearchInputRecommenderParameters(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

            # Pre-bind everything except the similarity type; the runner adds
            # the similarity-specific hyperparameter space itself.
            run_KNNCFRecommender_on_similarity_type_partial = partial(
                run_KNNRecommender_on_similarity_type,
                recommender_parameters=recommender_parameters,
                parameter_search_space={},
                parameterSearch=parameterSearch,
                n_cases=n_cases,
                output_folder_path=output_folder_path,
                output_file_name_root=output_file_name_root,
                metric_to_optimize=metric_to_optimize,
                allow_weighting=allow_weighting)

            if parallelizeKNN:
                pool = multiprocessing.Pool(
                    processes=multiprocessing.cpu_count(), maxtasksperchild=1)
                pool.map(run_KNNCFRecommender_on_similarity_type_partial,
                         similarity_type_list)

                pool.close()
                pool.join()

            else:
                for similarity_type in similarity_type_list:
                    run_KNNCFRecommender_on_similarity_type_partial(
                        similarity_type)

            return

        ##########################################################################################################

        if recommender_class is P3alphaRecommender:
            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["topK"] = Integer(5, 800)
            hyperparameters_range_dictionary["alpha"] = Real(low=0,
                                                             high=2,
                                                             prior='uniform')
            hyperparameters_range_dictionary[
                "normalize_similarity"] = Categorical([True, False])

            recommender_parameters = SearchInputRecommenderParameters(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

        ##########################################################################################################

        if recommender_class is RP3betaRecommender:
            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["topK"] = Integer(5, 800)
            hyperparameters_range_dictionary["alpha"] = Real(low=0,
                                                             high=2,
                                                             prior='uniform')
            hyperparameters_range_dictionary["beta"] = Real(low=0,
                                                            high=2,
                                                            prior='uniform')
            hyperparameters_range_dictionary[
                "normalize_similarity"] = Categorical([True, False])

            recommender_parameters = SearchInputRecommenderParameters(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

        ##########################################################################################################

        if recommender_class is PureSVDRecommender:
            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["num_factors"] = Integer(1, 250)

            recommender_parameters = SearchInputRecommenderParameters(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

        ##########################################################################################################

        if recommender_class is SLIMElasticNetRecommender:
            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["topK"] = Integer(5, 800)
            hyperparameters_range_dictionary["l1_ratio"] = Real(
                low=1e-5, high=1.0, prior='log-uniform')
            hyperparameters_range_dictionary["alpha"] = Real(low=1e-3,
                                                             high=1.0,
                                                             prior='uniform')

            recommender_parameters = SearchInputRecommenderParameters(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

        #########################################################################################################
        ## Final step, after the hyperparameter range has been defined for each type of algorithm
        # NOTE(review): if recommender_class matched none of the branches above,
        # hyperparameters_range_dictionary / recommender_parameters are unbound
        # here and this call raises NameError, which is caught and logged below.
        parameterSearch.search(
            recommender_parameters,
            parameter_search_space=hyperparameters_range_dictionary,
            n_cases=n_cases,
            output_folder_path=output_folder_path,
            output_file_name_root=output_file_name_root,
            metric_to_optimize=metric_to_optimize)

    except Exception as e:

        print("On recommender {} Exception {}".format(recommender_class,
                                                      str(e)))
        traceback.print_exc()

        # Append the failure to a persistent log so long batch runs can be audited.
        error_file = open(output_folder_path + "ErrorLog.txt", "a")
        error_file.write("On recommender {} Exception {}\n".format(
            recommender_class, str(e)))
        error_file.close()
def runParameterSearch_ContentUser(recommender_class, URM_train, UCM_object, UCM_name, n_cases=30,
                                   evaluator_validation=None, evaluator_test=None,
                                   metric_to_optimize="PRECISION",
                                   output_folder_path="result_experiments/",
                                   parallelizeKNN=False, allow_weighting=True,
                                   similarity_type_list=None):
    """Run a hyperparameter search for a user-content (UCM-based) recommender.

    One search is launched per similarity type, either sequentially or in a
    process pool; results are written to *output_folder_path* with the UCM
    name appended to the file name root.

    :param recommender_class: recommender class to tune.
    :param URM_train: user-rating matrix used for training.
    :param UCM_object: user-content matrix passed to the recommender constructor.
    :param UCM_name: label appended to the output file name.
    :param n_cases: number of hyperparameter configurations to evaluate.
    :param evaluator_validation: evaluator used during the search.
    :param evaluator_test: evaluator used on the best configuration.
    :param metric_to_optimize: metric maximized by the search.
    :param output_folder_path: directory for result files (created if missing).
    :param parallelizeKNN: if True, run one process per similarity type.
    :param allow_weighting: forwarded to the similarity-type runner.
    :param similarity_type_list: similarity types to try; defaults to the
        standard five KNN similarities when None.
    """

    # Create the output directory when it is missing.
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    ##########################################################################################################

    # One result file per (recommender, UCM) combination.
    output_file_name_root = recommender_class.RECOMMENDER_NAME + "_{}".format(
        UCM_name)

    search_engine = SearchBayesianSkopt(
        recommender_class,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test)

    if similarity_type_list is None:
        similarity_type_list = [
            'cosine', 'jaccard', "asymmetric", "dice", "tversky"
        ]

    search_input = SearchInputRecommenderParameters(
        CONSTRUCTOR_POSITIONAL_ARGS=[UCM_object, URM_train],
        CONSTRUCTOR_KEYWORD_ARGS={},
        FIT_POSITIONAL_ARGS=[],
        FIT_KEYWORD_ARGS={})

    # Everything but the similarity type is fixed; the runner supplies the
    # similarity-specific hyperparameter space itself.
    run_search_for_similarity = partial(
        run_KNNRecommender_on_similarity_type,
        recommender_parameters=search_input,
        parameter_search_space={},
        parameterSearch=search_engine,
        n_cases=n_cases,
        output_folder_path=output_folder_path,
        output_file_name_root=output_file_name_root,
        metric_to_optimize=metric_to_optimize,
        allow_weighting=allow_weighting)

    if not parallelizeKNN:
        for current_similarity in similarity_type_list:
            run_search_for_similarity(current_similarity)
    else:
        worker_pool = multiprocessing.Pool(
            processes=int(multiprocessing.cpu_count()), maxtasksperchild=1)
        worker_pool.map(run_search_for_similarity, similarity_type_list)

        worker_pool.close()
        worker_pool.join()