def read_data_split_and_search(dataset_name,
                               flag_baselines_tune=False,
                               flag_DL_article_default=False,
                               flag_DL_tune=False,
                               flag_print_results=False):
    """Run the MCRec experiment (baselines, DL model, result tables) on a dataset.

    :param dataset_name: name of the dataset; only "movielens100k" is supported.
        Any other value prints a message and returns without doing anything.
    :param flag_baselines_tune: if True, run the hyperparameter search of the
        collaborative, content-based and hybrid baselines.
    :param flag_DL_article_default: if True, fit MCRec with the hyperparameters
        listed in this function and evaluate it.
    :param flag_DL_tune: accepted for interface compatibility; not read here.
    :param flag_print_results: if True, generate the LaTeX result tables.
    :return: None
    """

    # Guard first: without it an unsupported name left `dataset` unbound and
    # the function crashed on the first `dataset.URM_DICT` access.
    if dataset_name != "movielens100k":
        print("Dataset name not supported, current is {}".format(dataset_name))
        return

    from Conferences.KDD.MCRec_our_interface.Movielens100K.Movielens100KReader import Movielens100KReader

    result_folder_path = "result_experiments/{}/{}_{}/".format(
        CONFERENCE_NAME, ALGORITHM_NAME, dataset_name)

    dataset = Movielens100KReader(result_folder_path)

    URM_train = dataset.URM_DICT["URM_train"].copy()
    URM_validation = dataset.URM_DICT["URM_validation"].copy()
    URM_test = dataset.URM_DICT["URM_test"].copy()
    URM_test_negative = dataset.URM_DICT["URM_test_negative"].copy()

    # Ensure IMPLICIT data and DISJOINT sets
    assert_implicit_data(
        [URM_train, URM_validation, URM_test, URM_test_negative])
    assert_disjoint_matrices(
        [URM_train, URM_validation, URM_test, URM_test_negative])

    # If directory does not exist, create
    if not os.path.exists(result_folder_path):
        os.makedirs(result_folder_path)

    algorithm_dataset_string = "{}_{}_".format(ALGORITHM_NAME, dataset_name)

    plot_popularity_bias(
        [URM_train + URM_validation, URM_test],
        ["URM train", "URM test"],
        result_folder_path + algorithm_dataset_string + "popularity_plot")

    save_popularity_statistics(
        [URM_train + URM_validation, URM_test],
        ["URM train", "URM test"],
        result_folder_path + algorithm_dataset_string + "popularity_statistics")

    from Base.Evaluation.Evaluator import EvaluatorNegativeItemSample

    evaluator_validation = EvaluatorNegativeItemSample(
        URM_validation, URM_test_negative, cutoff_list=[10])
    evaluator_test = EvaluatorNegativeItemSample(
        URM_test, URM_test_negative, cutoff_list=[10])

    collaborative_algorithm_list = [
        Random,
        TopPop,
        UserKNNCFRecommender,
        ItemKNNCFRecommender,
        P3alphaRecommender,
        RP3betaRecommender,
        PureSVDRecommender,
        NMFRecommender,
        IALSRecommender,
        MatrixFactorization_BPR_Cython,
        MatrixFactorization_FunkSVD_Cython,
        EASE_R_Recommender,
        SLIM_BPR_Cython,
        SLIMElasticNetRecommender,
    ]

    metric_to_optimize = "PRECISION"
    n_cases = 50
    n_random_starts = 15

    # Train + validation interactions, used for the final fit that is
    # evaluated on the test data. Computed once instead of once per search.
    URM_train_last_test = URM_train + URM_validation

    runParameterSearch_Collaborative_partial = partial(
        runParameterSearch_Collaborative,
        URM_train=URM_train,
        URM_train_last_test=URM_train_last_test,
        metric_to_optimize=metric_to_optimize,
        evaluator_validation_earlystopping=evaluator_validation,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test,
        output_folder_path=result_folder_path,
        parallelizeKNN=False,
        allow_weighting=True,
        resume_from_saved=True,
        n_cases=n_cases,
        n_random_starts=n_random_starts)

    if flag_baselines_tune:

        # A failure of one baseline is reported and must not stop the others.
        for recommender_class in collaborative_algorithm_list:
            try:
                runParameterSearch_Collaborative_partial(recommender_class)
            except Exception as e:
                print("On recommender {} Exception {}".format(
                    recommender_class, str(e)))
                traceback.print_exc()

        ######################################################################
        ###### Content Baselines

        for ICM_name, ICM_object in dataset.ICM_DICT.items():
            try:
                runParameterSearch_Content(
                    ItemKNNCBFRecommender,
                    URM_train=URM_train,
                    URM_train_last_test=URM_train_last_test,
                    metric_to_optimize=metric_to_optimize,
                    evaluator_validation=evaluator_validation,
                    evaluator_test=evaluator_test,
                    output_folder_path=result_folder_path,
                    parallelizeKNN=False,
                    allow_weighting=True,
                    resume_from_saved=True,
                    ICM_name=ICM_name,
                    ICM_object=ICM_object.copy(),
                    n_cases=n_cases,
                    n_random_starts=n_random_starts)
            except Exception as e:
                print("On CBF recommender for ICM {} Exception {}".format(
                    ICM_name, str(e)))
                traceback.print_exc()

        ######################################################################
        ###### Hybrid

        for ICM_name, ICM_object in dataset.ICM_DICT.items():
            try:
                runParameterSearch_Hybrid(
                    ItemKNN_CFCBF_Hybrid_Recommender,
                    URM_train=URM_train,
                    URM_train_last_test=URM_train_last_test,
                    metric_to_optimize=metric_to_optimize,
                    evaluator_validation=evaluator_validation,
                    evaluator_test=evaluator_test,
                    output_folder_path=result_folder_path,
                    parallelizeKNN=False,
                    allow_weighting=True,
                    resume_from_saved=True,
                    ICM_name=ICM_name,
                    ICM_object=ICM_object.copy(),
                    n_cases=n_cases,
                    n_random_starts=n_random_starts)
            except Exception as e:
                print("On recommender {} Exception {}".format(
                    ItemKNN_CFCBF_Hybrid_Recommender, str(e)))
                traceback.print_exc()

    ##########################################################################
    ######
    ######      DL ALGORITHM
    ######

    if flag_DL_article_default:

        # The code provided by the original authors of MCRec can be used only
        # for the original data. Here I am passing to the Wrapper the URM_train
        # matrix that is only required for its shape, the train will be done
        # using the preprocessed data the original authors provided.
        # (dataset_name is guaranteed to be "movielens100k" at this point.)
        from Conferences.KDD.MCRec_github.code.Dataset import Dataset

        original_dataset_reader = Dataset(
            'Conferences/KDD/MCRec_github/data/' + 'ml-100k')

        MCRec_article_hyperparameters = {
            "epochs": 200,
            "latent_dim": 128,
            "reg_latent": 0,
            "layers": [512, 256, 128, 64],
            "reg_layes": [0, 0, 0, 0],
            "learning_rate": 1e-3,
            "batch_size": 256,
            "num_negatives": 4,
        }

        MCRec_earlystopping_hyperparameters = {
            "validation_every_n": 5,
            "stop_on_validation": True,
            "evaluator_object": evaluator_validation,
            "lower_validations_allowed": 5,
            "validation_metric": metric_to_optimize
        }

        parameterSearch = SearchSingleCase(
            MCRecML100k_RecommenderWrapper,
            evaluator_validation=evaluator_validation,
            evaluator_test=evaluator_test)

        recommender_input_args = SearchInputRecommenderArgs(
            CONSTRUCTOR_POSITIONAL_ARGS=[URM_train, original_dataset_reader],
            FIT_KEYWORD_ARGS=MCRec_earlystopping_hyperparameters)

        # Same arguments, but the first constructor argument is replaced with
        # the train + validation matrix.
        recommender_input_args_last_test = recommender_input_args.copy()
        recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[0] = URM_train_last_test

        parameterSearch.search(
            recommender_input_args,
            recommender_input_args_last_test=recommender_input_args_last_test,
            fit_hyperparameters_values=MCRec_article_hyperparameters,
            output_folder_path=result_folder_path,
            resume_from_saved=True,
            output_file_name_root=MCRecML100k_RecommenderWrapper.RECOMMENDER_NAME)

    ##########################################################################
    ######
    ######      PRINT RESULTS
    ######

    if flag_print_results:

        # Number of rows of URM_test holding at least one stored entry.
        n_test_users = np.sum(np.ediff1d(URM_test.indptr) >= 1)
        file_name = "{}..//{}_{}_".format(result_folder_path, ALGORITHM_NAME,
                                          dataset_name)

        ICM_names_to_report_list = list(dataset.ICM_DICT.keys())

        result_loader = ResultFolderLoader(
            result_folder_path,
            base_algorithm_list=None,
            other_algorithm_list=[MCRecML100k_RecommenderWrapper],
            KNN_similarity_list=KNN_similarity_to_report_list,
            ICM_names_list=ICM_names_to_report_list,
            UCM_names_list=None)

        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("article_metrics"),
            metrics_list=["PRECISION", "RECALL", "NDCG"],
            cutoffs_list=[10],
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("all_metrics"),
            metrics_list=[
                "PRECISION", "RECALL", "MAP", "MRR", "NDCG", "F1", "HIT_RATE",
                "ARHR", "NOVELTY", "DIVERSITY_MEAN_INTER_LIST",
                "DIVERSITY_HERFINDAHL", "COVERAGE_ITEM", "DIVERSITY_GINI",
                "SHANNON_ENTROPY"
            ],
            cutoffs_list=[10],
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_time_statistics(
            file_name + "{}_latex_results.txt".format("time"),
            n_evaluation_users=n_test_users,
            table_title=None)
def read_data_split_and_search_MCRec(dataset_name):
    """Tune the baselines, fit MCRec (Movielens100K only) and print the result tables.

    :param dataset_name: one of "movielens100k", "yelp", "lastfm". Any other
        value prints a message and returns without doing anything.
    :return: None
    """

    # Guard first: without it an unsupported name left `dataset` unbound and
    # the function crashed on the first attribute access.
    if dataset_name not in ("movielens100k", "yelp", "lastfm"):
        print("Dataset name not supported, current is {}".format(dataset_name))
        return

    from Conferences.KDD.MCRec_our_interface.Movielens100K.Movielens100KReader import Movielens100KReader
    from Conferences.KDD.MCRec_our_interface.LastFM.LastFMReader import LastFMReader
    from Conferences.KDD.MCRec_our_interface.Yelp.YelpReader import YelpReader

    if dataset_name == "movielens100k":
        dataset = Movielens100KReader()
    elif dataset_name == "yelp":
        dataset = YelpReader()
    else:
        # Only "lastfm" can reach this branch because of the guard above.
        dataset = LastFMReader()

    output_folder_path = "result_experiments/{}/{}_{}/".format(
        CONFERENCE_NAME, ALGORITHM_NAME, dataset_name)

    URM_train = dataset.URM_train.copy()
    URM_validation = dataset.URM_validation.copy()
    URM_test = dataset.URM_test.copy()
    URM_test_negative = dataset.URM_test_negative.copy()

    # Ensure IMPLICIT data and DISJOINT sets
    assert_implicit_data(
        [URM_train, URM_validation, URM_test, URM_test_negative])
    assert_disjoint_matrices(
        [URM_train, URM_validation, URM_test, URM_test_negative])

    # If directory does not exist, create
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    algorithm_dataset_string = "{}_{}_".format(ALGORITHM_NAME, dataset_name)

    plot_popularity_bias(
        [URM_train + URM_validation, URM_test],
        ["URM train", "URM test"],
        output_folder_path + algorithm_dataset_string + "popularity_plot")

    save_popularity_statistics(
        [URM_train + URM_validation, URM_test],
        ["URM train", "URM test"],
        output_folder_path + algorithm_dataset_string + "popularity_statistics")

    from Base.Evaluation.Evaluator import EvaluatorNegativeItemSample

    if dataset_name == "movielens100k":
        # The validation interactions are merged into the training matrix, so
        # they count as "seen" items: the validation evaluator must therefore
        # not exclude seen items.
        URM_train += URM_validation
        evaluator_validation = EvaluatorNegativeItemSample(
            URM_validation,
            URM_test_negative,
            cutoff_list=[10],
            exclude_seen=False)
    else:
        evaluator_validation = EvaluatorNegativeItemSample(
            URM_validation, URM_test_negative, cutoff_list=[10])

    evaluator_test = EvaluatorNegativeItemSample(
        URM_test, URM_test_negative, cutoff_list=[10])

    collaborative_algorithm_list = [
        Random,
        TopPop,
        UserKNNCFRecommender,
        ItemKNNCFRecommender,
        P3alphaRecommender,
        RP3betaRecommender,
        PureSVDRecommender,
    ]

    metric_to_optimize = "PRECISION"
    n_cases = 35

    runParameterSearch_Collaborative_partial = partial(
        runParameterSearch_Collaborative,
        URM_train=URM_train,
        metric_to_optimize=metric_to_optimize,
        evaluator_validation_earlystopping=evaluator_validation,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test,
        output_folder_path=output_folder_path,
        parallelizeKNN=False,
        n_cases=n_cases)

    # A failure of one baseline is reported and must not stop the others.
    for recommender_class in collaborative_algorithm_list:
        try:
            runParameterSearch_Collaborative_partial(recommender_class)
        except Exception as e:
            print("On recommender {} Exception {}".format(
                recommender_class, str(e)))
            traceback.print_exc()

    ##########################################################################
    ###### Content Baselines

    ICM_dictionary = dataset.ICM_dict
    ICM_name_list = ICM_dictionary.keys()

    for ICM_name in ICM_name_list:
        try:
            ICM_object = ICM_dictionary[ICM_name]

            runParameterSearch_Content(
                ItemKNNCBFRecommender,
                URM_train=URM_train,
                metric_to_optimize=metric_to_optimize,
                evaluator_validation=evaluator_validation,
                evaluator_test=evaluator_test,
                output_folder_path=output_folder_path,
                parallelizeKNN=False,
                ICM_name=ICM_name,
                ICM_object=ICM_object.copy(),
                n_cases=n_cases)
        except Exception as e:
            print("On CBF recommender for ICM {} Exception {}".format(
                ICM_name, str(e)))
            traceback.print_exc()

    ##########################################################################
    ###### Hybrid

    for ICM_name in ICM_name_list:
        try:
            ICM_object = ICM_dictionary[ICM_name]

            # Pass a copy, as the content-based search does, so the matrix
            # stored in the reader is never handed out directly.
            runParameterSearch_Hybrid(
                ItemKNN_CFCBF_Hybrid_Recommender,
                URM_train=URM_train,
                metric_to_optimize=metric_to_optimize,
                evaluator_validation=evaluator_validation,
                evaluator_test=evaluator_test,
                output_folder_path=output_folder_path,
                parallelizeKNN=False,
                ICM_name=ICM_name,
                ICM_object=ICM_object.copy(),
                allow_weighting=True,
                n_cases=n_cases)
        except Exception as e:
            print("On recommender {} Exception {}".format(
                ItemKNN_CFCBF_Hybrid_Recommender, str(e)))
            traceback.print_exc()

    ##########################################################################
    ###### MCRec

    if dataset_name == "movielens100k":

        # Since the original data reader is used, the content of
        # URM_validation are seen items; evaluator_validation was created
        # with exclude_seen=False for this reason.
        MCRec_article_parameters = {
            "epochs": 100,
            "latent_dim": 128,
            "reg_latent": 0,
            "layers": [512, 256, 128, 64],
            "reg_layes": [0, 0, 0, 0],
            "learning_rate": 1e-3,
            "batch_size": 256,
            "num_negatives": 4,
        }

        MCRec_earlystopping_parameters = {
            "validation_every_n": 5,
            "stop_on_validation": True,
            "evaluator_object": evaluator_validation,
            "lower_validations_allowed": 5,
            "validation_metric": metric_to_optimize
        }

        parameterSearch = SearchSingleCase(
            MCRecML100k_RecommenderWrapper,
            evaluator_validation=evaluator_validation,
            evaluator_test=evaluator_test)

        recommender_parameters = SearchInputRecommenderParameters(
            CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
            FIT_KEYWORD_ARGS=MCRec_earlystopping_parameters)

        parameterSearch.search(
            recommender_parameters,
            fit_parameters_values=MCRec_article_parameters,
            output_folder_path=output_folder_path,
            output_file_name_root=MCRecML100k_RecommenderWrapper.RECOMMENDER_NAME)

    # Number of rows holding at least one stored entry in each matrix.
    n_validation_users = np.sum(np.ediff1d(URM_validation.indptr) >= 1)
    n_test_users = np.sum(np.ediff1d(URM_test.indptr) >= 1)

    ICM_names_to_report_list = ["ICM_genre"]

    print_time_statistics_latex_table(
        result_folder_path=output_folder_path,
        dataset_name=dataset_name,
        results_file_prefix_name=ALGORITHM_NAME,
        other_algorithm_list=[MCRecML100k_RecommenderWrapper],
        ICM_names_to_report_list=ICM_names_to_report_list,
        n_validation_users=n_validation_users,
        n_test_users=n_test_users,
        n_decimals=2)

    print_results_latex_table(
        result_folder_path=output_folder_path,
        results_file_prefix_name=ALGORITHM_NAME,
        dataset_name=dataset_name,
        metrics_to_report_list=["PRECISION", "RECALL", "NDCG"],
        cutoffs_to_report_list=[10],
        ICM_names_to_report_list=ICM_names_to_report_list,
        other_algorithm_list=[MCRecML100k_RecommenderWrapper])
def read_data_split_and_search(dataset_name,
                               flag_baselines_tune=False,
                               flag_DL_article_default=False,
                               flag_DL_tune=False,
                               flag_print_results=False):
    """Run the CoupledCF / DeepCF experiment on a Movielens1M or Tafeng split.

    :param dataset_name: must start with "movielens1m" or "tafeng" and end with
        "_original" or "_ours"; anything else prints a message and returns.
    :param flag_baselines_tune: if True, run the hyperparameter search of the
        collaborative, item-content, user-content and hybrid baselines.
    :param flag_DL_article_default: if True, fit DeepCF and then CoupledCF with
        the hyperparameters listed in this function.
    :param flag_DL_tune: accepted but not read by this function.
    :param flag_print_results: if True, generate the LaTeX result tables.
    :return: None
    """

    result_folder_path = "result_experiments/IJCAI/CoupledCF_{}/".format(
        dataset_name)

    # The name is made of a dataset family prefix and a split-type suffix.
    if dataset_name.endswith("_original"):
        split_type = 'original'
    elif dataset_name.endswith("_ours"):
        split_type = 'ours'
    else:
        split_type = None

    uses_movielens = dataset_name.startswith("movielens1m")
    uses_tafeng = dataset_name.startswith("tafeng")

    if split_type is None or not (uses_movielens or uses_tafeng):
        print("Dataset name not supported, current is {}".format(dataset_name))
        return

    if uses_movielens:
        dataset = Movielens1MReader(result_folder_path, type=split_type)
        item_features_name = "ICM_all"
    else:
        dataset = TafengReader(result_folder_path, type=split_type)
        item_features_name = "ICM_original"

    UCM_to_report = ["UCM_all"]
    ICM_to_report = [item_features_name]

    # Both the user and the item feature matrices are read from ICM_DICT.
    UCM_CoupledCF = dataset.ICM_DICT["UCM_all"]
    ICM_CoupledCF = dataset.ICM_DICT[item_features_name]

    print('Current dataset is: {}'.format(dataset_name))

    # Split the feature matrices by the tag contained in their name.
    UCM_dict = {
        features_name: features_matrix
        for features_name, features_matrix in dataset.ICM_DICT.items()
        if "UCM" in features_name
    }
    ICM_dict = {
        features_name: features_matrix
        for features_name, features_matrix in dataset.ICM_DICT.items()
        if "ICM" in features_name
    }

    URM_train = dataset.URM_DICT["URM_train"].copy()
    URM_validation = dataset.URM_DICT["URM_validation"].copy()
    URM_test = dataset.URM_DICT["URM_test"].copy()
    URM_test_negative = dataset.URM_DICT["URM_test_negative"].copy()

    # Matrices are 1-indexed, so remove first row
    print_negative_items_stats(URM_train[1:], URM_validation[1:],
                               URM_test[1:], URM_test_negative[1:])

    # Ensure IMPLICIT data
    from Utils.assertions_on_data_for_experiments import assert_implicit_data, assert_disjoint_matrices

    assert_implicit_data(
        [URM_train, URM_validation, URM_test, URM_test_negative])
    assert_disjoint_matrices([URM_train, URM_validation, URM_test])

    # If directory does not exist, create
    if not os.path.exists(result_folder_path):
        os.makedirs(result_folder_path)

    collaborative_algorithm_list = [
        Random,
        TopPop,
        UserKNNCFRecommender,
        ItemKNNCFRecommender,
        P3alphaRecommender,
        RP3betaRecommender,
        PureSVDRecommender,
        NMFRecommender,
        IALSRecommender,
        MatrixFactorization_BPR_Cython,
        MatrixFactorization_FunkSVD_Cython,
        EASE_R_Recommender,
        SLIM_BPR_Cython,
        SLIMElasticNetRecommender,
    ]

    metric_to_optimize = "NDCG"
    n_cases = 50
    n_random_starts = 15

    from Base.Evaluation.Evaluator import EvaluatorNegativeItemSample

    cutoff_list_validation = [5]
    cutoff_list_test = list(range(1, 11))

    evaluator_validation = EvaluatorNegativeItemSample(
        URM_validation, URM_test_negative, cutoff_list=cutoff_list_validation)
    evaluator_test = EvaluatorNegativeItemSample(
        URM_test, URM_test_negative, cutoff_list=cutoff_list_test)

    # Keyword arguments that every baseline search receives unchanged.
    common_search_settings = {
        "metric_to_optimize": metric_to_optimize,
        "evaluator_validation": evaluator_validation,
        "evaluator_test": evaluator_test,
        "output_folder_path": result_folder_path,
        "parallelizeKNN": False,
        "allow_weighting": True,
        "resume_from_saved": True,
        "n_cases": n_cases,
        "n_random_starts": n_random_starts,
    }

    runParameterSearch_Collaborative_partial = partial(
        runParameterSearch_Collaborative,
        URM_train=URM_train,
        URM_train_last_test=URM_train + URM_validation,
        evaluator_validation_earlystopping=evaluator_validation,
        **common_search_settings)

    if flag_baselines_tune:

        def _tune_content_then_hybrid(content_class, hybrid_class,
                                      features_name, features_matrix):
            # Content-based search first, then the hybrid one, each on its
            # own copy of the feature matrix.
            runParameterSearch_Content(
                content_class,
                URM_train=URM_train,
                URM_train_last_test=URM_train + URM_validation,
                ICM_name=features_name,
                ICM_object=features_matrix.copy(),
                **common_search_settings)

            runParameterSearch_Hybrid(
                hybrid_class,
                URM_train=URM_train,
                URM_train_last_test=URM_train + URM_validation,
                ICM_name=features_name,
                ICM_object=features_matrix.copy(),
                **common_search_settings)

        for recommender_class in collaborative_algorithm_list:
            try:
                runParameterSearch_Collaborative_partial(recommender_class)
            except Exception as search_error:
                print("On recommender {} Exception {}".format(
                    recommender_class, str(search_error)))
                traceback.print_exc()

        ###### Item Content Baselines
        for ICM_name, ICM_object in ICM_dict.items():
            try:
                _tune_content_then_hybrid(ItemKNNCBFRecommender,
                                          ItemKNN_CFCBF_Hybrid_Recommender,
                                          ICM_name, ICM_object)
            except Exception as search_error:
                print("On CBF recommender for ICM {} Exception {}".format(
                    ICM_name, str(search_error)))
                traceback.print_exc()

        ###### User Content Baselines
        for UCM_name, UCM_object in UCM_dict.items():
            try:
                _tune_content_then_hybrid(UserKNNCBFRecommender,
                                          UserKNN_CFCBF_Hybrid_Recommender,
                                          UCM_name, UCM_object)
            except Exception as search_error:
                print("On CBF recommender for UCM {} Exception {}".format(
                    UCM_name, str(search_error)))
                traceback.print_exc()

    ######
    ######      DL ALGORITHM
    ######

    if flag_DL_article_default:

        model_name = dataset.DATASET_NAME
        is_tafeng_run = 'tafeng' in dataset_name

        earlystopping_hyperparameters = {
            'validation_every_n': 5,
            'stop_on_validation': True,
            'lower_validations_allowed': 5,
            'evaluator_object': evaluator_validation,
            'validation_metric': metric_to_optimize
        }

        # Only the learning rate depends on the dataset family.
        if is_tafeng_run:
            learning_rate = 0.005
        else:
            learning_rate = 0.001

        def _article_hyperparameters(model_dataset_name, model_number):
            # Fit values for one wrapper; key order is kept fixed.
            return {
                'learning_rate': learning_rate,
                'epochs': 100,
                'n_negative_sample': 4,
                'temp_file_folder': None,
                'dataset_name': model_dataset_name,
                'number_model': model_number,
                'verbose': 0,
                'plot_model': False,
            }

        def _fit_single_case(wrapper_class, constructor_args, fit_values):
            # Fit on train, then refit on train + validation for the test run.
            single_case_search = SearchSingleCase(
                wrapper_class,
                evaluator_validation=evaluator_validation,
                evaluator_test=evaluator_test)

            input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=constructor_args,
                FIT_KEYWORD_ARGS=earlystopping_hyperparameters)

            input_args_last_test = input_args.copy()
            input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[0] = URM_train + URM_validation

            single_case_search.search(
                input_args,
                recommender_input_args_last_test=input_args_last_test,
                fit_hyperparameters_values=fit_values,
                output_folder_path=result_folder_path,
                resume_from_saved=True,
                output_file_name_root=wrapper_class.RECOMMENDER_NAME)

        # DeepCF: model number 3, dataset name taken from the reader.
        article_hyperparameters = _article_hyperparameters(model_name, 3)
        _fit_single_case(DeepCF_RecommenderWrapper, [URM_train],
                         article_hyperparameters)

        # CoupledCF: model number 2, fixed dataset name per family.
        coupled_dataset_name = "Tafeng" if is_tafeng_run else "Movielens1M"
        article_hyperparameters = _article_hyperparameters(
            coupled_dataset_name, 2)
        _fit_single_case(CoupledCF_RecommenderWrapper,
                         [URM_train, UCM_CoupledCF, ICM_CoupledCF],
                         article_hyperparameters)

    ######
    ######      PRINT RESULTS
    ######

    if flag_print_results:

        n_test_users = np.sum(np.ediff1d(URM_test.indptr) >= 1)
        file_name = "{}..//{}_{}_".format(result_folder_path, ALGORITHM_NAME,
                                          dataset_name)

        result_loader = ResultFolderLoader(
            result_folder_path,
            base_algorithm_list=None,
            other_algorithm_list=[
                DeepCF_RecommenderWrapper, CoupledCF_RecommenderWrapper
            ],
            KNN_similarity_list=KNN_similarity_to_report_list,
            ICM_names_list=ICM_to_report,
            UCM_names_list=UCM_to_report)

        beyond_accuracy_metrics = [
            "DIVERSITY_MEAN_INTER_LIST", "DIVERSITY_HERFINDAHL",
            "COVERAGE_ITEM", "DIVERSITY_GINI", "SHANNON_ENTROPY"
        ]
        accuracy_metrics = [
            "PRECISION", "RECALL", "MAP_MIN_DEN", "MRR", "NDCG", "F1",
            "HIT_RATE", "ARHR_ALL_HITS", "NOVELTY"
        ]

        # (file tag, metrics, cutoffs) of each table, in generation order.
        report_specifications = [
            ("article_metrics", ["HIT_RATE", "NDCG"], [1, 5, 10]),
            ("beyond_accuracy_metrics", beyond_accuracy_metrics, [5]),
            ("all_metrics", accuracy_metrics + beyond_accuracy_metrics, [5]),
        ]

        for report_tag, report_metrics, report_cutoffs in report_specifications:
            result_loader.generate_latex_results(
                file_name + "{}_latex_results.txt".format(report_tag),
                metrics_list=report_metrics,
                cutoffs_list=report_cutoffs,
                table_title=None,
                highlight_best=True)

        result_loader.generate_latex_time_statistics(
            file_name + "{}_latex_results.txt".format("time"),
            n_evaluation_users=n_test_users,
            table_title=None)
def read_data_split_and_search(dataset_name,
                               flag_baselines_tune=False,
                               flag_DL_article_default=False,
                               flag_DL_tune=False,
                               flag_print_results=False):
    """Run the HERS experiment (DL model, baselines, result tables) on a dataset.

    :param dataset_name: one of the "delicious-hetrec2011" / "lastfm-hetrec2011"
        names, optionally suffixed with "-cold-users" or "-cold-items". Any
        other value prints a message and returns without doing anything.
    :param flag_baselines_tune: if True, run the hyperparameter search of the
        collaborative, content-based and hybrid baselines.
    :param flag_DL_article_default: if True, fit HERS with the hyperparameters
        listed in this function and evaluate it.
    :param flag_DL_tune: accepted for interface compatibility; not read here.
    :param flag_print_results: if True, print the LaTeX result tables.
    :return: None
    """

    result_folder_path = "result_experiments/{}/{}_{}/".format(
        CONFERENCE_NAME, ALGORITHM_NAME, dataset_name)

    if dataset_name == "delicious-hetrec2011":
        dataset = DeliciousHetrec2011Reader(result_folder_path)

    elif dataset_name == "delicious-hetrec2011-cold-users":
        dataset = DeliciousHetrec2011ColdUsersReader(result_folder_path)

    elif dataset_name == "delicious-hetrec2011-cold-items":
        dataset = DeliciousHetrec2011ColdItemsReader(result_folder_path)

    elif dataset_name == "lastfm-hetrec2011":
        dataset = LastFMHetrec2011Reader(result_folder_path)

    elif dataset_name == "lastfm-hetrec2011-cold-users":
        dataset = LastFMHetrec2011ColdUsersReader(result_folder_path)

    elif dataset_name == "lastfm-hetrec2011-cold-items":
        dataset = LastFMHetrec2011ColdItemsReader(result_folder_path)

    else:
        print("Dataset name not supported, current is {}".format(dataset_name))
        return

    print('Current dataset is: {}'.format(dataset_name))

    URM_train = dataset.URM_DICT["URM_train"].copy()
    URM_validation = dataset.URM_DICT["URM_validation"].copy()
    URM_test = dataset.URM_DICT["URM_test"].copy()
    URM_negative = dataset.URM_DICT["URM_negative"].copy()
    UCM_train = dataset.UCM_DICT["UCM"].copy()
    ICM_train = dataset.ICM_DICT["ICM"].copy()

    # Only the two non-cold-start datasets merge train and validation for
    # the final fit; the cold-start variants keep URM_train as it is.
    uses_validation_in_last_test = dataset_name in ("delicious-hetrec2011",
                                                    "lastfm-hetrec2011")

    if uses_validation_in_last_test:
        URM_train_last_test = URM_train + URM_validation

        # Ensure IMPLICIT data and disjoint test-train split
        assert_implicit_data([URM_train, URM_validation, URM_test])
        assert_disjoint_matrices([URM_train, URM_validation, URM_test])
    else:
        URM_train_last_test = URM_train

        # Ensure IMPLICIT data and disjoint test-train split
        assert_implicit_data([URM_train, URM_test])
        assert_disjoint_matrices([URM_train, URM_test])

    # If directory does not exist, create
    if not os.path.exists(result_folder_path):
        os.makedirs(result_folder_path)

    metric_to_optimize = "MAP"
    cutoff_list_validation = [5, 10, 20]
    cutoff_list_test = [5, 10, 20]

    n_cases = 50
    n_random_starts = 15

    evaluator_validation = EvaluatorNegativeItemSample(
        URM_validation, URM_negative, cutoff_list=cutoff_list_validation)
    evaluator_test = EvaluatorNegativeItemSample(
        URM_test, URM_negative, cutoff_list=cutoff_list_test)

    ##########################################################################
    ######
    ######      DL ALGORITHM
    ######

    if flag_DL_article_default:

        article_hyperparameters = {
            "pretrain_samples": 3,
            "pretrain_batch_size": 200,
            "pretrain_iterations": 5,
            "embed_len": 128,
            "topK": 10,
            "fliter_theta": 16,
            "aggre_theta": 64,
            "batch_size": 400,
            "samples": 3,
            "margin": 20,
            "epochs": 30,
            "iter_without_att": 5,
            "directed": False,
        }

        # Do not modify earlystopping
        earlystopping_hyperparameters = {
            "validation_every_n": 5,
            "stop_on_validation": False,
            "lower_validations_allowed": 5,
            "evaluator_object": evaluator_validation,
            "validation_metric": metric_to_optimize,
        }

        # This is a simple version of the tuning code that is reported below
        # and uses SearchSingleCase. You may use this for a simpler testing:
        #
        #   recommender_instance = HERSWrapper(URM_train, UCM_train, ICM_train)
        #   recommender_instance.fit(**article_hyperparameters,
        #                            **earlystopping_hyperparameters)
        #   evaluator_test.evaluateRecommender(recommender_instance)

        # Fit the DL model, select the optimal number of epochs and save the result
        parameterSearch = SearchSingleCase(
            HERSWrapper,
            evaluator_validation=evaluator_validation,
            evaluator_test=evaluator_test)

        recommender_input_args = SearchInputRecommenderArgs(
            CONSTRUCTOR_POSITIONAL_ARGS=[URM_train, UCM_train, ICM_train],
            FIT_KEYWORD_ARGS=earlystopping_hyperparameters)

        search_keyword_args = {
            "fit_hyperparameters_values": article_hyperparameters,
            "output_folder_path": result_folder_path,
            "output_file_name_root": HERSWrapper.RECOMMENDER_NAME,
        }

        if uses_validation_in_last_test:
            # The "last test" arguments are passed only when a distinct
            # train + validation matrix exists.
            recommender_input_args_last_test = recommender_input_args.copy()
            recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[0] = URM_train_last_test
            search_keyword_args["recommender_input_args_last_test"] = recommender_input_args_last_test

        parameterSearch.search(recommender_input_args, **search_keyword_args)

    ##########################################################################
    ######
    ######      BASELINE ALGORITHMS - Nothing should be modified below this point
    ######

    if flag_baselines_tune:

        ######################################################################
        ###### Collaborative Baselines

        collaborative_algorithm_list = [
            Random,
            TopPop,
            ItemKNNCFRecommender,
            PureSVDRecommender,
            SLIM_BPR_Cython,
        ]

        # Running hyperparameter tuning of baselines.
        # See if the results are reasonable and comparable to baselines
        # reported in the paper.
        runParameterSearch_Collaborative_partial = partial(
            runParameterSearch_Collaborative,
            URM_train=URM_train,
            URM_train_last_test=URM_train_last_test,
            metric_to_optimize=metric_to_optimize,
            evaluator_validation_earlystopping=evaluator_validation,
            evaluator_validation=evaluator_validation,
            evaluator_test=evaluator_test,
            output_folder_path=result_folder_path,
            resume_from_saved=True,
            parallelizeKNN=False,
            allow_weighting=True,
            n_cases=n_cases,
            n_random_starts=n_random_starts)

        # A failure of one baseline is reported and must not stop the others.
        for recommender_class in collaborative_algorithm_list:
            try:
                runParameterSearch_Collaborative_partial(recommender_class)
            except Exception as e:
                print("On recommender {} Exception {}".format(
                    recommender_class, str(e)))
                traceback.print_exc()

        ######################################################################
        ###### Content Baselines

        for ICM_name, ICM_object in dataset.ICM_DICT.items():
            try:
                runParameterSearch_Content(
                    ItemKNNCBFRecommender,
                    URM_train=URM_train,
                    URM_train_last_test=URM_train_last_test,
                    metric_to_optimize=metric_to_optimize,
                    evaluator_validation=evaluator_validation,
                    evaluator_test=evaluator_test,
                    output_folder_path=result_folder_path,
                    parallelizeKNN=False,
                    allow_weighting=True,
                    ICM_name=ICM_name,
                    ICM_object=ICM_object.copy(),
                    n_cases=n_cases,
                    n_random_starts=n_random_starts)
            except Exception as e:
                print("On CBF recommender for ICM {} Exception {}".format(
                    ICM_name, str(e)))
                traceback.print_exc()

        ######################################################################
        ###### Hybrid

        for ICM_name, ICM_object in dataset.ICM_DICT.items():
            try:
                runParameterSearch_Hybrid(
                    ItemKNN_CFCBF_Hybrid_Recommender,
                    URM_train=URM_train,
                    URM_train_last_test=URM_train_last_test,
                    metric_to_optimize=metric_to_optimize,
                    evaluator_validation=evaluator_validation,
                    evaluator_test=evaluator_test,
                    output_folder_path=result_folder_path,
                    parallelizeKNN=False,
                    allow_weighting=True,
                    ICM_name=ICM_name,
                    ICM_object=ICM_object.copy(),
                    n_cases=n_cases,
                    n_random_starts=n_random_starts)
            except Exception as e:
                print("On recommender {} Exception {}".format(
                    ItemKNN_CFCBF_Hybrid_Recommender, str(e)))
                traceback.print_exc()

    ##########################################################################
    ######
    ######      PRINT RESULTS
    ######

    if flag_print_results:

        # Number of rows holding at least one stored entry in each matrix.
        # The validation count must come from URM_validation: it was
        # previously computed from URM_test, duplicating n_test_users.
        n_validation_users = np.sum(np.ediff1d(URM_validation.indptr) >= 1)
        n_test_users = np.sum(np.ediff1d(URM_test.indptr) >= 1)

        print_time_statistics_latex_table(
            result_folder_path=result_folder_path,
            dataset_name=dataset_name,
            algorithm_name=ALGORITHM_NAME,
            other_algorithm_list=[HERSWrapper],
            KNN_similarity_to_report_list=KNN_similarity_to_report_list,
            n_validation_users=n_validation_users,
            n_test_users=n_test_users,
            n_decimals=2)

        print_results_latex_table(
            result_folder_path=result_folder_path,
            algorithm_name=ALGORITHM_NAME,
            file_name_suffix="article_metrics_",
            dataset_name=dataset_name,
            metrics_to_report_list=["HIT_RATE", "NDCG"],
            cutoffs_to_report_list=cutoff_list_test,
            other_algorithm_list=[HERSWrapper],
            KNN_similarity_to_report_list=KNN_similarity_to_report_list)

        print_results_latex_table(
            result_folder_path=result_folder_path,
            algorithm_name=ALGORITHM_NAME,
            file_name_suffix="all_metrics_",
            dataset_name=dataset_name,
            metrics_to_report_list=[
                "PRECISION", "RECALL", "MAP", "MRR", "NDCG", "F1", "HIT_RATE",
                "ARHR", "NOVELTY", "DIVERSITY_MEAN_INTER_LIST",
                "DIVERSITY_HERFINDAHL", "COVERAGE_ITEM", "DIVERSITY_GINI",
                "SHANNON_ENTROPY"
            ],
            cutoffs_to_report_list=cutoff_list_validation,
            other_algorithm_list=[HERSWrapper],
            KNN_similarity_to_report_list=KNN_similarity_to_report_list)
def read_data_split_and_search_CollaborativeVAE(dataset_variant, train_interactions):
    """
    Run the CiteULike experiment pipeline for CollaborativeVAE.

    Steps, in order: load the data split, run the hyperparameter search for the
    collaborative, content-based and hybrid baselines, fit CollaborativeVAE with
    the hyperparameters hard-coded below, then call the LaTeX time-statistics and
    results table printers. Each search runs inside its own try/except: a failure
    is printed together with its traceback and the following steps still run.

    :param dataset_variant: forwarded to CiteulikeReader; also part of the output folder name
    :param train_interactions: forwarded to CiteulikeReader; also part of the output folder name
    """

    from Conferences.KDD.CollaborativeVAE_our_interface.Citeulike.CiteulikeReader import CiteulikeReader

    dataset = CiteulikeReader(dataset_variant=dataset_variant,
                              train_interactions=train_interactions)

    output_folder_path = "result_experiments/{}/{}_citeulike_{}_{}/".format(
        CONFERENCE_NAME, ALGORITHM_NAME, dataset_variant, train_interactions)

    URM_train = dataset.URM_train.copy()
    URM_validation = dataset.URM_validation.copy()
    URM_test = dataset.URM_test.copy()

    # Ensure IMPLICIT data and DISJOINT splits
    for check_splits in (assert_implicit_data, assert_disjoint_matrices):
        check_splits([URM_train, URM_validation, URM_test])

    # Create the output folder if it is not there yet
    output_folder_missing = not os.path.exists(output_folder_path)
    if output_folder_missing:
        os.makedirs(output_folder_path)

    collaborative_algorithm_list = [
        Random,
        TopPop,
        UserKNNCFRecommender,
        ItemKNNCFRecommender,
        P3alphaRecommender,
        RP3betaRecommender,
    ]

    metric_to_optimize = "RECALL"

    from Base.Evaluation.Evaluator import EvaluatorHoldout

    evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[150])
    evaluator_test = EvaluatorHoldout(URM_test,
                                      cutoff_list=[50, 100, 150, 200, 250, 300])

    # Keyword arguments shared by the collaborative, content and hybrid searches
    shared_search_kwargs = {
        "URM_train": URM_train,
        "metric_to_optimize": metric_to_optimize,
        "evaluator_validation": evaluator_validation,
        "evaluator_test": evaluator_test,
        "output_folder_path": output_folder_path,
        "parallelizeKNN": False,
        "n_cases": 35,
    }

    tune_collaborative = partial(
        runParameterSearch_Collaborative,
        evaluator_validation_earlystopping=evaluator_validation,
        allow_weighting=True,
        **shared_search_kwargs)

    # pool = multiprocessing.Pool(processes=int(multiprocessing.cpu_count()), maxtasksperchild=1)
    # resultList = pool.map(runParameterSearch_Collaborative_partial, collaborative_algorithm_list)
    #
    # pool.close()
    # pool.join()

    for recommender_class in collaborative_algorithm_list:
        try:
            tune_collaborative(recommender_class)
        except Exception as e:
            print("On recommender {} Exception {}".format(recommender_class, str(e)))
            traceback.print_exc()

    ################################################################################################
    ###### Content Baselines

    ICM_title_abstract = dataset.ICM_title_abstract.copy()

    # The content and hybrid searches receive the same item-feature arguments
    icm_search_kwargs = dict(shared_search_kwargs,
                             ICM_name="ICM_title_abstract",
                             ICM_object=ICM_title_abstract)

    try:
        runParameterSearch_Content(ItemKNNCBFRecommender,
                                   allow_weighting=False,
                                   **icm_search_kwargs)
    except Exception as e:
        print("On recommender {} Exception {}".format(ItemKNNCBFRecommender, str(e)))
        traceback.print_exc()

    ################################################################################################
    ###### Hybrid

    try:
        runParameterSearch_Hybrid(ItemKNN_CFCBF_Hybrid_Recommender,
                                  allow_weighting=True,
                                  **icm_search_kwargs)
    except Exception as e:
        print("On recommender {} Exception {}".format(
            ItemKNN_CFCBF_Hybrid_Recommender, str(e)))
        traceback.print_exc()

    ################################################################################################
    ###### CollaborativeVAE

    try:
        # Hyperparameters passed to the search as fit_parameters_values
        article_fit_parameters = dict(
            epochs=200,
            learning_rate_vae=1e-2,
            learning_rate_cvae=1e-3,
            num_factors=50,
            dimensions_vae=[200, 100],
            epochs_vae=[50, 50],
            batch_size=128,
            lambda_u=0.1,
            lambda_v=10,
            lambda_r=1,
            a=1,
            b=0.01,
            M=300,
            temp_file_folder=output_folder_path + "{}_log/".format(ALGORITHM_NAME),
        )

        # Early-stopping settings, handed over as FIT_KEYWORD_ARGS
        earlystopping_fit_kwargs = dict(
            validation_every_n=5,
            stop_on_validation=True,
            evaluator_object=evaluator_validation,
            lower_validations_allowed=5,
            validation_metric=metric_to_optimize,
        )

        single_case_search = SearchSingleCase(
            CollaborativeVAE_RecommenderWrapper,
            evaluator_validation=evaluator_validation,
            evaluator_test=evaluator_test)

        recommender_parameters = SearchInputRecommenderParameters(
            CONSTRUCTOR_POSITIONAL_ARGS=[URM_train, ICM_title_abstract],
            FIT_KEYWORD_ARGS=earlystopping_fit_kwargs)

        single_case_search.search(
            recommender_parameters,
            fit_parameters_values=article_fit_parameters,
            output_folder_path=output_folder_path,
            output_file_name_root=CollaborativeVAE_RecommenderWrapper.RECOMMENDER_NAME)

    except Exception as e:
        print("On recommender {} Exception {}".format(
            CollaborativeVAE_RecommenderWrapper, str(e)))
        traceback.print_exc()

    # Count the rows whose indptr span is non-empty
    # (assumes CSR layout with one row per user -- TODO confirm)
    n_validation_users = np.sum(np.ediff1d(URM_validation.indptr) >= 1)
    n_test_users = np.sum(np.ediff1d(URM_test.indptr) >= 1)

    ICM_names_to_report_list = ["ICM_title_abstract"]
    dataset_name = "{}_{}".format(dataset_variant, train_interactions)

    print_time_statistics_latex_table(
        result_folder_path=output_folder_path,
        dataset_name=dataset_name,
        results_file_prefix_name=ALGORITHM_NAME,
        other_algorithm_list=[CollaborativeVAE_RecommenderWrapper],
        ICM_names_to_report_list=ICM_names_to_report_list,
        n_validation_users=n_validation_users,
        n_test_users=n_test_users,
        n_decimals=2)

    print_results_latex_table(
        result_folder_path=output_folder_path,
        results_file_prefix_name=ALGORITHM_NAME,
        dataset_name=dataset_name,
        metrics_to_report_list=["RECALL"],
        cutoffs_to_report_list=[50, 100, 150, 200, 250, 300],
        ICM_names_to_report_list=ICM_names_to_report_list,
        other_algorithm_list=[CollaborativeVAE_RecommenderWrapper])
def read_data_split_and_search(dataset_variant, train_interactions,
                               flag_baselines_tune=False,
                               flag_DL_article_default=False,
                               flag_DL_tune=False,
                               flag_print_results=False):
    """
    Run the CiteULike experiment for CollaborativeVAE; each stage is enabled by a flag.

    The data split is always loaded and checked. The stages below then run only
    when their flag is set. Every search is wrapped in its own try/except, so a
    failure is printed with its traceback and the remaining work still runs.

    :param dataset_variant: forwarded to CiteulikeReader; also part of the result folder name
    :param train_interactions: forwarded to CiteulikeReader; also part of the result folder name.
        When equal to 1 the validation evaluator is built with exclude_seen=False and
        the final model is trained on URM_train only
    :param flag_baselines_tune: run the hyperparameter search for the collaborative
        baselines and, for every ICM in dataset.ICM_DICT, the content-based and hybrid baselines
    :param flag_DL_article_default: fit CollaborativeVAE with the hyperparameters hard-coded below
    :param flag_DL_tune: accepted for interface compatibility; not used by this function
    :param flag_print_results: generate the LaTeX result and time-statistics files
    """

    from Conferences.KDD.CollaborativeVAE_our_interface.Citeulike.CiteulikeReader import CiteulikeReader

    result_folder_path = "result_experiments/{}/{}_citeulike_{}_{}/".format(
        CONFERENCE_NAME, ALGORITHM_NAME, dataset_variant, train_interactions)

    dataset = CiteulikeReader(result_folder_path,
                              dataset_variant=dataset_variant,
                              train_interactions=train_interactions)

    URM_train = dataset.URM_DICT["URM_train"].copy()
    URM_validation = dataset.URM_DICT["URM_validation"].copy()
    URM_test = dataset.URM_DICT["URM_test"].copy()

    # Drop this ICM so the loops over dataset.ICM_DICT below (and the report) skip it
    del dataset.ICM_DICT["ICM_tokens_bool"]

    # Ensure IMPLICIT data
    assert_implicit_data([URM_train, URM_validation, URM_test])

    # Due to the sparsity of the dataset, choosing an evaluation as subset of the train
    # while keeping validation interactions in the train set
    if train_interactions == 1:
        # In this case the train data will contain validation data to avoid cold users
        assert_disjoint_matrices([URM_train, URM_test])
        assert_disjoint_matrices([URM_validation, URM_test])
        exclude_seen_validation = False
        URM_train_last_test = URM_train
    else:
        assert_disjoint_matrices([URM_train, URM_validation, URM_test])
        exclude_seen_validation = True
        URM_train_last_test = URM_train + URM_validation

    assert_implicit_data([URM_train_last_test])

    # Create the result folder if needed; exist_ok avoids the check-then-create race
    os.makedirs(result_folder_path, exist_ok=True)

    from Base.Evaluation.Evaluator import EvaluatorHoldout

    evaluator_validation = EvaluatorHoldout(URM_validation,
                                            cutoff_list=[150],
                                            exclude_seen=exclude_seen_validation)
    evaluator_test = EvaluatorHoldout(URM_test,
                                      cutoff_list=[50, 100, 150, 200, 250, 300])

    collaborative_algorithm_list = [
        Random,
        TopPop,
        UserKNNCFRecommender,
        ItemKNNCFRecommender,
        P3alphaRecommender,
        RP3betaRecommender,
        PureSVDRecommender,
        NMFRecommender,
        IALSRecommender,
        MatrixFactorization_BPR_Cython,
        MatrixFactorization_FunkSVD_Cython,
        EASE_R_Recommender,
        SLIM_BPR_Cython,
        SLIMElasticNetRecommender,
    ]

    metric_to_optimize = "RECALL"
    n_cases = 50
    n_random_starts = 15

    runParameterSearch_Collaborative_partial = partial(
        runParameterSearch_Collaborative,
        URM_train=URM_train,
        URM_train_last_test=URM_train_last_test,
        metric_to_optimize=metric_to_optimize,
        evaluator_validation_earlystopping=evaluator_validation,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test,
        output_folder_path=result_folder_path,
        parallelizeKNN=False,
        allow_weighting=True,
        resume_from_saved=True,
        n_cases=n_cases,
        n_random_starts=n_random_starts)

    if flag_baselines_tune:

        for recommender_class in collaborative_algorithm_list:
            try:
                runParameterSearch_Collaborative_partial(recommender_class)
            except Exception as e:
                print("On recommender {} Exception {}".format(recommender_class, str(e)))
                traceback.print_exc()

        ################################################################################################
        ###### Content Baselines

        for ICM_name, ICM_object in dataset.ICM_DICT.items():
            try:
                runParameterSearch_Content(
                    ItemKNNCBFRecommender,
                    URM_train=URM_train,
                    URM_train_last_test=URM_train_last_test,
                    metric_to_optimize=metric_to_optimize,
                    evaluator_validation=evaluator_validation,
                    evaluator_test=evaluator_test,
                    output_folder_path=result_folder_path,
                    parallelizeKNN=False,
                    allow_weighting=True,
                    resume_from_saved=True,
                    ICM_name=ICM_name,
                    ICM_object=ICM_object.copy(),
                    n_cases=n_cases,
                    n_random_starts=n_random_starts)
            except Exception as e:
                print("On CBF recommender for ICM {} Exception {}".format(ICM_name, str(e)))
                traceback.print_exc()

        ################################################################################################
        ###### Hybrid

        for ICM_name, ICM_object in dataset.ICM_DICT.items():
            try:
                runParameterSearch_Hybrid(
                    ItemKNN_CFCBF_Hybrid_Recommender,
                    URM_train=URM_train,
                    URM_train_last_test=URM_train_last_test,
                    metric_to_optimize=metric_to_optimize,
                    evaluator_validation=evaluator_validation,
                    evaluator_test=evaluator_test,
                    output_folder_path=result_folder_path,
                    parallelizeKNN=False,
                    allow_weighting=True,
                    resume_from_saved=True,
                    ICM_name=ICM_name,
                    ICM_object=ICM_object.copy(),
                    n_cases=n_cases,
                    n_random_starts=n_random_starts)
            except Exception as e:
                print("On recommender {} Exception {}".format(
                    ItemKNN_CFCBF_Hybrid_Recommender, str(e)))
                traceback.print_exc()

    ################################################################################################
    ######
    ######      DL ALGORITHM
    ######

    if flag_DL_article_default:

        try:
            cvae_recommender_article_hyperparameters = {
                "epochs": 200,
                "learning_rate_vae": 1e-2,
                "learning_rate_cvae": 1e-3,
                "num_factors": 50,
                "dimensions_vae": [200, 100],
                "epochs_vae": [50, 50],
                "batch_size": 128,
                "lambda_u": 0.1,
                "lambda_v": 10,
                "lambda_r": 1,
                "a": 1,
                "b": 0.01,
                "M": 300,
            }

            cvae_earlystopping_hyperparameters = {
                "validation_every_n": 5,
                "stop_on_validation": True,
                "evaluator_object": evaluator_validation,
                "lower_validations_allowed": 5,
                "validation_metric": metric_to_optimize,
            }

            parameterSearch = SearchSingleCase(
                CollaborativeVAE_RecommenderWrapper,
                evaluator_validation=evaluator_validation,
                evaluator_test=evaluator_test)

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train, dataset.ICM_DICT["ICM_tokens_TFIDF"]],
                FIT_KEYWORD_ARGS=cvae_earlystopping_hyperparameters)

            # Same arguments, but the first constructor argument (the URM) is swapped
            # for the train matrix used in the final test run
            recommender_input_args_last_test = recommender_input_args.copy()
            recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[0] = URM_train_last_test

            parameterSearch.search(
                recommender_input_args,
                recommender_input_args_last_test=recommender_input_args_last_test,
                fit_hyperparameters_values=cvae_recommender_article_hyperparameters,
                output_folder_path=result_folder_path,
                resume_from_saved=True,
                output_file_name_root=CollaborativeVAE_RecommenderWrapper.RECOMMENDER_NAME)

        except Exception as e:
            print("On recommender {} Exception {}".format(
                CollaborativeVAE_RecommenderWrapper, str(e)))
            traceback.print_exc()

    ################################################################################################
    ######
    ######      PRINT RESULTS
    ######

    if flag_print_results:

        # Count the rows whose indptr span is non-empty
        # (assumes CSR layout with one row per user -- TODO confirm)
        n_test_users = np.sum(np.ediff1d(URM_test.indptr) >= 1)

        ICM_names_to_report_list = list(dataset.ICM_DICT.keys())
        dataset_name = "{}_{}".format(dataset_variant, train_interactions)

        # result_folder_path ends with "/", so "..//" points the output files at its parent folder
        file_name = "{}..//{}_{}_".format(result_folder_path, ALGORITHM_NAME, dataset_name)

        # Pass the wrapper class inline: no `other_algorithm_list` variable is ever
        # assigned in this function, so referencing one would raise NameError
        result_loader = ResultFolderLoader(
            result_folder_path,
            base_algorithm_list=None,
            other_algorithm_list=[CollaborativeVAE_RecommenderWrapper],
            KNN_similarity_list=KNN_similarity_to_report_list,
            ICM_names_list=ICM_names_to_report_list,
            UCM_names_list=None)

        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("article_metrics"),
            metrics_list=["RECALL"],
            cutoffs_list=[50, 100, 150, 200, 250, 300],
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("all_metrics"),
            metrics_list=[
                "PRECISION", "RECALL", "MAP_MIN_DEN", "MRR", "NDCG", "F1",
                "HIT_RATE", "ARHR_ALL_HITS", "NOVELTY",
                "DIVERSITY_MEAN_INTER_LIST", "DIVERSITY_HERFINDAHL",
                "COVERAGE_ITEM", "DIVERSITY_GINI", "SHANNON_ENTROPY",
            ],
            cutoffs_list=[150],
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_time_statistics(
            file_name + "{}_latex_results.txt".format("time"),
            n_evaluation_users=n_test_users,
            table_title=None)