def main():
    # Data loading: read the dataset and build a leave-k-out train/test split
    root_data_path = os.path.join(get_project_root_path(), "data/")
    data_reader = RecSys2019Reader(root_data_path)
    data_reader = New_DataSplitter_leave_k_out(data_reader, k_out_value=K_OUT,
                                               use_validation_set=False,
                                               allow_cold_users=ALLOW_COLD_USERS,
                                               force_new_split=True,
                                               seed=get_split_seed())
    data_reader.load_data()
    URM_train, URM_test = data_reader.get_holdout_split()
    ICM_all, _ = get_ICM_train_new(data_reader)
    UCM_all, _ = get_UCM_train_new(data_reader)

    # Ignore users outside the profile-length thresholds and, optionally,
    # users that are not in the target set
    ignore_users = get_ignore_users(URM_train,
                                    data_reader.get_original_user_id_to_index_mapper(),
                                    lower_threshold=LOWER_THRESHOLD,
                                    upper_threshold=UPPER_THRESHOLD,
                                    ignore_non_target_users=IGNORE_NON_TARGET_USERS)
    evaluator = EvaluatorHoldout(URM_test, cutoff_list=[CUTOFF],
                                 ignore_users=ignore_users)

    # Model evaluation
    model = get_model(URM_train, ICM_all, UCM_all)
    print(evaluator.evaluateRecommender(model))
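
# Minimal module entry point so the script can be run directly (assumes K_OUT,
# ALLOW_COLD_USERS, LOWER_THRESHOLD, UPPER_THRESHOLD, IGNORE_NON_TARGET_USERS
# and CUTOFF are module-level constants, as the code above implies):
if __name__ == '__main__':
    main()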
def get_ignore_users(URM_train, original_user_id_to_index_mapper,
                     lower_threshold, upper_threshold,
                     ignore_non_target_users=True):
    data_path = os.path.join(get_project_root_path(), "data/")
    ignore_users = []

    # Users whose profile length falls outside (lower_threshold, upper_threshold)
    users_outside = get_users_outside_profile_len(URM_train,
                                                  lower_threshold=lower_threshold,
                                                  upper_threshold=upper_threshold)
    if len(users_outside) > 0:
        print("Excluding users with profile length outside ({}, {})".format(
            lower_threshold, upper_threshold))
        ignore_users = np.concatenate([ignore_users, users_outside])

    # Users that do not appear in the target-user file
    if ignore_non_target_users:
        print("Excluding non-target users...")
        original_target_users = read_target_users(
            os.path.join(data_path, "data_target_users_test.csv"))
        target_users = get_index_target_users(original_target_users,
                                              original_user_id_to_index_mapper)
        non_target_users = np.setdiff1d(np.arange(URM_train.shape[0]),
                                        target_users, assume_unique=True)
        ignore_users = np.concatenate([ignore_users, non_target_users])

    return np.unique(ignore_users)
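
# Hedged sketch of the helper used above. get_users_outside_profile_len is not
# shown in this section; assuming it returns the indices of users whose number
# of train interactions lies outside (lower_threshold, upper_threshold), a
# minimal implementation over a scipy CSR matrix could look like this
# (the _sketch suffix marks it as illustrative, not project code):
def get_users_outside_profile_len_sketch(URM_train, lower_threshold, upper_threshold):
    profile_lengths = np.ediff1d(URM_train.tocsr().indptr)  # interactions per user row
    outside_mask = (profile_lengths < lower_threshold) | (profile_lengths > upper_threshold)
    return np.arange(URM_train.shape[0])[outside_mask]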
def read_split_load_data(k_out, allow_cold_users, seed):
    root_data_path = os.path.join(get_project_root_path(), "data/")
    data_reader = RecSys2019Reader(root_data_path)
    data_reader = New_DataSplitter_leave_k_out(data_reader, k_out_value=k_out,
                                               use_validation_set=False,
                                               allow_cold_users=allow_cold_users,
                                               force_new_split=True, seed=seed)
    data_reader.load_data()
    return data_reader
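
# Example usage (a sketch): rebuild the same holdout split used in main()
# data_reader = read_split_load_data(k_out=K_OUT, allow_cold_users=False,
#                                    seed=get_split_seed())
# URM_train, URM_test = data_reader.get_holdout_split()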
def __init__(self, URM_train, train_svm_file_path,
             approximate_recommender: BaseRecommender, ICM_train=None,
             UCM_train=None, item_feature_fields=None,
             user_feature_fields=None, valid_svm_file_path=None,
             max_items_to_predict=1000, model_filename="model.out",
             model_type="ffm", temp_relative_folder="temp/", verbose=True):
    self.ICM_train = ICM_train
    self.UCM_train = UCM_train

    # Field layout: field 0 for all user columns, field 1 for all item
    # columns, then ICM feature fields (shifted past the item field) and UCM
    # feature fields (shifted past the last ICM field). The side-feature
    # fields are optional, so only concatenate the ones that were provided.
    user_fields = np.full(shape=URM_train.shape[0], fill_value=0)
    item_fields = np.full(shape=URM_train.shape[1], fill_value=1)
    fields_to_concat = [user_fields, item_fields]
    next_field_id = 2
    if item_feature_fields is not None:
        item_feature_fields = item_feature_fields + next_field_id
        next_field_id = np.max(item_feature_fields) + 1
        fields_to_concat.append(item_feature_fields)
    if user_feature_fields is not None:
        user_feature_fields = user_feature_fields + next_field_id
        fields_to_concat.append(user_feature_fields)
    self.fields = np.concatenate(fields_to_concat)

    self.approximate_recommender = approximate_recommender
    self.max_items_to_predict = max_items_to_predict

    # Set path of temp folder and model_path
    root_path = get_project_root_path()
    fm_data_path = os.path.join(root_path, "resources", "ffm_data")
    self.temp_folder = os.path.join(fm_data_path, temp_relative_folder)
    self.model_folder = os.path.join(fm_data_path, "model")
    self.model_path = os.path.join(self.model_folder, model_filename)

    if model_type == "ffm":
        self.model = xl.create_ffm()
    elif model_type == "fm":
        self.model = xl.create_fm()
    else:
        raise ValueError("Invalid model_type '{}': choose 'ffm' or 'fm'".format(model_type))

    self.model.setTrain(train_svm_file_path)
    if valid_svm_file_path is not None:
        self.model.setValidate(valid_svm_file_path)

    super().__init__(URM_train, verbose)
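
# Illustration of the field layout built in __init__ above (a standalone
# sketch, not project code): with 3 users, 2 items and two item features
# whose ICM fields are [0, 0], the per-column field ids become
# [0, 0, 0] (users) + [1, 1] (items) + [2, 2] (item features shifted by 2).
#
#   import numpy as np
#   user_fields = np.full(3, 0)
#   item_fields = np.full(2, 1)
#   item_feature_fields = np.array([0, 0]) + 2
#   print(np.concatenate([user_fields, item_fields, item_feature_fields]))
#   # -> [0 0 0 1 1 2 2]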
ICM_all, item_feature_fields = get_ICM_with_fields(data_reader)
# Build UCMs: do not change the order of ICMs and UCMs
UCM_all, user_feature_fields = get_UCM_with_fields(data_reader)

# Exclude cold users (no interactions in URM_train) from the evaluation
cold_users_mask = np.ediff1d(URM_train.tocsr().indptr) == 0
cold_users = np.arange(URM_train.shape[0])[cold_users_mask]

cutoff_list = [10]
evaluator = EvaluatorHoldout(URM_test, cutoff_list=cutoff_list,
                             ignore_users=cold_users)

# The approximate recommender whose top-N candidates the FM model re-ranks
best_model = new_best_models.ItemCBF_CF.get_model(URM_train, ICM_all)
best_model.fit()

ffm_data_path = os.path.join(get_project_root_path(), "resources", "ffm_data")
model = FieldAwareFMRecommender(
    URM_train, model_type="fm",
    train_svm_file_path=os.path.join(ffm_data_path,
                                     "users_25_item_20_train_uncompressed.txt"),
    valid_svm_file_path=os.path.join(ffm_data_path,
                                     "users_25_item_20_valid_uncompressed.txt"),
    approximate_recommender=best_model,
    ICM_train=ICM_all, UCM_train=UCM_all,
    item_feature_fields=item_feature_fields,
    user_feature_fields=user_feature_fields,
    max_items_to_predict=20)
# model.load_model(os.path.join(ffm_data_path, "model"), "model_row_4.out")
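
# Plausible continuation (a sketch): train the FM model and evaluate it with
# the cold-user-filtered evaluator built above. The exact fit() parameters
# are an assumption, not confirmed by this section.
# model.fit()
# print(evaluator.evaluateRecommender(model))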
ignore_users = get_ignore_users(URM_train,
                                data_reader.get_original_user_id_to_index_mapper(),
                                lower_threshold=LOWER_THRESHOLD,
                                upper_threshold=UPPER_THRESHOLD,
                                ignore_non_target_users=IGNORE_NON_TARGET_USERS)
evaluator = EvaluatorHoldout(URM_test, cutoff_list=[CUTOFF],
                             ignore_users=ignore_users)
URM_train_list.append(URM_train)
evaluator_list.append(evaluator)

# Setting evaluator
evaluator = EvaluatorCrossValidationKeepKOut(URM_train_list, evaluator_list,
                                             cutoff=CUTOFF)
results = evaluator.crossevaluateRecommender(recommender_class, model_parameters)

# Write cross-validation results to file
date_string = datetime.now().strftime('%b%d_%H-%M-%S')
cross_valid_path = os.path.join(get_project_root_path(), "report/cross_validation/")
file_path = os.path.join(cross_valid_path,
                         "cross_valid_{}_{}.txt".format(model_name, date_string))
write_results_on_file(file_path, recommender_class.RECOMMENDER_NAME,
                      model_parameters, num_folds, seed_list, results)
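
# Note (assumption): the block appending to URM_train_list and evaluator_list
# above is expected to run once per seed in seed_list, with URM_train and
# URM_test rebuilt for each fold, e.g.:
# for seed in seed_list:
#     data_reader = read_split_load_data(K_OUT, ALLOW_COLD_USERS, seed)
#     URM_train, URM_test = data_reader.get_holdout_split()
#     ...  # build the evaluator and append both, as above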
def run_parameter_search_mf_collaborative(
        recommender_class, URM_train, UCM_train=None, UCM_name="NO_UCM",
        ICM_train=None, ICM_name="NO_ICM", URM_train_last_test=None,
        metric_to_optimize="PRECISION", evaluator_validation=None,
        evaluator_test=None, evaluator_validation_earlystopping=None,
        output_folder_path="result_experiments/", parallelize_search=True,
        n_cases=35, n_random_starts=5, resume_from_saved=False,
        save_model="best", approximate_recommender=None):
    # If the output directory does not exist, create it
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    # Kept for recommenders that support early stopping
    earlystopping_keywargs = {
        "validation_every_n": 5,
        "stop_on_validation": True,
        "evaluator_object": evaluator_validation_earlystopping,
        "lower_validations_allowed": 5,
        "validation_metric": metric_to_optimize,
    }

    URM_train = URM_train.copy()
    if URM_train_last_test is not None:
        URM_train_last_test = URM_train_last_test.copy()

    try:
        output_file_name_root = recommender_class.RECOMMENDER_NAME + "_" + ICM_name + "_" + UCM_name

        parameterSearch = SearchBayesianSkopt(recommender_class,
                                              evaluator_validation=evaluator_validation,
                                              evaluator_test=evaluator_test)
        recommender_input_args = SearchInputRecommenderArgs(
            CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
            CONSTRUCTOR_KEYWORD_ARGS={},
            FIT_POSITIONAL_ARGS=[],
            FIT_KEYWORD_ARGS={})

        hyperparameters_range_dictionary = {}

        if recommender_class is ImplicitALSRecommender:
            hyperparameters_range_dictionary["num_factors"] = Integer(300, 550)
            hyperparameters_range_dictionary["regularization"] = Real(low=1e-2, high=200,
                                                                      prior='log-uniform')
            hyperparameters_range_dictionary["epochs"] = Categorical([50])
            hyperparameters_range_dictionary["confidence_scaling"] = Categorical(["linear"])
            hyperparameters_range_dictionary["alpha"] = Real(low=1e-2, high=1e2,
                                                             prior='log-uniform')

        if recommender_class is MF_BPR_Recommender:
            hyperparameters_range_dictionary["num_factors"] = Categorical([600])
            hyperparameters_range_dictionary["regularization"] = Real(low=1e-4, high=1e-1,
                                                                      prior='log-uniform')
            hyperparameters_range_dictionary["learning_rate"] = Real(low=1e-2, high=1e-1,
                                                                     prior='log-uniform')
            hyperparameters_range_dictionary["epochs"] = Categorical([300])

        if recommender_class is FunkSVDRecommender:
            hyperparameters_range_dictionary["num_factors"] = Integer(50, 400)
            hyperparameters_range_dictionary["regularization"] = Real(low=1e-8, high=1e-1,
                                                                      prior='log-uniform')
            hyperparameters_range_dictionary["learning_rate"] = Real(low=1e-6, high=1e-1,
                                                                     prior='log-uniform')
            hyperparameters_range_dictionary["epochs"] = Categorical([300])

        if recommender_class is LogisticMFRecommender:
            hyperparameters_range_dictionary["num_factors"] = Integer(20, 400)
            hyperparameters_range_dictionary["regularization"] = Real(low=1e-5, high=1e1,
                                                                      prior='log-uniform')
            hyperparameters_range_dictionary["learning_rate"] = Real(low=1e-2, high=1e-1,
                                                                     prior='log-uniform')
            hyperparameters_range_dictionary["epochs"] = Categorical([300])

        if recommender_class is LightFMRecommender:
            recommender_input_args.CONSTRUCTOR_KEYWORD_ARGS['UCM_train'] = UCM_train
            recommender_input_args.CONSTRUCTOR_KEYWORD_ARGS['ICM_train'] = ICM_train
            hyperparameters_range_dictionary['no_components'] = Categorical([100])
            hyperparameters_range_dictionary['epochs'] = Categorical([100])
            run_light_fm_search(parameterSearch, recommender_input_args,
                                hyperparameters_range_dictionary,
                                URM_train_last_test=URM_train_last_test,
                                parallelize_search=parallelize_search,
                                n_cases=n_cases, n_random_starts=n_random_starts,
                                output_folder_path=output_folder_path,
                                output_file_name_root=output_file_name_root,
                                metric_to_optimize=metric_to_optimize,
                                save_model=save_model)
            # run_light_fm_search already performs the search, so skip the
            # generic search below
            return

        if recommender_class is FieldAwareFMRecommender:
            if approximate_recommender is None:
                raise ValueError("approximate_recommender has to be set")
            root_path = get_project_root_path()
            train_svm_file_path = os.path.join(root_path, "resources", "fm_data",
                                               "URM_ICM_UCM_uncompressed.txt")
            recommender_input_args.CONSTRUCTOR_KEYWORD_ARGS['train_svm_file_path'] = train_svm_file_path
            recommender_input_args.CONSTRUCTOR_KEYWORD_ARGS['approximate_recommender'] = approximate_recommender
            recommender_input_args.CONSTRUCTOR_KEYWORD_ARGS['UCM_train'] = UCM_train
            recommender_input_args.CONSTRUCTOR_KEYWORD_ARGS['ICM_train'] = ICM_train
            hyperparameters_range_dictionary['epochs'] = Categorical([200])
            hyperparameters_range_dictionary['latent_factors'] = Integer(low=20, high=500)
            hyperparameters_range_dictionary['regularization'] = Real(low=1e-6, high=1e0,
                                                                      prior="log-uniform")
            hyperparameters_range_dictionary['learning_rate'] = Real(low=1e-2, high=1e0,
                                                                     prior="log-uniform")

        if URM_train_last_test is not None:
            recommender_input_args_last_test = recommender_input_args.copy()
            recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[0] = URM_train_last_test
        else:
            recommender_input_args_last_test = None

        # Final step: run the Bayesian search over the hyperparameter range
        # defined above for the given algorithm
        parameterSearch.search(
            recommender_input_args,
            parameter_search_space=hyperparameters_range_dictionary,
            n_cases=n_cases,
            n_random_starts=n_random_starts,
            resume_from_saved=resume_from_saved,
            save_model=save_model,
            output_folder_path=output_folder_path,
            output_file_name_root=output_file_name_root,
            metric_to_optimize=metric_to_optimize,
            recommender_input_args_last_test=recommender_input_args_last_test)

    except Exception as e:
        print("On recommender {} Exception {}".format(recommender_class, str(e)))
        traceback.print_exc()
        with open(os.path.join(output_folder_path, "ErrorLog.txt"), "a") as error_file:
            error_file.write("On recommender {} Exception {}\n".format(
                recommender_class, str(e)))
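
# Example invocation (a sketch; the evaluator is assumed to be an
# EvaluatorHoldout built as in the scripts above):
# run_parameter_search_mf_collaborative(ImplicitALSRecommender, URM_train,
#                                       metric_to_optimize="MAP",
#                                       evaluator_validation=evaluator,
#                                       output_folder_path="result_experiments/",
#                                       n_cases=35, n_random_starts=5)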
fields = np.concatenate([user_fields, item_fields, item_feature_fields,
                         user_feature_fields])

# Sample 10 negatives per positive interaction, uniformly at random
positive_URM = URM_train
negative_URM = sample_negative_interactions_uniformly(
    negative_sample_size=len(positive_URM.data) * 10, URM=positive_URM)

# Convert both URMs to FM design matrices, stack them vertically and append
# the item and user side information
URM_positive_FM_matrix = convert_URM_to_FM(positive_URM)
URM_negative_FM_matrix = convert_URM_to_FM(negative_URM)
URM_FM_matrix = sps.vstack([URM_positive_FM_matrix, URM_negative_FM_matrix],
                           format='csr')
URM_FM_matrix = add_ICM_info(URM_FM_matrix, ICM_all, URM_train.shape[0])
URM_FM_matrix = add_UCM_info(URM_FM_matrix, UCM_all, 0)

root_path = get_project_root_path()
fm_data_path = os.path.join(root_path, "resources", "ffm_data")

# Prepare train sparse matrix and labels for dumping to file
# (np.int is removed in recent NumPy releases, so use the builtin int dtype)
FM_sps_matrix = URM_FM_matrix.copy()
labels = np.concatenate([np.ones(URM_positive_FM_matrix.shape[0], dtype=int),
                         np.zeros(URM_negative_FM_matrix.shape[0], dtype=int)])

random_state = 69420
x_train, x_valid, y_train, y_valid = train_test_split(FM_sps_matrix, labels,
                                                      shuffle=True,
                                                      test_size=0.1,
                                                      random_state=random_state)
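
# Hedged sketch of the step implied by the "dumping to file" comment above:
# sklearn's dump_svmlight_file writes (X, y) pairs in the libsvm text format
# that xLearn reads via setTrain/setValidate. The file names are assumptions.
from sklearn.datasets import dump_svmlight_file

dump_svmlight_file(x_train, y_train, os.path.join(fm_data_path, "train_sketch.txt"))
dump_svmlight_file(x_valid, y_valid, os.path.join(fm_data_path, "valid_sketch.txt"))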