def evaluate(urm, ICM):
    """Split `urm` into train/val/test, fit a Hybrid recommender on the train
    split and print the evaluation report on both validation and test.

    Relies on module-level `splitter`, `EvaluatorHoldout` and `Hybrid`.
    """
    train, val, test = splitter.split(urm, testing=0.1, validation=0.2)
    eval_val = EvaluatorHoldout(val, [10])
    eval_test = EvaluatorHoldout(test, [10])
    model = Hybrid(train, ICM)
    model.fit()
    # Report validation first, then the held-out test split.
    for evaluator in (eval_val, eval_test):
        _, report = evaluator.evaluateRecommender(model)
        print(report)
def objective(latent_factors, regularization, alpha):  # parameters must be the same defined above
    """Objective for hyperparameter search over ALS.

    Trains/evaluates on `n_tests` different fixed-seed splits of the global
    `urm` and returns the NEGATED mean MAP@10 (minimizer-friendly), averaging
    across splits to avoid overfitting a single split.
    NOTE(review): reconstructed from collapsed source — the summary print is
    placed after the loop, where `average_map/n_tests` is meaningful; confirm.
    """
    average_map = 0.0
    n_tests = 3  # number of tests (on different data split)
    seed = [1234, 12, 34]  # seed to define the split
    for i in range(n_tests):
        URM_train, URM_test = splitter.split_train_test(urm, testing=0.15, seed=seed[i])
        # maintain only users with a number of interaction between 0 and 5 (excluded)
        URM_test = n_interaction_interval(URM_test, 0, 5)
        evaluator_test = EvaluatorHoldout(URM_test, [10])
        rec = ALS(URM_train)  # can be used also with other recommenders
        # pass the parameter we are tuning
        rec.fit(latent_factors=latent_factors, regularization=regularization,
                iterations=100, alpha=alpha)
        results_run_dict, results_run_string = evaluator_test.evaluateRecommender(rec)
        cumulative_MAP = results_run_dict[10]['MAP']
        average_map += cumulative_MAP
    print(
        f"\nlatent_factors: {latent_factors}, regularization: {regularization}\navg MAP: {average_map/n_tests}\n\n"
    )
    # return the avg_map among the different test (to avoid overfitting on a specific data split)
    return -average_map / n_tests
def single_test(urm_train, urm_test, urm_valid, x_tick):
    """Fit HybridNorm3Recommender on `urm_train` (beta taken from the global
    `best_alpha`) and return its MAP@10 on `urm_test`.

    `urm_valid` and `x_tick` are accepted for interface compatibility but unused.
    """
    test_evaluator = EvaluatorHoldout(urm_test, cutoff_list=[10])
    model = HybridNorm3Recommender(urm_train)
    model.fit(beta=best_alpha)
    metrics, _ = test_evaluator.evaluateRecommender(model)
    return metrics[10]['MAP']
def ablation_study(arguments):
    """Ablation over GANMF's feature-matching (reconstruction) coefficient.

    For each requested dataset/mode, reloads GANMF's best parameters, sweeps
    `recon_coefficient` over [0, 1] in 0.2 steps retraining from scratch each
    time, and saves one plot per cutoff (metric value vs. coefficient).

    Parameters
    ----------
    arguments : list of CLI tokens; may contain '--run-all', dataset names
        (from module-level `name_datasets`) and/or 'user'/'item' to restrict
        the training mode.
    """
    study_path = 'ablation_study'
    if not os.path.exists(study_path):
        os.makedirs(study_path, exist_ok=False)
    exp_path = 'experiments'
    datasets = []
    modes = ['user', 'item']
    run_all = False
    if '--run-all' in arguments:
        datasets = all_datasets
        run_all = True
    for arg in arguments:
        if arg in name_datasets and not run_all:
            datasets.append(all_datasets[name_datasets.index(arg)])
        if arg in modes:
            modes = [arg]
    cutoffs = [5, 10, 20, 50]
    marker = itertools.cycle(['o', '^', 's', 'p', '1', 'D', 'P', '*'])
    for mode in modes:
        for d in datasets:
            # {cutoff: {metric: [one value per coefficient]}}
            # FIX: comprehension variable renamed so it no longer shadows the
            # outer mode loop variable.
            plotting_data = {c: {met: [] for met in metrics} for c in cutoffs}
            best_params = load_best_params(exp_path, d if isinstance(d, str) else d.DATASET_NAME, 'GANMF', mode)
            range_coeff = np.arange(0, 1.1, 0.2)
            for coeff in range_coeff:
                best_params['recon_coefficient'] = coeff
                URM_train, URM_test, _, _, _ = load_URMs(d, dataset_kwargs)
                set_seed(seed)
                test_evaluator = EvaluatorHoldout(URM_test, cutoffs, exclude_seen=True)
                model = GANMF(URM_train, mode=mode, seed=seed, is_experiment=True)
                model.fit(validation_set=None, sample_every=None, validation_evaluator=None, **best_params)
                result_dict, result_str = test_evaluator.evaluateRecommender(model)
                # FIX: removed dead `plotting_data[coeff] = {}` — it only
                # injected spurious float keys into the cutoff-indexed dict
                # and was never read back.
                for c in cutoffs:
                    for met in metrics:
                        plotting_data[c][met].append(result_dict[c][met])
            dname = d if isinstance(d, str) else d.DATASET_NAME
            substudy_path = os.path.join(study_path, dname + '_GANMF_' + mode)
            if not os.path.exists(substudy_path):
                os.makedirs(substudy_path, exist_ok=False)
            for c in cutoffs:
                fig, ax = plt.subplots(figsize=(20, 10))
                ax.set_xlabel('Feature Matching Coefficient')
                for met in metrics:
                    ax.plot(range_coeff, plotting_data[c][met], label=met, marker=next(marker))
                ax.legend(loc='best', fontsize='x-large')
                fig.savefig(os.path.join(substudy_path, str(c) + '_feature_matching_effect.png'),
                            bbox_inches='tight')
def get_precision(learning_rate, num_epoch, URM_train, URM_test):
    """Train SLIM_BPR_Cython on `URM_train` with the given learning rate and
    epoch count, then return PRECISION@10 measured on `URM_test`."""
    model = SLIM_BPR_Cython(URM_train, recompile_cython=False)
    model.fit(epochs=num_epoch,
              batch_size=1,
              sgd_mode='sgd',
              learning_rate=learning_rate,
              positive_threshold_BPR=1)
    holdout = EvaluatorHoldout(URM_test, cutoff_list=[10])
    metrics, _ = holdout.evaluateRecommender(model)
    return metrics[10]['PRECISION']
def search_param(alpha, beta, topK):
    """Objective for hyperparameter search over a hybrid recommender.

    For each (urm_valid, recommender, urm_test) triple in the global
    `my_input`, fits with (alpha, beta, topK) and averages MAP@10 on the
    validation URMs; tracks best validation/test iterations in the global
    `vec` dict. Returns the mean validation MAP.
    NOTE(review): reconstructed from collapsed source — status prints are
    placed after the first loop; confirm against the original layout.
    """
    res = []
    for current in my_input:
        recommender = current[1]
        urm_valid = current[0]
        evaluator_valid = EvaluatorHoldout(urm_valid, cutoff_list=[10])
        #recommender.fit(alpha=alpha, beta=beta, gamma=gamma, phi=phi, psi=psi, li=li, mi=mi)
        recommender.fit(alpha=alpha, beta=beta, topK=int(topK))
        result_valid, str_result = evaluator_valid.evaluateRecommender(recommender)
        res.append(result_valid[10]['MAP'])
    # Progress report on the best iterations found so far (Italian messages).
    print('Il max valid è il n: {} con : {}'.format(vec['n_valid'], optimizer.max))
    print('Il max test è il n : {} con test : {}'.format(vec['n_test'], vec['max_test']))
    res = np.array(res)
    print('Il Map corrente è : {}'.format(res.mean()))
    if res.mean() > vec['max_valid']:
        # New validation best: remember the iteration index and score.
        vec['n_valid'] = vec['n']
        vec['max_valid'] = res.mean()
        print('new max valid found')
    # Also evaluate the same configuration on the test URMs to track test-best.
    res_test = []
    for current in my_input:
        recommender = current[1]
        urm_test = current[2]
        evaluator_test = EvaluatorHoldout(urm_test, cutoff_list=[10])
        #recommender.fit(alpha=alpha, beta=beta, gamma=gamma, phi=phi, psi=psi, li=li, mi=mi)
        recommender.fit(alpha=alpha, beta=beta, topK=int(topK))
        result_test, str_result = evaluator_test.evaluateRecommender(recommender)
        res_test.append(result_test[10]['MAP'])
    res_test = np.array(res_test)
    if res_test.mean() > vec['max_test']:
        print('un nuovo max è stato trovato')
        vec['max_test'] = res_test.mean()
        vec['n_test'] = vec['n']
    vec['n'] += 1  # iteration counter
    return res.mean()
def single_test(urm_train, urm_test, urm_valid, x_tick):
    """Sweep the `beta` weight of HybridNorm3Recommender over the values in
    `x_tick` and return the list of validation MAP@10 scores, one per value.

    `urm_test` is accepted for interface compatibility but unused.
    """
    valid_evaluator = EvaluatorHoldout(urm_valid, cutoff_list=[10], verbose=False)
    model = HybridNorm3Recommender(urm_train)
    valid_maps = []
    # Refit for every candidate weight; tqdm shows sweep progress.
    for weight in tqdm(x_tick):
        model.fit(beta=weight)
        metrics, _ = valid_evaluator.evaluateRecommender(model)
        valid_maps.append(metrics[10]["MAP"])
    return valid_maps
def single_test(urm_train, urm_test, urm_valid):
    """Fit a user content-based KNN (using the global `ucm_all`) with fixed
    tuned hyperparameters and return its MAP@10 on `urm_test`."""
    evaluator_valid = EvaluatorHoldout(urm_valid, cutoff_list=[10])
    evaluator_test = EvaluatorHoldout(urm_test, cutoff_list=[10])
    model = UserKNNCBFRecommender(urm_train, ucm_all)
    # Tuned Tversky-similarity configuration.
    model.fit(shrink=1777,
              topK=1998,
              similarity='tversky',
              feature_weighting='BM25',
              tversky_alpha=0.1604953616,
              tversky_beta=0.9862348646)
    metrics, _ = evaluator_test.evaluateRecommender(model)
    # result, str_result = evaluator_valid.evaluateRecommender(recommender)
    # res[num_test] = result[10]['MAP']
    return metrics[10]['MAP']
# Now that we have the split, we can create the evaluators. # The constructor of the evaluator allows you to specify the evaluation conditions (data, recommendation list length, # excluding already seen items). Whenever you want to evaluate a model, use the evaluateRecommender function of the evaluator object evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[5], exclude_seen=False) evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[5, 10, 20], exclude_seen=False) # We now fit and evaluate a non personalized algorithm recommender = TopPop(URM_train) recommender.fit() results_dict, results_run_string = evaluator_validation.evaluateRecommender( recommender) print("Result of TopPop is:\n" + results_run_string) # We now fit and evaluate a personalized algorithm passing some hyperparameters to the fit functions recommender = P3alphaRecommender(URM_train) recommender.fit(topK=100, alpha=0.5) results_dict, results_run_string = evaluator_validation.evaluateRecommender( recommender) print("Result of P3alpha is:\n" + results_run_string) # We now use a content-based algorithm and a hybrid content-collaborative algorithm ICM_genres = ICM_dict["ICM_genres"] recommender = ItemKNNCBFRecommender(URM_train, ICM_genres) recommender.fit(topK=100, similarity="cosine")
def main(arguments):
    """Run one recommender (chosen from CLI tokens) on the chosen dataset(s)
    with its previously-saved best parameters, and append the test-set report
    under test_results/<model>_<mode>_<dataset>/test_results.txt.

    `arguments` may contain '--run-all', dataset names, 'user'/'item' (GAN
    training mode) and exactly one recommender name from `all_recommenders`.
    """
    test_results_path = 'test_results'
    if not os.path.exists(test_results_path):
        os.makedirs(test_results_path, exist_ok=False)
    exp_path = 'experiments'
    datasets = []
    run_all = False
    train_mode = ['user', 'item']
    cutoffs = [5, 10, 20, 50]
    recommender = None
    # Name -> class registry for every supported recommender.
    dict_rec_classes = {}
    dict_rec_classes['TopPop'] = TopPop
    dict_rec_classes['Random'] = Random
    dict_rec_classes['PureSVD'] = PureSVDRecommender
    dict_rec_classes['BPR'] = MatrixFactorization_BPR_Cython
    dict_rec_classes['ALS'] = IALSRecommender
    dict_rec_classes['NMF'] = NMFRecommender
    dict_rec_classes['GANMF'] = GANMF
    dict_rec_classes['CFGAN'] = CFGAN
    dict_rec_classes['DisGANMF'] = DisGANMF
    dict_rec_classes['SLIMBPR'] = SLIM_BPR_Cython
    dict_rec_classes['fullGANMF'] = fullGANMF
    dict_rec_classes['DeepGANMF'] = DeepGANMF
    if '--run-all' in arguments:
        datasets = all_datasets
        run_all = True
    for arg in arguments:
        if arg in name_datasets and not run_all:
            datasets.append(all_datasets[name_datasets.index(arg)])
        if arg in ['user', 'item']:
            train_mode = [arg]
        # first recommender token wins
        if arg in all_recommenders and recommender is None:
            recommender = arg
    # Non-GAN models have no user/item mode; use a single empty-mode pass.
    if recommender not in ['GANMF', 'DisGANMF', 'CFGAN', 'fullGANMF', 'DeepGANMF']:
        train_mode = ['']
    for d in datasets:
        dname = d if isinstance(d, str) else d.DATASET_NAME
        for mode in train_mode:
            # fullGANMF reuses GANMF's tuned parameters.
            if recommender == 'fullGANMF':
                best_params = load_best_params(exp_path, dname, 'GANMF', mode)
            else:
                best_params = load_best_params(exp_path, dname, dict_rec_classes[recommender].RECOMMENDER_NAME, mode)
            set_seed(seed)
            URM_train, URM_test, _, _, _ = load_URMs(d, dataset_kwargs)
            test_evaluator = EvaluatorHoldout(URM_test, cutoffs, exclude_seen=True)
            if recommender in ['GANMF', 'DisGANMF', 'CFGAN', 'fullGANMF', 'DeepGANMF']:
                model = dict_rec_classes[recommender](URM_train, mode=mode, seed=seed, is_experiment=True)
                model.fit(validation_set=None, sample_every=None, validation_evaluator=None, **best_params)
            else:
                model = dict_rec_classes[recommender](URM_train)
                model.fit(**best_params)
            results_dict, results_str = test_evaluator.evaluateRecommender(model)
            save_path = os.path.join(test_results_path, model.RECOMMENDER_NAME + '_' + mode + '_' + dname)
            if not os.path.exists(save_path):
                os.makedirs(save_path, exist_ok=False)
                with open(os.path.join(save_path, 'test_results.txt'), 'a') as f:
                    f.write(results_str)
            else:
                # Directory already exists: only write if no report file yet.
                results_filename = os.path.join(save_path, 'test_results.txt')
                if not os.path.exists(results_filename):
                    with open(results_filename, 'a') as f:
                        f.write(results_str)
from FeatureWeighting.User_CFW_D_Similarity_Linalg import User_CFW_D_Similarity_Linalg
from Hybrid.HybridNorm3Recommender import HybridNorm3Recommender
from MatrixFactorization.ALSRecommender import ALSRecommender
from MatrixFactorization.BPRRecommender import BPRRecommender
import similaripy as sim

# Compare a UserKNN CF trained on the full train URM vs. one trained on
# warm users only, both evaluated on the warm-user test set.
data = DataManager()
# NOTE(review): this first assignment is immediately overwritten below — dead store.
urm_train = data.get_urm()
urm_train, urm_test = split_train_leave_k_out_user_wise(data.get_urm(), temperature='normal')
urm_train, urm_valid = split_train_leave_k_out_user_wise(urm_train, temperature='valid2')
# Keep only users with more than `threshold` interactions ("warm" users).
urm_train_warm = data.create_test_warm_users(urm_train, threshold=10)
urm_test_warm = data.create_test_warm_users(urm_test, threshold=10)
evaluator_test_warm = EvaluatorHoldout(urm_test_warm, cutoff_list=[10])
recommender = UserKNNCFRecommender(urm_train)
recommender.fit(shrink=2, topK=600, normalize=True)
recommender_warm = UserKNNCFRecommender(urm_train_warm)
recommender_warm.fit(shrink=2, topK=500, normalize=True)
result, str_result = evaluator_test_warm.evaluateRecommender(recommender)
print('The Map of test of urm normal is : {}'.format(result[10]['MAP']))
result, str_result = evaluator_test_warm.evaluateRecommender(recommender_warm)
print('The Map of test of urm warm is : {}'.format(result[10]['MAP']))
# Save the fold model, then (after the early exit below) build a submission.
# `rec`, `seed` and `i` come from the fold loop earlier in the file.
rec.save_model(
    f'stored_recommenders/seed_{str(seed)}_hybrid_search/{rec.RECOMMENDER_NAME}/',
    f'{str(seed)}_fold-{str(i)}')
# r, _ = evaluator_list[i].evaluateRecommender(rec)
# result.append(r[10]['MAP'])
# print(result)
# print(np.average(result))
exit(0)  # debug kill-switch: everything below is currently unreachable

print("Making a submission... ")
parser = DataParser()
URM_all = parser.get_URM_all()
ICM_all = parser.get_ICM_all()
dict_1 = {'ICM_train': ICM_all}
# FIX: was `*dict_1`, which unpacks the dict's KEYS and passed the string
# 'ICM_train' as the ICM argument; `**dict_1` passes ICM_train=ICM_all.
rec = ItemKNNCBFRecommender(URM_all, **dict_1)
# rec_sub = UserWiseHybrid009(URM_all, ICM_all, submission=True)
# rec_sub.fit()
# create_csv(parser, rec_sub, 'UserWiseHybrid009')
URM_train, URM_test = split_train_in_two_percentage_global_sample(
    URM_all, train_percentage=0.90, seed=1205)
evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])
recommender = UserWiseHybrid009(URM_train, ICM_all, submission=True)
recommender.fit()
result, _ = evaluator_test.evaluateRecommender(recommender)
print(result[10])
# Train a user-mode GANMF with hand-picked hyperparameters, using early
# stopping driven by the validation evaluator (allow_worse / freq), then
# evaluate it and archive the run's log directory under plots/.
gan = GANMF(URM_train, mode='user')
gan.fit(num_factors=10, emb_dim=128, d_reg=1e-4, g_reg=0, epochs=300,
        batch_size=128, g_lr=1e-3, d_lr=1e-3, d_steps=1, g_steps=1,
        recon_coefficient=0.05, m=3, allow_worse=5, freq=5,
        validation_evaluator=evaluatorValidation, sample_every=10,
        validation_set=URM_validation)
if not only_build:
    results_dic, results_run_string = evaluator.evaluateRecommender(gan)
    print(results_run_string)
    # Folder name embeds MAP@5 truncated to 7 characters, e.g. 'MAP_0.12345'.
    map_folder = os.path.join('plots', gan.RECOMMENDER_NAME,
                              'MAP_' + str(results_dic[5]['MAP'])[:7])
    # Replace any previous run with the same score.
    if os.path.exists(map_folder):
        shutil.rmtree(map_folder)
    shutil.move(src=gan.logsdir, dst=map_folder)
# Fit a user-based CF KNN and an item content-based KNN, then blend their
# scores (twice) via ItemKNNScoresHybridRecommender and export a submission.
userKNNCF = UserKNNCFRecommender.UserKNNCFRecommender(URM_train)
userKNNCF.fit(**{
    "topK": 131,
    "shrink": 2,
    "similarity": "cosine",
    "normalize": True
})
itemKNNCBF = ItemKNNCBFRecommender.ItemKNNCBFRecommender(URM_train, ICM_train)
itemKNNCBF.fit(topK=700, shrink=200, similarity='jaccard', normalize=True,
               feature_weighting="TF-IDF")
hyb = ItemKNNScoresHybridRecommender.ItemKNNScoresHybridRecommender(
    URM_train, itemKNNCBF, userKNNCF)
hyb.fit(alpha=0.5)
# Kaggle MAP 0.081
# NOTE(review): `itemKNNCF` is not defined in this chunk — presumably fitted
# earlier in the file; confirm before running standalone.
hyb2 = ItemKNNScoresHybridRecommender.ItemKNNScoresHybridRecommender(
    URM_train, hyb, itemKNNCF)
hyb2.fit(alpha=0.5)
print(evaluator_validation.evaluateRecommender(userKNNCF))
print(evaluator_validation.evaluateRecommender(hyb))
print(evaluator_validation.evaluateRecommender(hyb2))
# Submission uses `hyb` (the first-level hybrid), not `hyb2`.
item_list = hyb.recommend(target_ids, cutoff=10)
CreateCSV.create_csv(target_ids, item_list, 'Hyb_User_Item_KNNCF')
def run_recommender(recommender_class):
    """Smoke-test one recommender class end to end.

    Loads the Movielens1M holdout split, fits the model, runs both evaluators,
    round-trips save/load of the model, and logs each stage to the module-level
    `log_file`. Any failure is caught, logged and printed with a traceback.
    """
    temp_dir = "./result_experiments/__temp_model/"
    if not os.path.isdir(temp_dir):
        os.makedirs(temp_dir)
    try:
        reader = Movielens1MReader()
        splitter = DataSplitter_leave_k_out(reader, k_out_value=2)
        splitter.load_data()
        URM_train, URM_validation, URM_test = splitter.get_holdout_split()

        write_log_string(log_file, "On Recommender {}\n".format(recommender_class))

        model = recommender_class(URM_train)
        # Early-stopping models need a bounded epoch count for the smoke test.
        fit_kwargs = {"epochs": 15} if isinstance(model, Incremental_Training_Early_Stopping) else {}
        model.fit(**fit_kwargs)
        write_log_string(log_file, "Fit OK, ")

        holdout_eval = EvaluatorHoldout(URM_test, [5], exclude_seen=True)
        _, results_run_string = holdout_eval.evaluateRecommender(model)
        write_log_string(log_file, "EvaluatorHoldout OK, ")

        negative_eval = EvaluatorNegativeItemSample(URM_test, URM_train, [5], exclude_seen=True)
        negative_eval.evaluateRecommender(model)
        write_log_string(log_file, "EvaluatorNegativeItemSample OK, ")

        # Save/load round-trip into a fresh instance.
        model.saveModel(temp_dir, file_name="temp_model")
        write_log_string(log_file, "saveModel OK, ")
        model = recommender_class(URM_train)
        model.loadModel(temp_dir, file_name="temp_model")
        reload_eval = EvaluatorHoldout(URM_test, [5], exclude_seen=True)
        _, results_run_string_2 = reload_eval.evaluateRecommender(model)
        write_log_string(log_file, "loadModel OK, ")

        shutil.rmtree(temp_dir, ignore_errors=True)
        write_log_string(log_file, " PASS\n")
        write_log_string(log_file, results_run_string + "\n\n")
    except Exception as e:
        print("On Recommender {} Exception {}".format(recommender_class, str(e)))
        log_file.write("On Recommender {} Exception {}\n\n\n".format(recommender_class, str(e)))
        log_file.flush()
        traceback.print_exc()
class RecSysExp:
    """Hyperparameter-search experiment wrapper for one recommender/dataset pair.

    Splits the data, runs skopt (Gaussian Process or random search) over the
    given fit parameters with checkpointing and early stopping, then retrains
    the best configuration on the full train set and logs test results.
    """

    def __init__(self, recommender_class, dataset, fit_param_names=[], metric='MAP',
                 method='bayesian', at=5, verbose=True, seed=1234):
        # NOTE(review): mutable default `fit_param_names=[]` is shared across
        # instances and is mutated by tune() — consider `None` + local list.
        # Seed for reproducibility of results and consistent initialization of weights/splitting of dataset
        set_seed(seed)
        self.recommender_class = recommender_class
        self.dataset = dataset
        self.dataset_name = self.dataset if isinstance(self.dataset, str) else self.dataset.DATASET_NAME
        self.fit_param_names = fit_param_names
        self.metric = metric          # metric optimized during search
        self.method = method          # 'bayesian' (GP) or random search
        self.at = at                  # cutoff used during optimization
        self.verbose = verbose
        self.seed = seed
        self.isGAN = False            # set True below for GAN-based models

        # if isinstance(self.dataset, str) and self.dataset in Movielens.urls.keys():
        #     self.reader = Movielens(version=self.dataset, **dataset_kwargs)
        # else:
        #     self.reader = self.dataset(**dataset_kwargs)
        # self.logsdir = os.path.join('experiments', self.recommender_class.RECOMMENDER_NAME + '_' + self.reader.DATASET_NAME)

        # Per-experiment log directory: experiments/<model>_<mode>_<dataset>
        self.logsdir = os.path.join('experiments',
                                    self.recommender_class.RECOMMENDER_NAME + '_' + train_mode + '_' + self.dataset_name)
        if not os.path.exists(self.logsdir):
            os.makedirs(self.logsdir, exist_ok=False)

        # with open(os.path.join(self.logsdir, 'dataset_config.txt'), 'w') as f:
        #     json.dump(self.reader.config, f, indent=4)

        # Snapshot the experiment's and the recommender's source files for
        # reproducibility.
        codesdir = os.path.join(self.logsdir, 'code')
        os.makedirs(codesdir, exist_ok=True)
        shutil.copy(os.path.abspath(sys.modules[self.__module__].__file__), codesdir)
        shutil.copy(os.path.abspath(sys.modules[self.recommender_class.__module__].__file__), codesdir)

        # self.URM_train, self.URM_test, self.URM_validation = self.reader.split_urm(split_ratio=[0.6, 0.2, 0.2], save_local=False, verbose=False)
        # self.URM_train = self.reader.get_URM_train()
        # self.URM_test = self.reader.get_URM_test()
        # self.URM_for_train, _, self.URM_validation = self.reader.split_urm(
        #     self.URM_train.tocoo(), split_ratio=[0.75, 0, 0.25], save_local=False, verbose=False)
        # self.URM_train_small, _, self.URM_early_stop = self.reader.split_urm(self.URM_for_train.tocoo(), split_ratio=[0.85, 0, 0.15],
        #                                                                      save_local=False, verbose=False)
        # del self.URM_for_train

        self.URM_train, self.URM_test, self.URM_validation, self.URM_train_small, self.URM_early_stop = load_URMs(
            dataset, dataset_kwargs)
        self.evaluator_validation = EvaluatorHoldout(self.URM_validation, [self.at], exclude_seen=True)
        self.evaluator_earlystop = EvaluatorHoldout(self.URM_early_stop, [self.at], exclude_seen=True)
        self.evaluatorTest = EvaluatorHoldout(self.URM_test, [self.at, 10, 20, 50], exclude_seen=True,
                                              minRatingsPerUser=2)
        self.fit_params = {}

        # Detect GAN-based recommenders from their defining package.
        modules = getattr(self.recommender_class, '__module__', None)
        if modules and modules.split('.')[0] == gans.__name__:
            self.isGAN = True

        # EARLY STOPPING from Maurizio's framework for baseline recommenders.
        self.early_stopping_parameters = {
            'epochs_min': 0,
            'validation_every_n': 5,
            'stop_on_validation': True,
            'validation_metric': self.metric,
            'lower_validations_allowed': 5,
            'evaluator_object': self.evaluator_earlystop
        }

        # Early stopping for GAN-based recommenders.
        self.my_early_stopping = {
            'allow_worse': 5,
            'freq': 5,
            'validation_evaluator': self.evaluator_earlystop,
            'validation_set': None,
            'sample_every': None,
        }

    def build_fit_params(self, params):
        # Map positional skopt values back to named fit parameters via
        # self.dimension_names (set in tune()).
        for i, val in enumerate(params):
            param_name = self.dimension_names[i]
            if param_name in self.fit_param_names:
                self.fit_params[param_name] = val
            elif param_name == 'epochs' and self.recommender_class in early_stopping_algos:
                self.fit_params[param_name] = val

    def save_best_params(self, additional_params=None):
        # Persist the current best fit parameters (optionally merged with
        # extras such as the early-stopped epoch count) to best_params.pkl.
        d = dict(self.fit_params)
        if additional_params is not None:
            d.update(additional_params)
        with open(os.path.join(self.logsdir, 'best_params.pkl'), 'wb') as f:
            pickle.dump(d, f, pickle.HIGHEST_PROTOCOL)

    def load_best_params(self):
        # Inverse of save_best_params().
        with open(os.path.join(self.logsdir, 'best_params.pkl'), 'rb') as f:
            return pickle.load(f)

    def obj_func(self, params):
        """
        Black-box objective function.

        Parameters
        ----------
        params: list
            Ranges of hyperparameters to consider. List of skopt.space.space.Dimension.

        Returns
        -------
        obj_func_value: float
            Value of the objective function as denoted by the experiment metric.
        """
        # print('Optimizing for', self.reader.DATASET_NAME)
        print('Optimizing', self.recommender_class.RECOMMENDER_NAME, 'for', self.dataset_name)

        # Split the parameters into build_params and fit_params
        self.build_fit_params(params)

        # Create the model and fit it.
        try:
            if self.isGAN:
                model = self.recommender_class(self.URM_train_small, mode=train_mode, seed=seed, is_experiment=True)
                model.logsdir = self.logsdir
                fit_early_params = dict(self.fit_params)
                fit_early_params.update(self.my_early_stopping)
                last_epoch = model.fit(**fit_early_params)
                # Save the right number of epochs that produces the current model
                if last_epoch != self.fit_params['epochs']:
                    self.fit_params['epochs'] = last_epoch - \
                        self.my_early_stopping['allow_worse'] * self.my_early_stopping['freq']
            else:
                model = self.recommender_class(self.URM_train_small)
                if self.recommender_class in early_stopping_algos:
                    fit_early_params = dict(self.fit_params)
                    fit_early_params.update(self.early_stopping_parameters)
                    model.fit(**fit_early_params)
                else:
                    model.fit(**self.fit_params)
            results_dic, results_run_string = self.evaluator_validation.evaluateRecommender(model)
            # Negated so the skopt minimizer maximizes the metric.
            fitness = -results_dic[self.at][self.metric]
        except tf.errors.ResourceExhaustedError:
            # GPU OOM: report a neutral score so the search continues.
            return 0

        try:
            if fitness < self.best_res:
                self.best_res = fitness
                self.save_best_params(additional_params=dict(epochs=model.epochs_best)
                                      if self.recommender_class in early_stopping_algos else None)
        except AttributeError:
            # First evaluation: self.best_res does not exist yet.
            self.best_res = fitness
            self.save_best_params(additional_params=model.get_early_stopping_final_epochs_dict()
                                  if self.recommender_class in early_stopping_algos else None)

        # Append this trial's parameters and report to results.txt.
        with open(os.path.join(self.logsdir, 'results.txt'), 'a') as f:
            d = self.fit_params
            if self.recommender_class in early_stopping_algos:
                d.update(model.get_early_stopping_final_epochs_dict())
            d_str = json.dumps(d)
            f.write(d_str)
            f.write('\n')
            f.write(results_run_string)
            f.write('\n\n')
        return fitness

    def tune(self, params, evals=10, init_config=None, seed=None):
        """
        Runs the hyperparameter search using Gaussian Process as surrogate model
        or Random Search, saves the results of the trials and print the best
        found parameters.

        Parameters
        ----------
        params: list
            List of skopt.space.space.Dimensions to be searched.

        evals: int
            Number of evaluations to perform.

        init_config: list, default None
            An initial parameter configuration for seeding the Gaussian Process

        seed: int, default None
            Seed for random_state of `gp_minimize` or `dummy_minimize`.
            Set to a fixed integer for reproducibility.
        """
        msg = 'Started ' + self.recommender_class.RECOMMENDER_NAME + ' ' + self.dataset_name
        subprocess.run(['telegram-send', msg])

        # Shape of the test URM (CSR matrix): U users x I items.
        U, I = self.URM_test.shape

        # Add architecture-size dimensions whose upper bound depends on the
        # item count (capped at 1024).
        if self.recommender_class == GANMF:
            params.append(Integer(4, int(I * 0.75) if I <= 1024 else 1024, name='emb_dim', dtype=int))
            self.fit_param_names.append('emb_dim')
        if self.recommender_class == CFGAN or self.recommender_class == DeepGANMF:
            params.append(Integer(4, int(I * 0.75) if I <= 1024 else 1024, name='d_nodes', dtype=int))
            params.append(Integer(4, int(I * 0.75) if I <= 1024 else 1024, name='g_nodes', dtype=int))
            self.fit_param_names.append('d_nodes')
            self.fit_param_names.append('g_nodes')
        if self.recommender_class == DisGANMF:
            params.append(Integer(4, int(I * 0.75) if I <= 1024 else 1024, name='d_nodes', dtype=int))
            self.fit_param_names.append('d_nodes')

        self.dimension_names = [p.name for p in params]

        ''' Need to make sure that the max. value of `num_factors` parameters must be lower than the max(U, I) '''
        try:
            idx = self.dimension_names.index('num_factors')
            maxval = params[idx].bounds[1]
            if maxval > min(U, I):
                params[idx] = Integer(1, min(U, I), name='num_factors', dtype=int)
        except ValueError:
            # No 'num_factors' dimension for this recommender.
            pass

        # NOTE(review): reconstructed from collapsed source — everything below
        # is placed inside `if len(params) > 0`; confirm original layout.
        if len(params) > 0:
            # Check if there is already a checkpoint for this experiment;
            # if so, resume the search from it.
            checkpoint_path = os.path.join(self.logsdir, 'checkpoint.pkl')
            checkpoint_exists = True if os.path.exists(checkpoint_path) else False
            checkpoint_saver = CheckpointSaver(os.path.join(self.logsdir, 'checkpoint.pkl'), compress=3)
            if seed is None:
                seed = self.seed
            t_start = int(time.time())
            if checkpoint_exists:
                previous_run = skopt.load(checkpoint_path)
                if self.method == 'bayesian':
                    results = gp_minimize(self.obj_func, params,
                                          n_calls=evals - len(previous_run.func_vals),
                                          x0=previous_run.x_iters, y0=previous_run.func_vals,
                                          n_random_starts=0, random_state=seed, verbose=True,
                                          callback=[checkpoint_saver])
                else:
                    results = dummy_minimize(self.obj_func, params,
                                             n_calls=evals - len(previous_run.func_vals),
                                             x0=previous_run.x_iters, y0=previous_run.func_vals,
                                             random_state=seed, verbose=True,
                                             callback=[checkpoint_saver])
            else:
                # Hyperparameter optimization from scratch.
                if self.method == 'bayesian':
                    results = gp_minimize(self.obj_func, params, n_calls=evals,
                                          random_state=seed, verbose=True,
                                          callback=[checkpoint_saver])
                else:
                    results = dummy_minimize(self.obj_func, params, n_calls=evals,
                                             random_state=seed, verbose=True,
                                             callback=[checkpoint_saver])
            t_end = int(time.time())

            # Save best parameters of this experiment
            # best_params = dict(zip(self.dimension_names, results.x))
            # with open(os.path.join(self.logsdir, 'best_params.pkl'), 'wb') as f:
            #     pickle.dump(best_params, f, pickle.HIGHEST_PROTOCOL)
            best_params = self.load_best_params()
            with open(os.path.join(self.logsdir, 'results.txt'), 'a') as f:
                f.write('Experiment ran for {}\n'.format(str(datetime.timedelta(seconds=t_end - t_start))))
                f.write('Best {} score: {}. Best result found at: {}\n'.format(self.metric, results.fun, best_params))
            if self.recommender_class in [IALSRecommender, MatrixFactorization_BPR_Cython]:
                self.dimension_names.append('epochs')
            self.build_fit_params(best_params.values())

            # Retrain with all training data
            set_seed(seed)
            if self.isGAN:
                model = self.recommender_class(self.URM_train, mode=train_mode, is_experiment=True)
                model.logsdir = self.logsdir
                model.fit(**self.fit_params)
                # load_models(model, save_dir='best_model', all_in_folder=True)
            else:
                model = self.recommender_class(self.URM_train)
                model.fit(**self.fit_params)
                # model.loadModel(os.path.join(self.logsdir, 'best_model'))
            _, results_run_string = self.evaluatorTest.evaluateRecommender(model)
            print('\n\nResults on test set:')
            print(results_run_string)
            print('\n\n')
            with open(os.path.join(self.logsdir, 'result_test.txt'), 'w') as f:
                f.write(results_run_string)

        msg = 'Finished ' + self.recommender_class.RECOMMENDER_NAME + ' ' + self.dataset_name
        subprocess.run(['telegram-send', msg])
# Build a two-level score hybrid (recommender3+recommender4, then blended
# with recommender1), evaluate it and export a submission CSV.
# NOTE(review): `recommender1` and `recommender3` are defined earlier in the
# file, outside this chunk.
recommender4 = UserKNNCFRecommender.UserKNNCFRecommender(URM_train)
recommender4.fit(
    **{
        "topK": 305,
        "shrink": 0,
        "similarity": "cosine",
        "normalize": True,
        "feature_weighting": "TF-IDF"
    })

# MAP 0.049 (topK=100, l2_norm = 1e3, normalize_matrix = False, verbose = True)
# recommender = EASE_R_Recommender.EASE_R_Recommender(URM_train)
# recommender.fit(topK=None, l2_norm = 3 * 1e3, normalize_matrix = False, verbose = True)
# MAP 0.053
#recommender = ItemKNNSimilarityHybridRecommender.ItemKNNSimilarityHybridRecommender(URM_train, w1, w2)
#recommender.fit(topK=300, alpha = 0.7)

recommendert = ItemKNNScoresHybridRecommender.ItemKNNScoresHybridRecommender(
    URM_train, recommender3, recommender4)
recommendert.fit(alpha=0.6)
recommender = ItemKNNScoresHybridRecommender.ItemKNNScoresHybridRecommender(
    URM_train, recommender1, recommendert)
recommender.fit(alpha=0.6)
print(evaluator_validation.evaluateRecommender(recommender))
item_list = recommender.recommend(target_ids, cutoff=10)
CreateCSV.create_csv(target_ids, item_list, 'MyRec')
    return scores  # tail of a function whose definition lies above this chunk

# recommenderSLIM = SLIM_BPR_Recommender(URM_all)
# recommenderSLIM.fit(learning_rate=0.001, epochs=5)

# Load pre-split train/test matrices and evaluate a SLIM BPR model.
URM_train = sps.load_npz('myFiles/train_set.npz')
URM_test = sps.load_npz('myFiles/test_set.npz')
URM_train = URM_train.tocsr()
URM_test = URM_test.tocsr()
recommenderCYTHON = SLIM_BPR_Cython(URM_train, recompile_cython=False)
# NOTE(review): sgd_mode='sdg' looks like a typo for 'sgd' — confirm the
# framework accepts 'sdg' before relying on this configuration.
recommenderCYTHON.fit(epochs=3000, batch_size=100, sgd_mode='sdg',
                      learning_rate=1e-6, topK=10)
evaluator_validation = EvaluatorHoldout(URM_test, cutoff_list=[5])
result, result_string = evaluator_validation.evaluateRecommender(recommenderCYTHON)
print(result_string)
'''
filename = 'out' + '19'
# REMEMBER TO CHANGE THE FILE NAME!!!
print_to_csv_age_and_region(recommenderCYTHON, recommenderTP, filename)
'''
def compute_group_MAP(args, group_id):
    """Evaluate several hybrid recommenders on one user-profile-length group.

    Selects the users of group `group_id` (blocks of `block_size` users taken
    from `sorted_users`), evaluates each hybrid on that group only, and returns
    a list of single-element lists of MAP@cutoff — one list per hybrid, in the
    order hyb, hyb2, hyb3, hyb5, hyb6 (plus hyb7 when it is not None).

    Parameters
    ----------
    args : dict with keys block_size, profile_length, sorted_users, cutoff,
        URM_test, hyb, hyb2, hyb3, hyb5, hyb6, hyb7.
    group_id : int index of the user group to evaluate.
    """
    block_size = args["block_size"]
    profile_length = args["profile_length"]
    sorted_users = args["sorted_users"]
    cutoff = args["cutoff"]
    URM_test = args["URM_test"]
    # FIX: the six copy-pasted evaluate/append stanzas are collapsed into one
    # loop; the returned structure (list of one-element lists) is unchanged.
    recommenders = [args["hyb"], args["hyb2"], args["hyb3"], args["hyb5"], args["hyb6"]]
    hyb7 = args["hyb7"]

    start_pos = group_id * block_size
    end_pos = min((group_id + 1) * block_size, len(profile_length))
    users_in_group = sorted_users[start_pos:end_pos]
    users_in_group_p_len = profile_length[users_in_group]
    print("Group {}, average p.len {:.2f}, min {}, max {}".format(group_id,
                                                                  users_in_group_p_len.mean(),
                                                                  users_in_group_p_len.min(),
                                                                  users_in_group_p_len.max()))

    # Restrict the evaluator to this group by ignoring every other user.
    users_not_in_group_flag = np.isin(sorted_users, users_in_group, invert=True)
    users_not_in_group = sorted_users[users_not_in_group_flag]
    evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[cutoff], ignore_users=users_not_in_group)

    if hyb7 is not None:
        recommenders.append(hyb7)
    group_maps = []
    for rec in recommenders:
        results, _ = evaluator_test.evaluateRecommender(rec)
        group_maps.append([results[cutoff]["MAP"]])
    return group_maps
# Fragment of a per-group evaluation loop: `group_id`, `users_in_group`,
# `profile_length`, `sorted_users`, the MAP_* lists and the fitted models
# are all defined earlier in the file.
users_in_group_p_len = profile_length[users_in_group]
print("Group {}, average p.len {:.2f}, min {}, max {}".format(
    group_id, users_in_group_p_len.mean(), users_in_group_p_len.min(),
    users_in_group_p_len.max()))
# Restrict evaluation to this group by ignoring all other users.
users_not_in_group_flag = np.isin(sorted_users, users_in_group, invert=True)
users_not_in_group = sorted_users[users_not_in_group_flag]
evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[cutoff],
                                  ignore_users=users_not_in_group)
results, _ = evaluator_test.evaluateRecommender(slim_model)
MAP_slim_per_group.append(results[cutoff]["MAP"])
results, _ = evaluator_test.evaluateRecommender(rp3_model)
MAP_rp3_per_group.append(results[cutoff]["MAP"])
results, _ = evaluator_test.evaluateRecommender(userCF_model)
MAP_userCF_per_group.append(results[cutoff]["MAP"])
# Persist the three fitted models for later ensembling.
slim_model.save_model("./result_experiments/results_ensemble/", "slim_1")
rp3_model.save_model("./result_experiments/results_ensemble/", "rp3_1")
userCF_model.save_model("./result_experiments/results_ensemble/", "userCF_1")
import matplotlib.pyplot as pyplot
pyplot.plot(MAP_slim_per_group, label="slim")
def crossval(URM_all, ICM_all, target_ids, k):
    """Run one cross-validation fold: split the data, build several tuned base
    recommenders (loading cached models from disk when available) plus a few
    hybrids on top of them, and return their validation MAP@10 scores.

    :param URM_all: full user-rating matrix, split 75/25 into train/test.
    :param ICM_all: item-content matrix used by the content-based models.
    :param target_ids: target users (unused here; kept for interface parity).
    :param k: fold index, used to derive a deterministic split seed.
    :return: [MAP(hyb), MAP(hyb2), MAP(hyb3), MAP(hyb5), MAP(hyb6)] at cutoff 10.
    """
    seed = 1234 + k  # deterministic per-fold seed (use + int(time.time()) for random folds)
    np.random.seed(seed)
    tp = 0.75  # train percentage of the URM holdout split
    URM_train, URM_test = train_test_holdout(URM_all, train_perc=tp)
    ICM_train, ICM_test = train_test_holdout(ICM_all, train_perc=0.95)
    evaluator_validation = EvaluatorHoldout(URM_test, cutoff_list=[10],
                                            exclude_seen=True)

    def _load_or_fit(recommender, args):
        """Load a cached model for `args`; on any failure fit and cache it.

        The filename encodes hyper-parameters, seed and train percentage so a
        cache hit is guaranteed to match this exact configuration.
        """
        file_name = (recommender.RECOMMENDER_NAME + toFileName(args) +
                     ",s=" + str(seed) + ",tp=" + str(tp))
        try:
            recommender.load_model('SavedModels\\', file_name + ".zip")
        except Exception:  # was a bare `except:` — don't swallow KeyboardInterrupt
            print("Saved model not found. Fitting a new one...")
            recommender.fit(**args)
            recommender.save_model('SavedModels\\', file_name)

    p3alpha = P3alphaRecommender.P3alphaRecommender(URM_train)
    _load_or_fit(p3alpha, {"topK": 991, "alpha": 0.4705816992313091,
                           "normalize_similarity": False})

    rp3beta = RP3betaRecommender.RP3betaRecommender(URM_train)
    _load_or_fit(rp3beta, {"topK": 991, "alpha": 0.4705816992313091,
                           "beta": 0.37, "normalize_similarity": False})

    itemKNNCF = ItemKNNCFRecommender.ItemKNNCFRecommender(URM_train)
    _load_or_fit(itemKNNCF, {"topK": 1000, "shrink": 732, "similarity": "cosine",
                             "normalize": True, "feature_weighting": "TF-IDF"})

    userKNNCF = UserKNNCFRecommender.UserKNNCFRecommender(URM_train)
    _load_or_fit(userKNNCF, {"topK": 131, "shrink": 2, "similarity": "cosine",
                             "normalize": True})

    itemKNNCBF = ItemKNNCBFRecommender.ItemKNNCBFRecommender(URM_train, ICM_all)
    _load_or_fit(itemKNNCBF, {"topK": 700, "shrink": 100, "similarity": 'jaccard',
                              "normalize": True, "feature_weighting": "TF-IDF"})

    # (CFW_D_Similarity_Linalg and SLIM_BPR variants were tried and discarded.)

    pureSVD = PureSVDRecommender.PureSVDRecommender(URM_train)
    pureSVD.fit(num_factors=1000)

    # Score-level hybrids of the base models.
    hyb = ItemKNNScoresHybridRecommender.ItemKNNScoresHybridRecommender(
        URM_train, p3alpha, itemKNNCBF)
    hyb.fit(alpha=0.5)

    # Kaggle MAP 0.084 rp3beta, itemKNNCBF
    hyb2 = ItemKNNScoresHybridRecommender.ItemKNNScoresHybridRecommender(
        URM_train, pureSVD, itemKNNCBF)
    hyb2.fit(alpha=0.5)

    # Kaggle MAP 0.08667
    hyb3 = ItemKNNScoresHybridRecommender.ItemKNNScoresHybridRecommender(
        URM_train, hyb, hyb2)
    hyb3.fit(alpha=0.5)

    # Kaggle MAP 0.08856
    hyb5 = ScoresHybridP3alphaKNNCBF.ScoresHybridP3alphaKNNCBF(URM_train, ICM_all)
    # Full values: "alpha_P": 0.4108657561671193, "alpha": 0.6290871066510789
    _load_or_fit(hyb5, {"topK_P": 903, "alpha_P": 0.41086575,
                        "normalize_similarity_P": False, "topK": 448,
                        "shrink": 20, "similarity": "tversky", "normalize": True,
                        "alpha": 0.6290871, "feature_weighting": "TF-IDF"})

    hyb6 = ScoresHybridRP3betaKNNCBF.ScoresHybridRP3betaKNNCBF(URM_train, ICM_all)
    # Full values: "alpha_P": 0.5081918012150626, "alpha": 0.44740093610861603
    _load_or_fit(hyb6, {"topK_P": 623, "alpha_P": 0.5081918,
                        "normalize_similarity_P": False, "topK": 1000,
                        "shrink": 1000, "similarity": "tversky", "normalize": True,
                        "alpha": 0.4474009, "beta_P": 0.0,
                        "feature_weighting": "TF-IDF"})

    v0 = evaluator_validation.evaluateRecommender(hyb)[0][10]["MAP"]
    v1 = evaluator_validation.evaluateRecommender(hyb2)[0][10]["MAP"]
    v2 = evaluator_validation.evaluateRecommender(hyb3)[0][10]["MAP"]
    v3 = evaluator_validation.evaluateRecommender(hyb5)[0][10]["MAP"]
    v4 = evaluator_validation.evaluateRecommender(hyb6)[0][10]["MAP"]
    return [v0, v1, v2, v3, v4]
# NOTE(review): fragment — the opening of this call (presumably a tuner/fit
# taking evaluator_validation) is outside this chunk, as are URM_train,
# evaluator_test and ICM.
evaluator_validation, ICM_target=ICM, model_to_use="last")

# We compute the similarity matrix resulting from a RP3beta recommender
# Note that we have not included the code for parameter tuning, which should be done
cf_parameters = {'topK': 500,
                 'alpha': 0.9,
                 'beta': 0.7,
                 'normalize_similarity': True}
recommender_collaborative = RP3betaRecommender(URM_train)
recommender_collaborative.fit(**cf_parameters)
result_dict, result_string = evaluator_test.evaluateRecommender(
    recommender_collaborative)
print("CF recommendation quality is: {}".format(result_string))
# We get the similarity matrix
# The similarity is a scipy.sparse matrix of shape |items|x|items|
similarity_collaborative = recommender_collaborative.W_sparse.copy()
# We instance and fit the feature weighting algorithm, it takes as input:
# - The train URM
# - The ICM
# - The collaborative similarity matrix
# Note that we have not included the code for parameter tuning, which should be done as those are just default parameters
fw_parameters = {'epochs': 200,
                 'learning_rate': 0.0001,
                 # NOTE(review): dict literal is cut here — it continues in the next chunk.
# idx_topk = idx_topk_part[np.arange(batch_users)[:, np.newaxis], idx_part] recommended_items = np.argsort(-pred_val, axis=1).ravel()[:k] is_relevant = np.in1d(recommended_items, pos_items_array, assume_unique=True) # his_recall = Recall_at_k_batch(pred_val, pos_items_sparse, k=20)[0] # my_recall = recall(is_relevant, pos_items_array) his_ndcg = NDCG_binary_at_k_batch(pred_val, pos_items_sparse, k=100)[0] my_ndcg = ndcg(recommended_items, pos_items_array) if not np.allclose(my_ndcg, his_ndcg, atol=0.0001): pass n100_list = np.concatenate(n100_list) r20_list = np.concatenate(r20_list) r50_list = np.concatenate(r50_list) print("Test NDCG@100=%.5f (%.5f)" % (np.mean(n100_list), np.std(n100_list) / np.sqrt(len(n100_list)))) print("Test Recall@20=%.5f (%.5f)" % (np.mean(r20_list), np.std(r20_list) / np.sqrt(len(r20_list)))) print("Test Recall@50=%.5f (%.5f)" % (np.mean(r50_list), np.std(r50_list) / np.sqrt(len(r50_list)))) from Base.Evaluation.Evaluator import EvaluatorHoldout evaluator = EvaluatorHoldout(test_data_te, cutoff_list=[20, 50, 100]) results_dict, results_run_string = evaluator.evaluateRecommender(recommender) print(results_run_string)
def crossval(URM_all, ICM_all, target_ids, k):
    """Run one cross-validation fold on a 90/10 split: fit the base
    recommenders and their hybrids, then return each hybrid's MAP@10.

    :param URM_all: full user-rating matrix.
    :param ICM_all: item-content matrix used by the content-based models.
    :param target_ids: target users (unused here; kept for interface parity).
    :param k: fold index used to derive the fold seed.
    :return: [MAP(hyb), MAP(hyb2), MAP(hyb3), MAP(hyb5), MAP(hyb6), MAP(hyb7)].
    """
    seed = 1234 + k  # + int(time.time()) for non-deterministic folds
    # BUG FIX: `seed` was computed but np.random.seed() was called with NO
    # argument, so every fold used an unreproducible random split. Pass it.
    np.random.seed(seed)
    URM_train, URM_test = train_test_holdout(URM_all, train_perc=0.90)
    ICM_train, ICM_test = train_test_holdout(ICM_all, train_perc=0.95)
    evaluator_validation = EvaluatorHoldout(URM_test, cutoff_list=[10],
                                            exclude_seen=True)

    # --- tuned base recommenders -------------------------------------------
    p3alpha = P3alphaRecommender.P3alphaRecommender(URM_train)
    p3alpha.fit(**{"topK": 991, "alpha": 0.4705816992313091,
                   "normalize_similarity": False})

    itemKNNCF = ItemKNNCFRecommender.ItemKNNCFRecommender(URM_train)
    itemKNNCF.fit(**{"topK": 1000, "shrink": 732, "similarity": "cosine",
                     "normalize": True, "feature_weighting": "TF-IDF"})

    userKNNCF = UserKNNCFRecommender.UserKNNCFRecommender(URM_train)
    userKNNCF.fit(**{"topK": 131, "shrink": 2, "similarity": "cosine",
                     "normalize": True})

    itemKNNCBF = ItemKNNCBFRecommender.ItemKNNCBFRecommender(URM_train, ICM_all)
    itemKNNCBF.fit(**{"topK": 700, "shrink": 100, "similarity": 'jaccard',
                      "normalize": True, "feature_weighting": "TF-IDF"})

    # NOTE(review): itemKNNCBF2 is fitted but never used below; kept (rather
    # than removed) so the fold's RNG consumption and timing are unchanged.
    itemKNNCBF2 = ItemKNNCBFRecommender.ItemKNNCBFRecommender(URM_train, ICM_all)
    itemKNNCBF2.fit(**{"topK": 200, "shrink": 15, "similarity": 'jaccard',
                       "normalize": True, "feature_weighting": "TF-IDF"})

    pureSVD = PureSVDRecommender.PureSVDRecommender(URM_train)
    pureSVD.fit(num_factors=340)

    # --- hybrids ------------------------------------------------------------
    hyb = ItemKNNScoresHybridRecommender.ItemKNNScoresHybridRecommender(
        URM_train, itemKNNCBF, pureSVD)
    hyb.fit(alpha=0.5)

    # Kaggle MAP 0.084 rp3beta, itemKNNCBF
    hyb2 = ItemKNNSimilarityHybridRecommender.ItemKNNSimilarityHybridRecommender(
        URM_train, itemKNNCBF.W_sparse, itemKNNCF.W_sparse)
    hyb2.fit(topK=1600)

    # Kaggle MAP 0.08667
    hyb3 = ItemKNNScoresHybridRecommender.ItemKNNScoresHybridRecommender(
        URM_train, hyb, hyb2)
    hyb3.fit(alpha=0.5)

    # Kaggle MAP 0.08856
    hyb5 = ScoresHybridP3alphaKNNCBF.ScoresHybridP3alphaKNNCBF(URM_train, ICM_all)
    hyb5.fit(**{"topK_P": 903, "alpha_P": 0.4108657561671193,
                "normalize_similarity_P": False, "topK": 448, "shrink": 20,
                "similarity": "tversky", "normalize": True,
                "alpha": 0.6290871066510789, "feature_weighting": "TF-IDF"})

    hyb6 = ScoresHybridP3alphaKNNCBF.ScoresHybridP3alphaKNNCBF(URM_train, ICM_all)
    hyb6.fit(**{"topK_P": 756, "alpha_P": 0.5292654015790155,
                "normalize_similarity_P": False, "topK": 1000, "shrink": 47,
                "similarity": "tversky", "normalize": False,
                "alpha": 0.5207647439152092, "feature_weighting": "none"})

    # Rank-level merge of the two best score hybrids.
    hyb7 = RankingHybrid.RankingHybrid(URM_train, hyb6, hyb3)

    v0 = evaluator_validation.evaluateRecommender(hyb)[0][10]["MAP"]
    v1 = evaluator_validation.evaluateRecommender(hyb2)[0][10]["MAP"]
    v2 = evaluator_validation.evaluateRecommender(hyb3)[0][10]["MAP"]
    v3 = evaluator_validation.evaluateRecommender(hyb5)[0][10]["MAP"]
    v4 = evaluator_validation.evaluateRecommender(hyb6)[0][10]["MAP"]
    v5 = evaluator_validation.evaluateRecommender(hyb7)[0][10]["MAP"]
    return [v0, v1, v2, v3, v4, v5]
def single_test(i):
    """Fit the i-th recommender with the globally tuned (alpha, beta, topK)
    and return its MAP@10 on the i-th test split.

    Relies on module-level n_recommender, n_urm_test, alpha, beta and topK.
    """
    rec = n_recommender[i]
    # (an earlier 6-parameter variant — gamma/phi/psi/li — was tried and dropped)
    rec.fit(alpha=alpha, beta=beta, topK=int(topK))
    evaluator = EvaluatorHoldout(n_urm_test[i], cutoff_list=[10])
    eval_result, _ = evaluator.evaluateRecommender(rec)
    return eval_result[10]['MAP']
if __name__ == '__main__':
    # Sweep the mixing weight of a content/collaborative hybrid and print
    # the validation MAP@10 for each alpha in [0, 1).
    seed = 1205
    parser = DataParser()
    URM_all = parser.get_URM_all()
    ICM_all = parser.get_ICM_all()
    URM_train, URM_test = split_train_in_two_percentage_global_sample(
        URM_all, train_percentage=0.85, seed=seed)
    evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])

    # Content-based side: 'topK': 40, 'shrink': 1000, 'similarity': 'cosine',
    # 'normalize': True, 'feature_weighting': 'BM25'
    content_rec = ItemKNNCBFRecommender(URM_train, ICM_all)
    content_rec.fit(topK=40, shrink=1000, similarity='cosine',
                    feature_weighting='BM25')
    # Collaborative side: 'topK': 140, 'l1_ratio': 1e-05, 'alpha': 0.386
    collab_rec = SLIMElasticNetRecommender(URM_train)
    collab_rec.fit(topK=140, l1_ratio=1e-5, alpha=0.386)
    print("recomenders are ready")

    merged_recommender = MergedHybrid000(URM_train,
                                         content_recommender=content_rec,
                                         collaborative_recommender=collab_rec)
    for alpha in np.arange(0, 1, 0.1):
        merged_recommender.fit(alpha)
        result, _ = evaluator_test.evaluateRecommender(merged_recommender)
        print(alpha, result[10]['MAP'])
#cf.fit(**{"topK": 259, "shrink": 24, "similarity": "cosine", "normalize": True}) #W_sparse_CF = cf.W_sparse #hyb7 = CFW_D_Similarity_Linalg.CFW_D_Similarity_Linalg(URM_train, ICM_all, W_sparse_CF) #hyb7.fit(**{"topK": 575, "add_zeros_quota": 0.6070346405411541, "normalize_similarity": False}) hyb7 = ScoresHybridSpecializedV2Mid.ScoresHybridSpecializedV2Mid( URM_ICM_train, URM_ICM_train.T) hyb7.fit( **{ "topK_P": 516, "alpha_P": 0.4753488773601332, "normalize_similarity_P": False, "topK": 258, "shrink": 136, "similarity": "asymmetric", "normalize": False, "alpha": 0.48907705969537585, "feature_weighting": "BM25" }) print(evaluator_validation.evaluateRecommender(svd)) print(evaluator_validation.evaluateRecommender(itemKNNCBF)) print(evaluator_validation.evaluateRecommender(itemKNNCBF2)) print(evaluator_validation.evaluateRecommender(itemKNNCBF3)) print(evaluator_validation.evaluateRecommender(hyb7)) print(evaluator_validation.evaluateRecommender(hyb5)) print(evaluator_validation.evaluateRecommender(hyb6)) #item_list = recommender.recommend(target_ids, cutoff=10) #CreateCSV.create_csv(target_ids, item_list, 'MyRec')
# recommenderBetaGRAPH = RP3betaRecommender(URM_train) # recommenderBetaGRAPH.fit(topK=54, implicit=True, normalize_similarity=True, alpha=1e-6, beta=0.2, min_rating=0) # recommenderSLIMELASTIC = SLIMElasticNetRecommender(URM_all) # recommenderSLIMELASTIC.fit(topK=10, alpha=1e-4) # recommenderSLIMELASTIC.save_model('model/', file_name='SLIM_ElasticNet') # recommenderCYTHON = SLIM_BPR_Cython(URM_train, recompile_cython=False) # recommenderCYTHON.fit(epochs=350, batch_size=200, sgd_mode='adagrad', learning_rate=0.001, topK=10) # URM_validation = sps.load_npz('URM/URM_validation.npz') evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10]) result, result_string = evaluator_test.evaluateRecommender(recommenderCB) print(result_string) # evaluate_algorithm(URM_test, recommenderGRAPH, recommenderTP, at=10) ''' x_tick = [x for x in range(15, 28, 3)] MAP_per_k = [] trains = [] tests = [] for i in range(4): URM_train, URM_test = splitURM(URM_all) trains.append(URM_train) tests.append(URM_test) i = 0
# NOTE(review): fragment — itemCFParam, slimParam, p3Param, alpha1..alpha3 and
# the recommender classes are defined upstream. This chunk may fall inside the
# triple-quoted (disabled) region opened earlier — confirm it actually executes.
URM_train, URM_test = train_test_holdout(URM_all, train_perc=0.8)
# Fit the three base models on the TRAIN split and blend their similarity
# matrices for a quick MAP estimate...
itemCF_recommender = ItemKNNCFRecommender(URM_train)
itemCF_recommender.fit(**itemCFParam)
slim_recommender = SLIM_BPR_Cython(URM_train, recompile_cython=False)
slim_recommender.fit(**slimParam)
p3_recommender = P3alphaRecommender(URM_train)
p3_recommender.fit(**p3Param)
recommender1 = SimilarityHybridRecommender(URM_train,
                                           itemCF_recommender.W_sparse,
                                           slim_recommender.W_sparse,
                                           p3_recommender.W_sparse)
recommender1.fit(topK=100, alpha1=alpha1, alpha2=alpha2, alpha3=alpha3)
evaluator_validation = EvaluatorHoldout(URM_test, cutoff_list=[10])
eval_res = evaluator_validation.evaluateRecommender(recommender1)
MAP = eval_res[0][10]['MAP']
print("The MAP in one test is: ", MAP)
# ...then refit everything on the FULL URM and save the final hybrid.
itemCF_recommender = ItemKNNCFRecommender(URM_all)
itemCF_recommender.fit(**itemCFParam)
slim_recommender = SLIM_BPR_Cython(URM_all, recompile_cython=False)
slim_recommender.fit(**slimParam)
p3_recommender = P3alphaRecommender(URM_all)
p3_recommender.fit(**p3Param)
recommender1 = SimilarityHybridRecommender(URM_all,
                                           itemCF_recommender.W_sparse,
                                           slim_recommender.W_sparse,
                                           p3_recommender.W_sparse)
recommender1.fit(topK=100, alpha1=alpha1, alpha2=alpha2, alpha3=alpha3)
recommender1.save_model("model/", "hybrid_item_slim_basic")
# recommender = HybridGenRecommender(urm_train, eurm=True) # recommender.fit() # recommender = ItemKNNCBFRecommender(urm_train, icm_all) # recommender.fit(shrink=40, topK=20, feature_weighting='BM25') recommender = HybridNorm3Recommender(urm_train) recommender.fit() normal_recommender = recommender if test: if temperature == 'cold': result, str_result = evaluator_test.evaluateRecommender( cold_recommender) print('The Map is : {}'.format(result[10]['MAP'])) if temperature == 'zero': result, str_result = evaluator_test.evaluateRecommender( zero_recommender) print('The Map of test is : {}'.format(result[10]['MAP'])) # if valid: # result, str_result = evaluator_valid.evaluateRecommender(zero_recommender) # print('The Map of valid is : {}'.format(result[10]['MAP'])) if temperature == 'warm': result, str_result = evaluator_test.evaluateRecommender( warm_recommender) print('The Map is : {}'.format(result[10]['MAP'])) # if valid: # result, str_result = evaluator_valid.evaluateRecommender(warm_recommender)