class LinearHybrid003(BaseItemSimilarityMatrixRecommender):
    """Linear blend of the item scores of three pre-tuned recommenders.

    Components (hyper-parameters are hard-coded in ``__init__``):
      1. ``UserKNNCFRecommender``
      2. ``P3alphaRecommender``
      3. ``ItemKNNCBFRecommender``

    ``fit`` only chooses the three blend weights; the components are either
    loaded from ``stored_recommenders/`` or trained with their fixed
    parameters.
    """

    RECOMMENDER_NAME = "LinearHybrid003"

    # set the seed equal to the one of the parameter search!!!!
    def __init__(self, URM_train, ICM_train, submission=False, verbose=True, seed=1205):
        """Build the three (still unfitted) component recommenders.

        :param URM_train: user-rating matrix handed to every component.
        :param ICM_train: item-content matrix handed to the content-based component.
        :param submission: when True, ``fit`` always trains the components
            and never touches the on-disk cache.
        :param verbose: forwarded to the base class constructor.
        :param seed: only used to build the cache folder name.
        """
        super(LinearHybrid003, self).__init__(URM_train, verbose=verbose)
        self.URM_train = URM_train
        self.ICM_train = ICM_train

        # seed 1205: 'topK': 190, 'shrink': 0, 'similarity': 'cosine', 'normalize': True
        self.__rec1 = UserKNNCFRecommender(URM_train, verbose=False)
        self.__rec1_params = {
            'topK': 190,
            'shrink': 0,
            'similarity': 'cosine',
            'normalize': True
        }

        self.__rec2 = P3alphaRecommender(URM_train, verbose=False)
        self.__rec2_params = {
            'topK': 131,
            'alpha': 0.33660811631883863,
            'normalize_similarity': False
        }

        # seed 1205: 'topK': 205, 'shrink': 1000, 'similarity': 'cosine', 'normalize': True,
        # 'feature_weighting': 'BM25'
        self.__rec3 = ItemKNNCBFRecommender(URM_train, ICM_train, verbose=False)
        self.__rec3_params = {
            'topK': 205,
            'shrink': 1000,
            'similarity': 'cosine',
            'normalize': True,
            'feature_weighting': 'BM25'
        }

        # Blend weights, assigned by fit().
        self.__a = self.__b = self.__c = None

        self.seed = seed
        self.__submission = submission

    def __load_or_fit(self, recommender, fit_params):
        """Load ``recommender`` from the cache, or train it and store it there."""
        folder_path = f'stored_recommenders/seed_{str(self.seed)}_{recommender.RECOMMENDER_NAME}/'
        file_name = f'best_for_{self.RECOMMENDER_NAME}'
        try:
            recommender.load_model(folder_path, file_name)
            print(f"{recommender.RECOMMENDER_NAME} loaded.")
        except Exception:
            # Any loading problem means "no usable cache": train from scratch.
            # `Exception` (not a bare except) lets Ctrl-C / SystemExit propagate.
            print(f"Fitting {recommender.RECOMMENDER_NAME} ...")
            recommender.fit(**fit_params)
            print("done.")
            recommender.save_model(folder_path, file_name)

    def fit(self, alpha=0.5, l1_ratio=0.5):
        """Set the blend weights and make sure every component is fitted.

        The weights are ``a = alpha * l1_ratio``, ``b = alpha - a`` and
        ``c = 1 - a - b`` (i.e. ``1 - alpha``), applied to components 1, 2
        and 3 respectively.
        """
        self.__a = alpha * l1_ratio
        self.__b = alpha - self.__a
        self.__c = 1 - self.__a - self.__b

        components = (
            (self.__rec1, self.__rec1_params),
            (self.__rec2, self.__rec2_params),
            (self.__rec3, self.__rec3_params),
        )
        for recommender, fit_params in components:
            if self.__submission:
                recommender.fit(**fit_params)
            else:
                self.__load_or_fit(recommender, fit_params)

    def _compute_item_score(self, user_id_array, items_to_compute=None):
        """Return the weighted sum of the three components' item scores.

        ``items_to_compute`` is accepted for interface compatibility but is
        not forwarded to the components.
        """
        item_weights_1 = self.__rec1._compute_item_score(user_id_array)
        item_weights_2 = self.__rec2._compute_item_score(user_id_array)
        item_weights_3 = self.__rec3._compute_item_score(user_id_array)

        item_weights = item_weights_1 * self.__a + item_weights_2 * self.__b + item_weights_3 * self.__c

        return item_weights

    def save_model(self, folder_path, file_name=None):
        """Write a placeholder file; an empty dict is all that gets saved."""
        if file_name is None:
            file_name = self.RECOMMENDER_NAME
        self._print("Saving model in file '{}'".format(folder_path + file_name))
        dataIO = DataIO(folder_path=folder_path)
        dataIO.save_data(file_name=file_name, data_dict_to_save={})
        self._print("Saving complete")
print(f"Fitting {ucf.RECOMMENDER_NAME} ...") ucf.fit(**ucf_params) print(f"done.") ucf.save_model(f'stored_recommenders/seed_{str(seed)}_{ucf.RECOMMENDER_NAME}/', 'for_notebook_analysis') icb = ItemKNNCBFRecommender(URM_train, ICM_all, verbose=False) icb_params = {'topK': 65, 'shrink': 0, 'similarity': 'dice', 'normalize': True} try: icb.load_model(f'stored_recommenders/seed_{str(seed)}_{icb.RECOMMENDER_NAME}/', 'for_notebook_analysis') print(f"{icb.RECOMMENDER_NAME} loaded.") except: print(f"Fitting {icb.RECOMMENDER_NAME} ...") icb.fit(**icb_params) print(f"done.") icb.save_model(f'stored_recommenders/seed_{str(seed)}_{icb.RECOMMENDER_NAME}/', 'for_notebook_analysis') list_recommender = [sslim, icb, ucf] best_recommender = HybridCombinationSearch(URM_train, ICM_all, list_recommender) params = {'alpha': 0.6461624491197696, 'l1_ratio': 0.7617220099582368} best_recommender.fit(**params) user_ids = parser.get_ratings().user_id.unique() cutoff = 20 user_recommendations_items = [] user_recommendations_user_id = [] target = [] for n_user in user_ids: recommendations = best_recommender.recommend(n_user, cutoff=20) user_recommendations_items.extend(recommendations)
class PipeHybrid001(RP3betaRecommender):
    """RP3beta whose training matrix is ``URM_train`` multiplied by a content-based similarity.

    An ``ItemKNNCBFRecommender`` is loaded (or fitted) in the constructor and
    ``self.URM_train`` is replaced by ``URM_train.dot(W_sparse)``; the
    original matrix is kept in ``self.URM_train_recommendation``.
    """

    RECOMMENDER_NAME = "PipeHybrid001"

    def __init__(self, URM_train, ICM_train, verbose=True):
        """Prepare the content-based similarity and the enriched URM.

        :param URM_train: original user-rating matrix.
        :param ICM_train: item-content matrix for the content-based model.
        :param verbose: forwarded to the base class constructor.
        """
        super(PipeHybrid001, self).__init__(URM_train, verbose=verbose)
        self.URM_train_recommendation = URM_train
        self.ICM_train = ICM_train
        self.__content_recommender = ItemKNNCBFRecommender(URM_train, ICM_train)

        try:
            self.__content_recommender.load_model('stored_recommenders/ItemKNNCBFRecommender/best_at_26_10_20')
        except Exception:
            # No usable cached model: train it and cache it for next time.
            # `Exception` (not a bare except) lets Ctrl-C / SystemExit propagate.
            self.__content_recommender.fit(topK=140, shrink=1000, similarity='cosine', normalize=True,
                                           feature_weighting='BM25')  # best parameter up to now
            self.__content_recommender.save_model('stored_recommenders/ItemKNNCBFRecommender/best_at_26_10_20')

        # The RP3beta algorithm will now see the content-enriched matrix.
        self.URM_train = URM_train.dot(self.__content_recommender.W_sparse)
        # Reset the format flags since URM_train has just been replaced.
        self._URM_train_format_checked = False
        self._W_sparse_format_checked = False

    def recommend(self, user_id_array, cutoff=None, remove_seen_flag=True, items_to_compute=None,
                  remove_top_pop_flag=False, remove_custom_items_flag=False, return_scores=False):
        """Redefinition using ``self.URM_train_recommendation``, not the new URM train of RP3beta.

        :param user_id_array: a single user id or an array of user ids.
        :param cutoff: number of items to return per user; defaults to
            (number of items - 1) of ``URM_train_recommendation``.
        :param remove_seen_flag: filter scores through ``_remove_seen_on_scores``.
        :param items_to_compute: forwarded to ``_compute_item_score``.
        :param remove_top_pop_flag: filter scores through ``_remove_TopPop_on_scores``.
        :param remove_custom_items_flag: filter scores through ``_remove_custom_items_on_scores``.
        :param return_scores: also return the full score matrix.
        :return: a list of item ids (single user) or a list of such lists,
            optionally paired with ``scores_batch``.
        """
        # NOTE(review): only the default cutoff reads URM_train_recommendation here;
        # `_remove_seen_on_scores` is inherited and presumably reads self.URM_train
        # (the enriched matrix) - confirm this is the intended "seen" definition.

        # If is a scalar transform it in a 1-cell array
        if np.isscalar(user_id_array):
            user_id_array = np.atleast_1d(user_id_array)
            single_user = True
        else:
            single_user = False

        n_items = self.URM_train_recommendation.shape[1]
        if cutoff is None:
            cutoff = n_items - 1

        # Compute the scores using the model-specific function
        # Vectorize over all users in user_id_array
        scores_batch = self._compute_item_score(user_id_array, items_to_compute=items_to_compute)

        # argpartition needs kth < number of columns; clamp instead of raising.
        cutoff = min(cutoff, scores_batch.shape[1] - 1)

        if remove_seen_flag:
            for user_index, user_id in enumerate(user_id_array):
                scores_batch[user_index, :] = self._remove_seen_on_scores(user_id, scores_batch[user_index, :])

        if remove_top_pop_flag:
            scores_batch = self._remove_TopPop_on_scores(scores_batch)

        if remove_custom_items_flag:
            scores_batch = self._remove_custom_items_on_scores(scores_batch)

        # Sorting is done in three steps. Faster than plain np.argsort for higher number of items
        # - Partition the data to extract the set of relevant items
        # - Sort only the relevant items
        # - Get the original item index

        # relevant_items_partition is block_size x cutoff
        relevant_items_partition = (-scores_batch).argpartition(cutoff, axis=1)[:, 0:cutoff]

        # Get original value and sort it
        # [:, None] adds 1 dimension to the array, from (block_size,) to (block_size,1)
        # This is done to correctly get scores_batch value as [row, relevant_items_partition[row,:]]
        row_selector = np.arange(scores_batch.shape[0])[:, None]
        relevant_items_partition_original_value = scores_batch[row_selector, relevant_items_partition]
        relevant_items_partition_sorting = np.argsort(-relevant_items_partition_original_value, axis=1)
        ranking = relevant_items_partition[row_selector, relevant_items_partition_sorting]

        # Remove from the recommendation list any item that has a -inf score
        # Since -inf is a flag to indicate an item to remove
        ranking_list = [None] * ranking.shape[0]
        for user_index, user_recommendation_list in enumerate(ranking):
            user_item_scores = scores_batch[user_index, user_recommendation_list]
            not_inf_scores_mask = np.logical_not(np.isinf(user_item_scores))
            ranking_list[user_index] = user_recommendation_list[not_inf_scores_mask].tolist()
            # NOTE: a disabled experiment used to sit here; it replaced the
            # list of cold users with TopPop recommendations.

        # Return single list for one user, instead of list of lists
        if single_user:
            ranking_list = ranking_list[0]

        if return_scores:
            return ranking_list, scores_batch
        return ranking_list
class LinearHybridC001(BaseItemSimilarityMatrixRecommender):
    """Linear blend of IALS, ItemKNN-CBF and ItemKNN-CF item scores.

    This hybrid works for users who have a profile length shorter or equal
    to 2 interactions.
    """

    RECOMMENDER_NAME = "LinearHybridC001"

    # set the seed equal to the one of the parameter search!!!!
    def __init__(self, URM_train, ICM_train, submission=False, verbose=True, seed=1205):
        """Build the three (still unfitted) component recommenders.

        :param URM_train: user-rating matrix handed to every component.
        :param ICM_train: item-content matrix handed to the content-based component.
        :param submission: when True, ``fit`` always trains the components
            and never touches the on-disk cache.
        :param verbose: forwarded to the base class constructor.
        :param seed: only used to build the cache folder name.
        """
        super(LinearHybridC001, self).__init__(URM_train, verbose=verbose)
        self.URM_train = URM_train
        self.ICM_train = ICM_train

        # seed 1205: {'num_factors': 83, 'confidence_scaling': 'linear', 'alpha': 28.4278070726612, 'epsilon':
        # 1.0234211788885077, 'reg': 0.0027328110246575004, 'epochs': 20}
        self.__rec1 = IALSRecommender(URM_train, verbose=False)
        self.__rec1_params = {
            'num_factors': 83,
            'confidence_scaling': 'linear',
            'alpha': 28.4278070726612,
            'epsilon': 1.0234211788885077,
            'reg': 0.0027328110246575004,
            'epochs': 15  # 5 fewer than the 20 epochs listed for the search above
        }

        # seed 1205: {'topK': 225, 'shrink': 1000, 'similarity': 'cosine', 'normalize': True, 'feature_weighting':
        # 'BM25'}
        self.__rec2 = ItemKNNCBFRecommender(URM_train, ICM_train, verbose=False)
        self.__rec2_params = {
            'topK': 225,
            'shrink': 1000,
            'similarity': 'cosine',
            'normalize': True,
            'feature_weighting': 'BM25'
        }

        # seed 1205: {'topK': 220, 'shrink': 175, 'similarity': 'cosine', 'normalize': False}
        self.__rec3 = ItemKNNCFRecommender(URM_train, verbose=False)
        self.__rec3_params = {
            'topK': 220,
            'shrink': 175,
            'similarity': 'cosine',
            'normalize': False
        }

        # Blend weights, assigned by fit().
        self.__a = self.__b = self.__c = None

        self.seed = seed
        self.__submission = submission

    def __load_or_fit(self, recommender, fit_params):
        """Load ``recommender`` from the cache, or train it and store it there."""
        folder_path = f'stored_recommenders/seed_{str(self.seed)}_{recommender.RECOMMENDER_NAME}/'
        file_name = f'best_for_{self.RECOMMENDER_NAME}'
        try:
            recommender.load_model(folder_path, file_name)
            print(f"{recommender.RECOMMENDER_NAME} loaded.")
        except Exception:
            # Any loading problem means "no usable cache": train from scratch.
            # `Exception` (not a bare except) lets Ctrl-C / SystemExit propagate.
            print(f"Fitting {recommender.RECOMMENDER_NAME} ...")
            recommender.fit(**fit_params)
            print("done.")
            recommender.save_model(folder_path, file_name)

    def fit(self, alpha=0.5, l1_ratio=0.5):
        """Set the blend weights and make sure every component is fitted.

        The weights are ``a = alpha * l1_ratio``, ``b = alpha - a`` and
        ``c = 1 - a - b`` (i.e. ``1 - alpha``), applied to components 1, 2
        and 3 respectively.
        """
        self.__a = alpha * l1_ratio
        self.__b = alpha - self.__a
        self.__c = 1 - self.__a - self.__b

        components = (
            (self.__rec1, self.__rec1_params),
            (self.__rec2, self.__rec2_params),
            (self.__rec3, self.__rec3_params),
        )
        for recommender, fit_params in components:
            if self.__submission:
                recommender.fit(**fit_params)
            else:
                self.__load_or_fit(recommender, fit_params)

    def _compute_item_score(self, user_id_array, items_to_compute=None):
        """Return the weighted sum of the three components' item scores.

        ``items_to_compute`` is accepted for interface compatibility but is
        not forwarded to the components.
        """
        item_weights_1 = self.__rec1._compute_item_score(user_id_array)
        item_weights_2 = self.__rec2._compute_item_score(user_id_array)
        item_weights_3 = self.__rec3._compute_item_score(user_id_array)

        item_weights = item_weights_1 * self.__a + item_weights_2 * self.__b + item_weights_3 * self.__c

        return item_weights

    def save_model(self, folder_path, file_name=None):
        """Write a placeholder file; an empty dict is all that gets saved."""
        if file_name is None:
            file_name = self.RECOMMENDER_NAME
        self._print("Saving model in file '{}'".format(folder_path + file_name))
        dataIO = DataIO(folder_path=folder_path)
        dataIO.save_data(file_name=file_name, data_dict_to_save={})
        self._print("Saving complete")
class LinearHybrid008(BaseItemSimilarityMatrixRecommender):
    """Linear blend of max-normalised scores from SSLIM, ItemKNN-CBF and UserKNN-CF.

    Unlike the un-normalised linear hybrids, every component's score matrix
    is divided by its own maximum before the weighted sum is taken.
    """

    RECOMMENDER_NAME = "LinearHybrid008"

    # set the seed equal to the one of the parameter search!!!!
    def __init__(self, URM_train, ICM_train, submission=False, verbose=True, seed=1205):
        """Build the three (still unfitted) component recommenders.

        :param URM_train: user-rating matrix handed to every component.
        :param ICM_train: item-content matrix handed to components 1 and 2.
        :param submission: when True, ``fit`` always trains the components
            and never touches the on-disk cache.
        :param verbose: forwarded to the base class constructor.
        :param seed: only used to build the cache folder name.
        """
        super(LinearHybrid008, self).__init__(URM_train, verbose=verbose)
        self.URM_train = URM_train
        self.ICM_train = ICM_train

        self.__rec1 = SSLIMElasticNet(URM_train, ICM_train, verbose=False)
        self.__rec1_params = {
            'beta': 0.4849594591575789,
            'topK': 1000,
            'l1_ratio': 1e-05,
            'alpha': 0.001
        }

        self.__rec2 = ItemKNNCBFRecommender(URM_train, ICM_train, verbose=False)
        self.__rec2_params = {
            'topK': 65,
            'shrink': 0,
            'similarity': 'dice',
            'normalize': True
        }

        self.__rec3 = UserKNNCFRecommender(URM_train, verbose=False)
        self.__rec3_params = {
            'topK': 190,
            'shrink': 0,
            'similarity': 'cosine',
            'normalize': True
        }

        # Blend weights, assigned by fit().
        self.__a = self.__b = self.__c = None

        self.seed = seed
        self.__submission = submission

    def __load_or_fit(self, recommender, fit_params):
        """Load ``recommender`` from the cache, or train it and store it there."""
        folder_path = f'stored_recommenders/seed_{str(self.seed)}_{recommender.RECOMMENDER_NAME}/'
        file_name = f'best_for_{self.RECOMMENDER_NAME}'
        try:
            recommender.load_model(folder_path, file_name)
            print(f"{recommender.RECOMMENDER_NAME} loaded.")
        except Exception:
            # Any loading problem means "no usable cache": train from scratch.
            # `Exception` (not a bare except) lets Ctrl-C / SystemExit propagate.
            print(f"Fitting {recommender.RECOMMENDER_NAME} ...")
            recommender.fit(**fit_params)
            print("done.")
            recommender.save_model(folder_path, file_name)

    def fit(self, alpha=0.5, l1_ratio=0.5):
        """Set the blend weights and make sure every component is fitted.

        The weights are ``a = alpha * l1_ratio``, ``b = alpha - a`` and
        ``c = 1 - a - b`` (i.e. ``1 - alpha``), applied to components 1, 2
        and 3 respectively.
        """
        self.__a = alpha * l1_ratio
        self.__b = alpha - self.__a
        self.__c = 1 - self.__a - self.__b

        components = (
            (self.__rec1, self.__rec1_params),
            (self.__rec2, self.__rec2_params),
            (self.__rec3, self.__rec3_params),
        )
        for recommender, fit_params in components:
            if self.__submission:
                recommender.fit(**fit_params)
            else:
                self.__load_or_fit(recommender, fit_params)

    def _compute_item_score(self, user_id_array, items_to_compute=None):
        """Return the weighted sum of the max-normalised component scores.

        Each component's score matrix is divided in place by its maximum,
        taken over the whole returned batch (all requested users together),
        unless that maximum is 0. ``items_to_compute`` is accepted for
        interface compatibility but is not forwarded to the components.
        """
        item_weights_1 = self.__rec1._compute_item_score(user_id_array)
        item_weights_2 = self.__rec2._compute_item_score(user_id_array)
        item_weights_3 = self.__rec3._compute_item_score(user_id_array)

        # normalization
        item_weights_1_max = item_weights_1.max()
        item_weights_2_max = item_weights_2.max()
        item_weights_3_max = item_weights_3.max()

        # Skip the division when the maximum is 0 to avoid dividing by zero.
        if item_weights_1_max != 0:
            item_weights_1 /= item_weights_1_max
        if item_weights_2_max != 0:
            item_weights_2 /= item_weights_2_max
        if item_weights_3_max != 0:
            item_weights_3 /= item_weights_3_max

        item_weights = item_weights_1 * self.__a + item_weights_2 * self.__b + item_weights_3 * self.__c

        return item_weights

    def save_model(self, folder_path, file_name=None):
        """Write a placeholder file; an empty dict is all that gets saved."""
        if file_name is None:
            file_name = self.RECOMMENDER_NAME
        self._print("Saving model in file '{}'".format(folder_path + file_name))
        dataIO = DataIO(folder_path=folder_path)
        dataIO.save_data(file_name=file_name, data_dict_to_save={})
        self._print("Saving complete")
def __init__(self, URM_train, ICM_train, submission=False, verbose=True, seed=1205):
    """Build the user-KNN component and the merged (icb + rp3b + sen) component.

    The three recommenders feeding ``HiddenMergedRecommender`` are made
    ready here, inside the constructor: loaded from ``stored_recommenders/``
    when possible, trained (and cached) otherwise. With ``submission=True``
    they are always trained and the cache is not touched.

    :param URM_train: user-rating matrix handed to every component.
    :param ICM_train: item-content matrix handed to the content-based
        recommender and to the merged recommender.
    :param submission: skip the on-disk cache and always train.
    :param verbose: forwarded to the base class constructor.
    :param seed: only used to build the cache folder name.
    """
    super(LinearOverMerged001, self).__init__(URM_train, verbose=verbose)
    self.URM_train = URM_train
    self.ICM_train = ICM_train
    self.__submission = submission

    self.__rec1 = UserKNNCFRecommender(URM_train, verbose=False)
    self.__rec1_params = {
        'topK': 190,
        'shrink': 0,
        'similarity': 'cosine',
        'normalize': True
    }

    self.seed = seed

    icb = ItemKNNCBFRecommender(URM_train, ICM_train, verbose=False)
    icb_params = {
        'topK': 65,
        'shrink': 0,
        'similarity': 'dice',
        'normalize': True
    }

    rp3b = RP3betaRecommender(URM_train, verbose=False)
    rp3b_params = {
        'topK': 1000,
        'alpha': 0.38192761611274967,
        'beta': 0.0,
        'normalize_similarity': False
    }

    sen = SLIMElasticNetRecommender(URM_train, verbose=False)
    sen_params = {
        'topK': 992,
        'l1_ratio': 0.004065081925341167,
        'alpha': 0.003725005053334143
    }

    def load_or_fit(recommender, fit_params):
        # Load the recommender from the cache, or train it and store it there.
        folder_path = f'stored_recommenders/seed_{str(self.seed)}_{recommender.RECOMMENDER_NAME}/'
        file_name = f'best_for_{self.RECOMMENDER_NAME}'
        try:
            recommender.load_model(folder_path, file_name)
            print(f"{recommender.RECOMMENDER_NAME} loaded.")
        except Exception:
            # Any loading problem means "no usable cache": train from scratch.
            # `Exception` (not a bare except) lets Ctrl-C / SystemExit propagate.
            print(f"Fitting {recommender.RECOMMENDER_NAME} ...")
            recommender.fit(**fit_params)
            print("done.")
            recommender.save_model(folder_path, file_name)

    for recommender, fit_params in ((icb, icb_params), (rp3b, rp3b_params), (sen, sen_params)):
        if self.__submission:
            recommender.fit(**fit_params)
        else:
            load_or_fit(recommender, fit_params)

    self.__rec2 = HiddenMergedRecommender(URM_train, ICM_train, [icb, rp3b, sen], verbose=False)
    self.__rec2_params = {
        'alpha': 0.6355738550417837,
        'l1_ratio': 0.6617849709204384,
        'topK': 538
    }

    # Blend weights; left unset by the constructor.
    self.__a = self.__b = None
class LinearHybridW001(BaseItemSimilarityMatrixRecommender):
    """Linear blend of ItemKNN-CBF, ItemKNN-CF and RP3beta item scores.

    This hybrid works for users who have a profile length greater than or
    equal to 3 interactions.
    """

    RECOMMENDER_NAME = "LinearHybridW001"

    # set the seed equal to the one of the parameter search!!!!
    def __init__(self, URM_train, ICM_train, submission=False, verbose=True, seed=1205):
        """Build the three (still unfitted) component recommenders.

        :param URM_train: user-rating matrix handed to every component.
        :param ICM_train: item-content matrix handed to the content-based component.
        :param submission: when True, ``fit`` always trains the components
            and never touches the on-disk cache.
        :param verbose: forwarded to the base class constructor.
        :param seed: only used to build the cache folder name.
        """
        super(LinearHybridW001, self).__init__(URM_train, verbose=verbose)
        self.URM_train = URM_train
        self.ICM_train = ICM_train

        # seed 1205: {'topK': 205, 'shrink': 1000, 'similarity': 'cosine',
        # 'normalize': True, 'feature_weighting': 'BM25'}
        self.__rec1 = ItemKNNCBFRecommender(URM_train, ICM_train, verbose=False)
        self.__rec1_params = {'topK': 205, 'shrink': 1000, 'similarity': 'cosine',
                              'normalize': True, 'feature_weighting': 'BM25'}

        # seed 1205: {'topK': 565, 'shrink': 554, 'similarity': 'tversky', 'normalize': True,
        # 'tversky_alpha': 1.9109121434662428, 'tversky_beta': 1.7823834698905734}
        self.__rec2 = ItemKNNCFRecommender(URM_train, verbose=False)
        self.__rec2_params = {'topK': 565, 'shrink': 554, 'similarity': 'tversky', 'normalize': True,
                              'tversky_alpha': 1.9109121434662428, 'tversky_beta': 1.7823834698905734}

        # seed 1205: {'topK': 753, 'alpha': 0.3873710051288722, 'beta': 0.0, 'normalize_similarity': False}
        self.__rec3 = RP3betaRecommender(URM_train, verbose=False)
        self.__rec3_params = {'topK': 753, 'alpha': 0.3873710051288722, 'beta': 0.0,
                              'normalize_similarity': False}

        # Blend weights, assigned by fit().
        self.__a = self.__b = self.__c = None

        self.seed = seed
        self.__submission = submission

    def __load_or_fit(self, recommender, fit_params):
        """Load ``recommender`` from the cache, or train it and store it there."""
        folder_path = f'stored_recommenders/seed_{str(self.seed)}_{recommender.RECOMMENDER_NAME}/'
        file_name = f'best_for_{self.RECOMMENDER_NAME}'
        try:
            recommender.load_model(folder_path, file_name)
            print(f"{recommender.RECOMMENDER_NAME} loaded.")
        except Exception:
            # Any loading problem means "no usable cache": train from scratch.
            # `Exception` (not a bare except) lets Ctrl-C / SystemExit propagate.
            print(f"Fitting {recommender.RECOMMENDER_NAME} ...")
            recommender.fit(**fit_params)
            print("done.")
            recommender.save_model(folder_path, file_name)

    def fit(self, alpha=0.5, l1_ratio=0.5):
        """Set the blend weights and make sure every component is fitted.

        The weights are ``a = alpha * l1_ratio``, ``b = alpha - a`` and
        ``c = 1 - a - b`` (i.e. ``1 - alpha``), applied to components 1, 2
        and 3 respectively.
        """
        self.__a = alpha * l1_ratio
        self.__b = alpha - self.__a
        self.__c = 1 - self.__a - self.__b

        components = (
            (self.__rec1, self.__rec1_params),
            (self.__rec2, self.__rec2_params),
            (self.__rec3, self.__rec3_params),
        )
        for recommender, fit_params in components:
            if self.__submission:
                recommender.fit(**fit_params)
            else:
                self.__load_or_fit(recommender, fit_params)

    def _compute_item_score(self, user_id_array, items_to_compute=None):
        """Return the weighted sum of the three components' item scores.

        ``items_to_compute`` is accepted for interface compatibility but is
        not forwarded to the components.
        """
        item_weights_1 = self.__rec1._compute_item_score(user_id_array)
        item_weights_2 = self.__rec2._compute_item_score(user_id_array)
        item_weights_3 = self.__rec3._compute_item_score(user_id_array)

        item_weights = item_weights_1 * self.__a + item_weights_2 * self.__b + item_weights_3 * self.__c

        return item_weights

    def save_model(self, folder_path, file_name=None):
        """Write a placeholder file; an empty dict is all that gets saved."""
        if file_name is None:
            file_name = self.RECOMMENDER_NAME
        self._print("Saving model in file '{}'".format(folder_path + file_name))
        dataIO = DataIO(folder_path=folder_path)
        dataIO.save_data(file_name=file_name, data_dict_to_save={})
        self._print("Saving complete")
def read_data_split_and_search():
    """
    This function provides a simple example on how to tune parameters of a given algorithm

    The BayesianSearch object will save:
        - A .txt file with all the cases explored and the recommendation quality
        - A _best_model file which contains the trained model and can be loaded with recommender.load_model()
        - A _best_parameter file which contains a dictionary with all the fit parameters, it can be passed to recommender.fit(**_best_parameter)
        - A _best_result_validation file which contains a dictionary with the results of the best solution on the validation
        - A _best_result_test file which contains a dictionary with the results, on the test set, of the best solution chosen using the validation set

    Concretely, this version loads (or fits and caches) six base
    recommenders and then, for every 3-element combination of them, runs
    ``runParameterSearch_Collaborative`` for each class listed in
    ``collaborative_algorithm_list``, writing the results under
    ``result_experiments_v3/seed_<seed>/linear_combination/<names>/``.
    """
    seed = 1205
    parser = DataParser()

    URM_all = parser.get_URM_all()
    ICM_obj = parser.get_ICM_all()

    # SPLIT TO GET TEST PARTITION
    URM_train, URM_test = split_train_in_two_percentage_global_sample(URM_all, train_percentage=0.90, seed=seed)

    # SPLIT TO GET THE HYBRID VALID PARTITION
    URM_train, URM_valid_hybrid = split_train_in_two_percentage_global_sample(URM_train, train_percentage=0.85,
                                                                              seed=seed)

    collaborative_algorithm_list = [
        # EASE_R_Recommender
        # PipeHybrid001,
        # Random,
        # TopPop,
        # P3alphaRecommender,
        # RP3betaRecommender,
        # ItemKNNCFRecommender,
        # UserKNNCFRecommender,
        # MatrixFactorization_BPR_Cython,
        # MatrixFactorization_FunkSVD_Cython,
        # PureSVDRecommender,
        # NMFRecommender,
        # PureSVDItemRecommender
        # SLIM_BPR_Cython,
        # SLIMElasticNetRecommender
        # IALSRecommender
        # MF_MSE_PyTorch
        # MergedHybrid000
        # LinearHybrid002ggg
        HybridCombinationSearch
    ]

    content_algorithm_list = [
        # ItemKNNCBFRecommender
    ]

    from Base.Evaluation.Evaluator import EvaluatorHoldout

    evaluator_valid_hybrid = EvaluatorHoldout(URM_valid_hybrid, cutoff_list=[10])
    evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])

    """
    earlystopping_keywargs = {"validation_every_n": 5,
                              "stop_on_validation": True,
                              "evaluator_object": evaluator_valid_hybrid,
                              "lower_validations_allowed": 5,
                              "validation_metric": 'MAP',
                              }

    print('IALS training...')
    ials = IALSRecommender(URM_train, verbose=False)
    ials_params = {'num_factors': 83, 'confidence_scaling': 'linear', 'alpha': 28.4278070726612,
                   'epsilon': 1.0234211788885077, 'reg': 0.0027328110246575004, 'epochs': 20}
    ials.fit(**ials_params, **earlystopping_keywargs)
    print("Done")

    print("PureSVD training...")
    psvd = PureSVDRecommender(URM_train, verbose=False)
    psvd_params = {'num_factors': 711}
    psvd.fit(**psvd_params)
    print("Done")
    """

    def load_or_fit(recommender, fit_params):
        # Load the recommender from the search cache, or train it and store it there.
        folder_path = f'stored_recommenders/seed_{str(seed)}_hybrid_search/'
        file_name = f'{recommender.RECOMMENDER_NAME}_for_second_search'
        try:
            recommender.load_model(folder_path, file_name)
            print(f"{recommender.RECOMMENDER_NAME} loaded.")
        except Exception:
            # Any loading problem means "no usable cache": train from scratch.
            # `Exception` (not a bare except) lets Ctrl-C / SystemExit propagate.
            print(f"Fitting {recommender.RECOMMENDER_NAME} ...")
            recommender.fit(**fit_params)
            print("done.")
            recommender.save_model(folder_path, file_name)

    rp3b = RP3betaRecommender(URM_train, verbose=False)
    rp3b_params = {'topK': 1000, 'alpha': 0.38192761611274967, 'beta': 0.0, 'normalize_similarity': False}
    load_or_fit(rp3b, rp3b_params)

    p3a = P3alphaRecommender(URM_train, verbose=False)
    p3a_params = {'topK': 131, 'alpha': 0.33660811631883863, 'normalize_similarity': False}
    load_or_fit(p3a, p3a_params)

    icf = ItemKNNCFRecommender(URM_train, verbose=False)
    icf_params = {'topK': 55, 'shrink': 1000, 'similarity': 'asymmetric', 'normalize': True,
                  'asymmetric_alpha': 0.0}
    load_or_fit(icf, icf_params)

    ucf = UserKNNCFRecommender(URM_train, verbose=False)
    ucf_params = {'topK': 190, 'shrink': 0, 'similarity': 'cosine', 'normalize': True}
    load_or_fit(ucf, ucf_params)

    icb = ItemKNNCBFRecommender(URM_train, ICM_obj, verbose=False)
    icb_params = {'topK': 65, 'shrink': 0, 'similarity': 'dice', 'normalize': True}
    # The helper reports icb's own name here (the hand-written stanza printed icf's).
    load_or_fit(icb, icb_params)

    sen = SLIMElasticNetRecommender(URM_train, verbose=False)
    sen_params = {'topK': 992, 'l1_ratio': 0.004065081925341167, 'alpha': 0.003725005053334143}
    load_or_fit(sen, sen_params)

    print("\nStart.")
    list_recommender = [icb, icf, ucf, p3a, rp3b, sen]
    list_already_seen = []
    # Combinations listed here are skipped; earlier runs used, for example:
    # (icb, icf, p3a), (icb, icf, rp3b), (icb, icf, sen), (icb, p3a, rp3b), (icb, p3a, sen),
    # (icb, rp3b, sen), (icf, p3a, rp3b), (icf, p3a, sen)
    combinations_already_seen = []

    for rec_perm in combinations(list_recommender, 3):

        if rec_perm in combinations_already_seen:
            continue

        recommender_names = '_'.join([r.RECOMMENDER_NAME for r in rec_perm])
        output_folder_path = "result_experiments_v3/seed_" + str(
            seed) + '/linear_combination/' + recommender_names + '/'
        print(F"\nTESTING THE COMBO {recommender_names}")

        # If directory does not exist, create (exist_ok avoids the check-then-create race)
        os.makedirs(output_folder_path, exist_ok=True)

        # TODO: set the RIGHT EVALUATORS here!!!!
        runParameterSearch_Collaborative_partial = partial(runParameterSearch_Collaborative,
                                                           URM_train=URM_train,
                                                           ICM_train=ICM_obj,
                                                           metric_to_optimize="MAP",
                                                           n_cases=50,
                                                           n_random_starts=20,
                                                           evaluator_validation_earlystopping=evaluator_valid_hybrid,
                                                           evaluator_validation=evaluator_valid_hybrid,
                                                           # evaluator_test=evaluator_test,
                                                           output_folder_path=output_folder_path,
                                                           allow_weighting=False,
                                                           # similarity_type_list = ["cosine", 'jaccard'],
                                                           parallelizeKNN=False,
                                                           list_rec=rec_perm)

        # One pool per combination; the context manager releases the worker
        # processes once the blocking map() has returned, instead of leaking them.
        with multiprocessing.Pool(processes=multiprocessing.cpu_count(), maxtasksperchild=1) as pool:
            pool.map(runParameterSearch_Collaborative_partial, collaborative_algorithm_list)