def run(self, num_factors, urm_train=None, urm=None, urm_test=None, targetids=None,
        with_scores=False, export=True, verbose=True):
    """
    Fit the model, recommend for the target users, evaluate the result and
    optionally export it to a file

    Parameters
    ----------
    num_factors : int, number of latent factors
    urm_train : csr matrix, URM used to fit the model. If None, use: data.get_urm_train()
    urm : not used by this method; kept so existing callers keep working
    urm_test : csr matrix, urm where to test the model. If None, use: data.get_urm_test()
    targetids : list, target user ids. If None, use: data.get_target_playlists()
    with_scores : not used by this method (it is not forwarded to recommend_batch)
    export : bool, if True write the recommendations with exportcsv under 'submission'
    verbose : bool, forwarded to evaluate and exportcsv; also enables the timing log

    Returns
    -------
    recs: (list) recommendations
    map10: (float) MAP10 for the provided recommendations, None when no
        recommendation was produced
    """
    # Resolve the defaults lazily: a dataset is loaded only when the caller
    # did not provide it, so a fully specified call touches no data loader.
    if urm_train is None:
        urm_train = data.get_urm_train()
    if urm_test is None:
        urm_test = data.get_urm_test()
    if targetids is None:
        targetids = data.get_target_playlists()

    # The timer starts after the inputs are resolved, so it measures
    # fit + recommend + evaluate (+ export) only.
    start = time.time()

    self.fit(urm_train=urm_train, num_factors=num_factors)
    recs = self.recommend_batch(userids=targetids)

    map10 = None
    if len(recs) > 0:
        map10 = self.evaluate(recs, test_urm=urm_test, verbose=verbose)
    else:
        log.warning('No recommendations available, skip evaluation')

    if export:
        exportcsv(recs, path='submission', name=self.name, verbose=verbose)

    if verbose:
        log.info('Run in: {:.2f}s'.format(time.time() - start))

    return recs, map10
def validate(self, factors_array, iteration_array, urm_train=None, urm_test=None, verbose=True,
             write_on_file=True, userids=None, N=10, filter_already_liked=True, items_to_exclude=None):
    """
    Grid-search Pure_SVD over every (factors, iteration) pair and report map@10

    A fresh Pure_SVD instance is created and re-fitted for each pair; the
    instance this method is called on is not trained.

    :param factors_array: iterable with the numbers of latent factors to try
    :param iteration_array: iterable with the numbers of iterations to try
    :param urm_train: matrix on which train the model. If None, use: data.get_urm_train()
    :param urm_test: matrix in which test the model. If None, use: data.get_urm_test()
    :param verbose: if True print the parameters and the map@10 of every run
    :param write_on_file: if True append every result to
        validation_results/<dd-mm-YYYY>/pure_SVD_<HH-MM-SS>.csv; if False no
        directory or file is created
    :param userids: users to recommend for. If None, use: data.get_target_playlists()
    :param N: forwarded to recommend_batch
    :param filter_already_liked: forwarded to recommend_batch
    :param items_to_exclude: forwarded to recommend_batch. If None, use an empty list
    :return: None
    """
    from itertools import product

    # Defaults are resolved at call time (same convention as run()), so that
    # defining this method does not load any dataset and no list is shared
    # between calls.
    if urm_train is None:
        urm_train = data.get_urm_train()
    if urm_test is None:
        urm_test = data.get_urm_test()
    if userids is None:
        userids = data.get_target_playlists()
    if items_to_exclude is None:
        items_to_exclude = []

    # create the initial model
    recommender = Pure_SVD()

    # The results file (and its directory) is created only when it is wanted.
    out = None
    if write_on_file:
        path = 'validation_results/'
        name = 'pure_SVD'
        folder = time.strftime('%d-%m-%Y')
        filename = '{}/{}/{}{}.csv'.format(path, folder, name, time.strftime('_%H-%M-%S'))
        # create dir if not exists
        os.makedirs(os.path.dirname(filename), exist_ok=True)
        out = open(filename, 'w')

    try:
        # product() iterates in the same order as the nested loops it replaces:
        # factors outermost, iterations innermost.
        for f, i in product(factors_array, iteration_array):
            # train the model with the parameters
            if verbose:
                print('\n\nTraining PURE_SVD with\n Factors: {}\n Iteration: {}\n'.format(f, i))
                print('\n training phase...')
            recommender.fit(urm_train=urm_train, num_factors=f, iteration=i)

            # get the recommendations from the trained model
            recommendations = recommender.recommend_batch(userids=userids, N=N,
                                                          filter_already_liked=filter_already_liked,
                                                          items_to_exclude=items_to_exclude)

            # evaluate the model with map10
            map10 = recommender.evaluate(recommendations, test_urm=urm_test)
            if verbose:
                print('map@10: {}'.format(map10))

            # write on external files on folder models_validation
            if out is not None:
                out.write('\n\nFactors: {}\n Iteration: {}\n evaluation map@10: {}'.format(f, i, map10))
                # keep finished runs on disk even if a later run is interrupted
                out.flush()
    finally:
        if out is not None:
            out.close()
def validate(self, epochs=200, user_ids=d.get_target_playlists(), batch_size=[1000], validate_every_N_epochs=5,
             start_validation_after_N_epochs=0, lambda_i=[0.0], lambda_j=[0.0], learning_rate=[0.01],
             topK=[200], sgd_mode='adagrad', log_path=None):
    """
    Train this model once for every combination of the given hyper-parameters

    Each combination is first described with self._print(...) and then
    trained with self.fit(...) on d.get_urm_train(), validating against
    d.get_urm_test(). The list arguments are only iterated, never modified.

    :param epochs(int)
    :param batch_size(list) after how many items the params should be updated
    :param lambda_i(list) first regularization term
    :param lambda_j(list) second regularization term
    :param learning_rate(list) algorithm learning rate
    :param topK(list) how many elements should be taken into account while computing URM*W
    :param sgd_mode(string) optimization algorithm
    :param user_ids(list) needed if we'd like to perform validation
    :param validate_every_N_epochs(int) how often the MAP evaluation should be displayed
    :param start_validation_after_N_epochs(int)
    :param log_path(string) folder to which the validation results should be saved.
        When given, everything printed during the search is redirected to a
        '<name> _<HH-MM-SS> <dd-mm-YYYY>.txt' file inside that folder
    """
    from itertools import product

    orig_stdout = sys.stdout
    log_file = None
    if log_path is not None:
        log_file = open(log_path + '/' + self.name + ' ' + time.strftime('_%H-%M-%S') + ' '
                        + time.strftime('%d-%m-%Y') + '.txt', 'w')
        sys.stdout = log_file

    try:
        # Same visiting order as the original nested loops:
        # lambda_i outermost ... batch_size innermost.
        for li, lj, tk, lr, b in product(lambda_i, lambda_j, topK, learning_rate, batch_size):
            print(self._print(epochs=epochs,
                              batch_size=b,
                              lambda_i=li,
                              lambda_j=lj,
                              learning_rate=lr,
                              topK=tk,
                              sgd_mode=sgd_mode))
            # Train *this* instance; the previous code trained the
            # module-level object `s` regardless of the receiver.
            self.fit(URM_train=d.get_urm_train(),
                     epochs=epochs,
                     URM_test=d.get_urm_test(),
                     user_ids=user_ids,
                     batch_size=b,
                     validate_every_N_epochs=validate_every_N_epochs,
                     start_validation_after_N_epochs=start_validation_after_N_epochs,
                     lambda_i=li,
                     lambda_j=lj,
                     learning_rate=lr,
                     topK=tk,
                     sgd_mode=sgd_mode)
    finally:
        # Always give stdout back and close the log, even if training fails.
        if log_file is not None:
            sys.stdout = orig_stdout
            log_file.close()
def fit(self, URM_train=d.get_urm_train(), epochs=30, URM_test=d.get_urm_test(), user_ids=d.get_target_playlists(),
        batch_size = 1000, validate_every_N_epochs = 1, start_validation_after_N_epochs = 0,
        lambda_i = 0.0, lambda_j = 0.0, learning_rate = 0.01, topK = 200, sgd_mode='adagrad'):
    """
    Learn the matrix W: build the Cython epoch runner, then hand over to
    self._fit_alreadyInitialized for the actual training loop

    :param URM_train(csr_matrix) the URM used to train the model. Either the full or the validation one
    :param epochs(int)
    :param URM_test(csr_matrix) needed if we'd like to perform validation
    :param user_ids(list) needed if we'd like to perform validation
    :param batch_size(int) after how many items the params should be updated.
        Only forwarded to _fit_alreadyInitialized; the Cython epoch runner is
        always built with batch_size=1
    :param validate_every_N_epochs(int) how often the MAP evaluation should be displayed
    :param start_validation_after_N_epochs(int)
    :param lambda_i(float) first regularization term
    :param lambda_j(float) second regularization term
    :param learning_rate(float) algorithm learning rate
    :param topK(int) how many elements should be taken into account while computing URM*W
    :param sgd_mode(string) optimization algorithm
    """
    # Remember the training data and its dimensions on the instance.
    self.URM_train = URM_train
    self.n_users, self.n_items = URM_train.shape[0], URM_train.shape[1]
    self.sgd_mode = sgd_mode

    # Imported here rather than at module level, as in the original code.
    from Cython.SLIM_BPR.SLIM_BPR_Cython_Epoch import SLIM_BPR_Cython_Epoch

    epoch_options = dict(
        sparse_weights=False,
        topK=topK,
        learning_rate=learning_rate,
        li_reg=lambda_i,
        lj_reg=lambda_j,
        batch_size=1,
        symmetric=True,
        sgd_mode=sgd_mode,
    )
    self.cythonEpoch = SLIM_BPR_Cython_Epoch(self.URM_train, **epoch_options)

    # Delegate to the shared training routine
    training_options = dict(
        epochs=epochs,
        logFile=None,
        URM_test=URM_test,
        user_ids=user_ids,
        filterTopPop=False,
        minRatingsPerUser=1,
        batch_size=batch_size,
        validate_every_N_epochs=validate_every_N_epochs,
        start_validation_after_N_epochs=start_validation_after_N_epochs,
        lambda_i=lambda_i,
        lambda_j=lambda_j,
        learning_rate=learning_rate,
        topK=topK,
    )
    self._fit_alreadyInitialized(**training_options)
# NOTE(review): the first statements below are the tail of a method whose `def` line is outside this view.
    # Put the candidate items in their final order using the sorting index.
    ranking = relevant_items_partition[relevant_items_partition_sorting]

    if with_scores:
        # Result row: [userid, [(item, score), ...]]
        best_scores = scores[ranking]
        return [userid] + [list(zip(list(ranking), list(best_scores)))]
    else:
        # Result row: [userid, item_1, item_2, ...]
        return [userid] + list(ranking)


def _filter_seen_on_scores(self, user_id, scores):
    """
    Set to -inf the score of every item stored in the user's row of URM_train

    `scores` is modified in place and also returned.
    """
    # Column indices stored for row `user_id` (indptr/indices access; assumes
    # URM_train is a CSR matrix -- TODO confirm).
    seen = self.URM_train.indices[self.URM_train.indptr[user_id]:self.URM_train.indptr[user_id + 1]]
    scores[seen] = -np.inf
    return scores


def get_r_hat(self, load_from_file=False, path=''):
    """
    Return the estimated ratings matrix

    :param load_from_file: if True, return load_npz(path) instead of computing
    :param path: file passed to load_npz when load_from_file is True
    :return: the loaded matrix, or the rows of URM_train selected by
        d.get_target_playlists() multiplied by self.W_sparse
    """
    if load_from_file:
        return load_npz(path)
    else:
        return self.URM_train[d.get_target_playlists()].dot(self.W_sparse)


# test
# NOTE(review): these statements run at module level, i.e. on import.
s = SLIM_BPR(d.get_urm_train())
# validate_every_N_epochs (101) is larger than epochs (100); presumably so that
# no intermediate validation happens -- confirm against _fit_alreadyInitialized.
s.fit(epochs=100, validate_every_N_epochs=101, learning_rate=1e-2, lambda_i = 1e-4, lambda_j = 1e-4)
# s.evaluate(recs, d.get_urm_test(), print_result=True)
# importexport.exportcsv(recs, 'submission', 'SLIM_BPR')
s.save_r_hat(evaluation=True)
def validate_als(self, factors_array, regularization_array, iterations_array, alpha_val_array, userids,
                 urm_train=None, urm_test=None, filter_already_liked=True, items_to_exclude=None,
                 N=10, verbose=True, write_on_file=True):
    """
    Grid-search AlternatingLeastSquare over every combination of the given
    hyper-parameters and report map@10

    A new AlternatingLeastSquare(urm_train) is created and re-fitted for each
    combination; the instance this method is called on is not trained.

    :param factors_array: iterable with the numbers of factors to try
    :param regularization_array: iterable with the regularization values to try
    :param iterations_array: iterable with the numbers of iterations to try
    :param alpha_val_array: iterable with the alpha values to try
    :param userids: id of the users to take into account during evaluation
    :param urm_train: matrix on which train the model. If None, use: data.get_urm_train()
    :param urm_test: matrix in which test the model. If None, use: data.get_urm_test()
    :param filter_already_liked: forwarded to recommend_batch
    :param items_to_exclude: forwarded to recommend_batch. If None, use an empty list
    :param N: forwarded to recommend_batch
    :param verbose: if True print the parameters and the map@10 of every run
    :param write_on_file: if True append every result to
        validation_results/<dd-mm-YYYY>/als_<HH-MM-SS>.csv; if False no
        directory or file is created
    -----------
    :return: _
    """
    from itertools import product

    # Defaults are resolved at call time (same convention as run()), so that
    # defining this method does not load any dataset and no list is shared
    # between calls.
    if urm_train is None:
        urm_train = data.get_urm_train()
    if urm_test is None:
        urm_test = data.get_urm_test()
    if items_to_exclude is None:
        items_to_exclude = []

    # create the initial model
    recommender = AlternatingLeastSquare(urm_train)

    # The results file (and its directory) is created only when it is wanted.
    out = None
    if write_on_file:
        path = 'validation_results/'
        name = 'als'
        folder = time.strftime('%d-%m-%Y')
        filename = '{}/{}/{}{}.csv'.format(path, folder, name, time.strftime('_%H-%M-%S'))
        # create dir if not exists
        os.makedirs(os.path.dirname(filename), exist_ok=True)
        out = open(filename, 'w')

    try:
        # product() iterates in the same order as the nested loops it replaces:
        # factors outermost, alpha innermost.
        for f, r, i, a in product(factors_array, regularization_array, iterations_array, alpha_val_array):
            # train the model with the parameters
            if verbose:
                print('\n\nTraining ALS with\n Factors: {}\n Regulatization: {}\n'
                      'Iterations: {}\n Alpha_val: {}'.format(f, r, i, a))
                print('\n training phase...')
            recommender.fit(f, r, i, a)

            # get the recommendations from the trained model
            recommendations = recommender.recommend_batch(userids=userids, N=N,
                                                          filter_already_liked=filter_already_liked,
                                                          items_to_exclude=items_to_exclude)

            # evaluate the model with map10
            map10 = recommender.evaluate(recommendations, test_urm=urm_test)
            if verbose:
                print('map@10: {}'.format(map10))

            # write on external files on folder models_validation
            if out is not None:
                out.write('\n\nFactors: {}\n Regulatization: {}\n Iterations: {}\n '
                          'Alpha_val: {}\n evaluation map@10: {}'.format(f, r, i, a, map10))
                # keep finished runs on disk even if a later run is interrupted
                out.flush()
    finally:
        if out is not None:
            out.close()
from recommenders.collaborative_filtering.SLIM_RMSE import SLIMElasticNetRecommender
import data.data as d
import inout.importexport as io

# Datasets. The model below is built on the full URM; t_id and urm_test are
# only referenced by the commented-out evaluation at the bottom, while
# urm_train and target_id are not referenced in this script.
urm = d.get_urm()
urm_train = d.get_urm_train()
target_id = d.get_all_playlists()
urm_test = d.get_urm_test()
t_id = d.get_target_playlists()

# Hyper-parameters handed to SLIMElasticNetRecommender.fit
fit_params = dict(
    topK=100,
    alpha=1e-4,
    l1_ratio=0.1,
    max_iter=100,
    tol=1e-4,
)

# Train on the full URM and persist the estimated ratings
recommender = SLIMElasticNetRecommender(urm)
recommender.fit(**fit_params)
recommender.save_r_hat()

# Optional evaluation / export, currently disabled:
#recommendations = recommender.recommend_batch(userids=t_id)
#map10 = recommender.evaluate(recommendations, test_urm=urm_test)
#print('map@10: {}'.format(map10))
#io.exportcsv(recommendations, path='submissions', name='slim_rmse')
def run(self, distance, urm_train=None, urm=None, urm_test=None, targetids=None, k=100, shrink=10, threshold=0,
        implicit=True, alpha=None, beta=None, l=None, c=None, with_scores=False, export=True, verbose=True):
    """
    Fit the model, recommend for the target users, evaluate the result and
    optionally export it to a file

    Parameters
    ----------
    distance : str, distance metric
    urm_train : csr matrix, URM used to fit the model. If None, use: data.get_urm_train()
    urm : csr matrix, URM. If None, used: data.get_urm_train(). This should be the
        entire URM for which the targetids corresponds to the row indexes.
    urm_test : csr matrix, urm where to test the model. If None, use: data.get_urm_test()
    targetids : list, target user ids. If None, use: data.get_target_playlists()
    k : int, K nearest neighbour to consider
    shrink : float, shrink term used in the normalization
    threshold : float, all the values under this value are cutted from the final result
    implicit : bool, if true, treat the URM as implicit, otherwise consider explicit ratings (real values) in the URM
    alpha, beta, l, c : forwarded unchanged to fit
    with_scores : bool, forwarded to recommend_batch
    export : bool, if True write the recommendations with exportcsv under
        'submission', named '<self.name>_<distance>'
    verbose : bool, forwarded to recommend_batch, evaluate and exportcsv; also
        enables the timing log

    Returns
    -------
    recs: (list) recommendations
    map10: (float) MAP10 for the provided recommendations, None when no
        recommendation was produced
    """
    # Resolve the defaults lazily: a dataset is loaded only when the caller
    # did not provide it. urm_train and urm share one default object, loaded
    # at most once.
    if urm_train is None or urm is None:
        default_urm = data.get_urm_train()
        if urm_train is None:
            urm_train = default_urm
        if urm is None:
            urm = default_urm
    if urm_test is None:
        urm_test = data.get_urm_test()
    if targetids is None:
        targetids = data.get_target_playlists()

    # The timer starts after the inputs are resolved, so it measures
    # fit + recommend + evaluate (+ export) only.
    start = time.time()

    self.fit(urm_train, k=k, distance=distance, alpha=alpha, beta=beta, c=c, l=l,
             shrink=shrink, threshold=threshold, implicit=implicit)
    recs = self.recommend_batch(targetids, urm=urm, with_scores=with_scores, verbose=verbose)

    map10 = None
    if len(recs) > 0:
        map10 = self.evaluate(recs, test_urm=urm_test, verbose=verbose)
    else:
        log.warning('No recommendations available, skip evaluation')

    if export:
        exportcsv(recs, path='submission', name='{}_{}'.format(self.name, distance), verbose=verbose)

    if verbose:
        log.info('Run in: {:.2f}s'.format(time.time() - start))

    return recs, map10
def run(self, urm_train=None, urm=None, urm_test=None, targetids=None, factors=100, regularization=0.01,
        iterations=100, alpha=25, with_scores=False, export=True, verbose=True):
    """
    Fit the model, recommend for the target users, evaluate the result and
    optionally export it to a file

    The model is always fitted with the fixed hyper-parameters written in the
    body below. urm_train, urm, factors, regularization, iterations and alpha
    are accepted for signature compatibility but are not used.

    Parameters
    ----------
    urm_test : csr matrix, urm where to test the model. If None, use: data.get_urm_test()
    targetids : list, target user ids. If None, use: data.get_target_playlists()
    with_scores : bool, forwarded to recommend_batch
    export : bool, if True write the recommendations with exportcsv under 'submission'
    verbose : bool, forwarded to recommend_batch, evaluate and exportcsv; also
        enables the timing log

    Returns
    -------
    :return: recs: (list) recommendations
    :return: map10: (float) MAP10 for the provided recommendations, None when
        no recommendation was produced
    """
    # Only the datasets that are actually used are resolved, and only when
    # the caller did not provide them.
    if urm_test is None:
        urm_test = data.get_urm_test()
    if targetids is None:
        targetids = data.get_target_playlists()

    # The timer starts after the inputs are resolved, so it measures
    # fit + recommend + evaluate (+ export) only.
    start = time.time()

    self.fit(l1_ratio=0.1, positive_only=True, alpha=1e-4, fit_intercept=False, copy_X=False,
             precompute=False, selection='random', max_iter=100, topK=100, tol=1e-4,
             workers=multiprocessing.cpu_count())
    recs = self.recommend_batch(userids=targetids, with_scores=with_scores, verbose=verbose)

    map10 = None
    if len(recs) > 0:
        map10 = self.evaluate(recs, test_urm=urm_test, verbose=verbose)
    else:
        log.warning('No recommendations available, skip evaluation')

    if export:
        exportcsv(recs, path='submission', name=self.name, verbose=verbose)

    if verbose:
        log.info('Run in: {:.2f}s'.format(time.time() - start))

    return recs, map10