def __init__(self):
    """Precompute the two similarity matrices this model works with.

    Two similarity models are fitted on the full URM (``d.get_urm()``):
    an item-based collaborative one and a content-based one that also
    receives the ICM. Each result is converted to CSR and divided by its
    own maximum entry before being stored on the instance.
    """
    self.name = "CFW_D_Similarity_Linalg"

    # Collaborative side: item-based similarity with the hyper-parameters
    # the original author marked as the best ones found.
    collaborative_model = CFItemBased()
    collaborative_sim = collaborative_model.fit(
        d.get_urm(),
        600,
        distance=DistanceBasedRecommender.SIM_SPLUS,
        shrink=10,
        alpha=0.25,
        beta=0.5,
        l=0.25,
        c=0.5,
    ).tocsr()
    # Rescale by the largest entry; the original author reports a small
    # gain in performance from this normalisation.
    self.S_matrix_target = collaborative_sim / collaborative_sim.max()

    # Content side: content-based similarity, again with the
    # hyper-parameters marked as best by the original author.
    content_model = ContentBasedRecommender()
    content_sim = content_model.fit(
        d.get_urm(),
        d.get_icm(),
        k=500,
        distance=DistanceBasedRecommender.SIM_SPLUS,
        shrink=500,
        alpha=0.75,
        beta=1,
        l=0.5,
        c=0.5,
    ).tocsr()
    # Same rescaling as for the collaborative matrix.
    self.S_matrix_contentKNN = content_sim / content_sim.max()
def run(self, num_factors, urm_train=None, urm=None, urm_test=None, targetids=None,
        with_scores=False, export=True, verbose=True):
    """
    Fit the model, recommend for the target users, evaluate and export.

    Parameters
    ----------
    num_factors : int, number of latent factors, forwarded to fit()
    urm_train : csr matrix, URM used for fitting. If None, use: data.get_urm_train()
    urm : accepted but not used by this method
    urm_test : csr matrix, URM the recommendations are evaluated against.
        If None, use: data.get_urm_test()
    targetids : list, target user ids. If None, use: data.get_target_playlists()
    with_scores : accepted but not used by this method
    export : bool, if true the recommendations are written through exportcsv()
    verbose : bool, forwarded to evaluate()/exportcsv(); also enables the timing log

    Returns
    -------
    recs: (list) recommendations
    map10: (float) MAP10 for the provided recommendations, or None when
        no recommendation was produced
    """
    # The defaults are always loaded, before the timer starts, whether or
    # not the caller supplied explicit values.
    default_urm_train = data.get_urm_train()
    data.get_icm()  # return value is not used in this method
    default_urm_test = data.get_urm_test()
    default_targetids = data.get_target_playlists()

    started_at = time.time()

    if urm_train is None:
        urm_train = default_urm_train
    if urm_test is None:
        urm_test = default_urm_test
    if targetids is None:
        targetids = default_targetids

    self.fit(urm_train=urm_train, num_factors=num_factors)
    recs = self.recommend_batch(userids=targetids)

    if len(recs) > 0:
        map10 = self.evaluate(recs, test_urm=urm_test, verbose=verbose)
    else:
        map10 = None
        log.warning('No recommendations available, skip evaluation')

    if export:
        exportcsv(recs, path='submission', name=self.name, verbose=verbose)

    if verbose:
        log.info('Run in: {:.2f}s'.format(time.time() - started_at))

    return recs, map10
def validate(self, user_ids=d.get_target_playlists(), log_path=None,
             normalize_similarity=[False], damp_coeff=[1], add_zeros_quota=[1],
             loss_tolerance=[1e-6], iteration_limit=[30], use_incremental=[False]):
    """
    Grid-search the fit() hyper-parameters and evaluate every combination.

    For each combination of the supplied value lists the model is fitted
    on ``d.get_urm_train_1()``, recommendations are produced for
    ``user_ids`` and evaluated against ``d.get_urm_test_1()``. The
    parameter summary returned by ``self._print`` is printed before each
    fit.

    Parameters
    ----------
    user_ids : list, users to recommend for. The default is computed once,
        when the method is defined.
    log_path : str or None, directory of the log file. When given,
        everything printed during validation is redirected to a
        timestamped ``.txt`` file in that directory.
    normalize_similarity, damp_coeff, add_zeros_quota, loss_tolerance,
    iteration_limit, use_incremental : iterables with the candidate values
        of the corresponding fit() arguments. The list defaults are only
        read, never mutated.
    """
    from itertools import product

    log_file = None
    orig_stdout = sys.stdout
    if log_path is not None:
        log_file = open(
            log_path + '/' + self.name + ' ' + time.strftime('_%H-%M-%S') + ' '
            + time.strftime('%d-%m-%Y') + '.txt', 'w')
        sys.stdout = log_file

    try:
        # product() varies the last iterable fastest, i.e. it visits the
        # combinations in the same order as six nested for-loops would.
        for ns, dc, adq, lt, il, ui in product(normalize_similarity, damp_coeff,
                                               add_zeros_quota, loss_tolerance,
                                               iteration_limit, use_incremental):
            # The summary must report the add_zeros_quota actually used
            # for the fit below (adq), not the damping coefficient.
            print(self._print(normalize_similarity=ns, add_zeros_quota=adq,
                              loss_tolerance=lt, iteration_limit=il,
                              damp_coeff=dc, use_incremental=ui))
            self.fit(ICM=d.get_icm(), URM_train=d.get_urm_train_1(),
                     normalize_similarity=ns, add_zeros_quota=adq,
                     loss_tolerance=lt, iteration_limit=il,
                     damp_coeff=dc, use_incremental=ui)
            recs = self.recommend_batch(user_ids, urm=d.get_urm_train_1())
            # NOTE(review): `r` is not defined in this method; presumably a
            # module-level name. Confirm it is not meant to be `self`.
            r.evaluate(recs, d.get_urm_test_1())
    finally:
        # Always undo the redirection and close the file, even when a
        # fit/evaluation step raises.
        if log_file is not None:
            sys.stdout = orig_stdout
            log_file.close()
def run(self, normalize_similarity=False, add_zeros_quota=1, loss_tolerance=1e-6,
        iteration_limit=30, damp_coeff=1, use_incremental=False,
        export_results=True, export_r_hat=False, export_for_validation=False):
    """
    Fit the model and export either the recommendations or the estimated URM.

    The model is fitted on ``d.get_urm_train_1()`` only when both
    ``export_r_hat`` and ``export_for_validation`` are true; in every
    other case it is fitted on ``d.get_urm()``.

    Parameters
    ----------
    normalize_similarity, add_zeros_quota, loss_tolerance, iteration_limit,
    damp_coeff, use_incremental : forwarded unchanged to fit() and to
        ``self._print``, whose result is passed as the third argument of
        ``importexport.exportcsv``.
    export_results : bool, export the recommendations for
        ``d.get_target_playlists()`` through ``importexport.exportcsv``.
    export_r_hat : bool, save the estimated URM through ``save_r_hat``.
        This only happens when ``export_results`` is false.
    export_for_validation : bool, forwarded to ``save_r_hat``; also takes
        part in selecting the training URM (see above).
    """
    fit_on_train_split = export_r_hat and export_for_validation
    urm = d.get_urm_train_1() if fit_on_train_split else d.get_urm()

    # The same hyper-parameters go to fit() and to the name builder.
    hyper_params = dict(
        normalize_similarity=normalize_similarity,
        add_zeros_quota=add_zeros_quota,
        loss_tolerance=loss_tolerance,
        iteration_limit=iteration_limit,
        damp_coeff=damp_coeff,
        use_incremental=use_incremental,
    )
    self.fit(ICM=d.get_icm(), URM_train=urm, **hyper_params)

    if export_results:
        print('exporting results')
        recs = self.recommend_batch(d.get_target_playlists(), N=10, urm=urm,
                                    filter_already_liked=True, with_scores=False,
                                    items_to_exclude=[], verbose=False)
        importexport.exportcsv(recs, 'submission', self._print(**hyper_params))
        return

    if export_r_hat:
        print('saving estimated urm')
        self.save_r_hat(export_for_validation)
def test(self, distance=DistanceBasedRecommender.SIM_SPLUS, k=600, shrink=10,
         threshold=0, alpha=0.25, beta=0.5, l=0.5, c=0.25,
         export_results=True, export_r_hat=False):
    """
    Shortcut to run the model after the validation procedure, allowing
    the export of the scores on the playlists or of the estimated csr
    matrix.

    Parameters
    ----------
    distance, k, shrink, threshold, alpha, beta, l, c : forwarded
        unchanged to run().
    export_results : bool, forwarded to run() as ``export``.
    export_r_hat : bool, if true ``save_r_hat()`` is called after the run.

    Returns
    -------
    recs, map10 : the pair returned by run()
    """
    # export_results / export_r_hat are explicit keyword parameters so the
    # method no longer depends on same-named globals being defined.
    recs, map10 = self.run(urm=d.get_urm(),
                           icm=d.get_icm(),
                           targetids=d.get_target_playlists(),
                           distance=distance,
                           k=k,
                           shrink=shrink,
                           threshold=threshold,
                           alpha=alpha,
                           beta=beta,
                           l=l,
                           c=c,
                           export=export_results)

    if export_r_hat:
        print('saving estimated urm')
        self.save_r_hat()

    return recs, map10
def fit(self, ICM=data.get_icm(), URM_train=data.get_urm_train_1(),
        normalize_similarity=True, add_zeros_quota=0.1, loss_tolerance=0.0001,
        iteration_limit=100, damp_coeff=0.1, use_incremental=True):
    """
    Learn the weight vector D with a sparse least-squares solve.

    Steps: store the inputs and their dimensions on the instance, build
    the training triplets through ``_generateTrainData_low_ram``, solve a
    least-squares problem with ``linalg.lsqr`` and finally rebuild the
    similarity through ``_compute_W_sparse``.

    Parameters
    ----------
    ICM : item-content matrix. The default is computed once, when the
        method is defined.
    URM_train : training URM. The default is computed once, when the
        method is defined.
    normalize_similarity, add_zeros_quota, use_incremental : stored on
        the instance; not otherwise read in this method.
    loss_tolerance : float, passed to lsqr as both ``atol`` and ``btol``
    iteration_limit : int, passed to lsqr as ``iter_lim``
    damp_coeff : float, passed to lsqr as ``damp``
    """
    self.URM_train = URM_train
    self.ICM = ICM

    self.n_users, self.n_items = self.URM_train.shape[0], self.URM_train.shape[1]
    self.n_features = self.ICM.shape[1]

    self.normalize_similarity = normalize_similarity
    self.add_zeros_quota = add_zeros_quota
    self.use_incremental = use_incremental

    # Must run before the solve: row_list / col_list / data_list read
    # below are presumably filled in by this call - TODO confirm.
    self._generateTrainData_low_ram()

    # Element-wise product of the ICM rows selected by row_list with the
    # ICM rows selected by col_list.
    left_rows = self.ICM[self.row_list]
    right_rows = self.ICM[self.col_list]
    common_features = left_rows.multiply(right_rows)

    linalg_result = linalg.lsqr(
        common_features,
        self.data_list,
        show=False,
        atol=loss_tolerance,
        btol=loss_tolerance,
        iter_lim=iteration_limit,
        damp=damp_coeff,
    )
    solution = linalg_result[0]

    # Two independent copies so that later changes to one do not affect
    # the other.
    self.D_incremental = solution.copy()
    self.D_best = solution.copy()
    self.epochs_best = 0
    # Element 3 of the lsqr result (r1norm in SciPy's lsqr - TODO confirm
    # that `linalg` is scipy.sparse.linalg).
    self.loss = linalg_result[3]

    self._compute_W_sparse()
def run(self, urm_train=None, urm=None, urm_test=None, targetids=None,
        factors=100, regularization=0.01, iterations=100, alpha=25,
        with_scores=False, export=True, verbose=True):
    """
    Fit the model, recommend for the target users, evaluate and export.

    fit() is called with a fixed set of hyper-parameters: ``factors``,
    ``regularization``, ``iterations`` and ``alpha`` are accepted but not
    forwarded, and neither ``urm_train`` nor ``urm`` is passed to fit()
    or recommend_batch() by this method.

    Parameters
    ----------
    urm_train : if None, use: data.get_urm_train_1() (resolved but unused)
    urm : if None, use: data.get_urm() (resolved but unused)
    urm_test : csr matrix, URM the recommendations are evaluated against.
        If None, use: data.get_urm_test_1()
    targetids : list, target user ids. If None, use: data.get_target_playlists()
    with_scores : forwarded to recommend_batch()
    export : bool, if true the recommendations are written through exportcsv()
    verbose : bool, forwarded to recommend_batch()/evaluate()/exportcsv();
        also enables the timing log

    Returns
    -------
    :return: recs: (list) recommendations
    :return: map10: (float) MAP10 for the provided recommendations, or
        None when no recommendation was produced
    """
    # The defaults are always loaded, before the timer starts.
    default_urm_train = data.get_urm_train_1()
    default_urm = data.get_urm()
    data.get_icm()  # return value is not used in this method
    default_urm_test = data.get_urm_test_1()
    default_targetids = data.get_target_playlists()

    started_at = time.time()

    if urm_train is None:
        urm_train = default_urm_train
    if urm is None:
        urm = default_urm
    if urm_test is None:
        urm_test = default_urm_test
    if targetids is None:
        targetids = default_targetids

    self.fit(
        l1_ratio=0.1,
        positive_only=True,
        alpha=1e-4,
        fit_intercept=False,
        copy_X=False,
        precompute=False,
        selection='random',
        max_iter=100,
        topK=100,
        tol=1e-4,
        workers=multiprocessing.cpu_count(),
    )
    recs = self.recommend_batch(userids=targetids, with_scores=with_scores,
                                verbose=verbose)

    if len(recs) > 0:
        map10 = self.evaluate(recs, test_urm=urm_test, verbose=verbose)
    else:
        map10 = None
        log.warning('No recommendations available, skip evaluation')

    if export:
        exportcsv(recs, path='submission', name=self.name, verbose=verbose)

    if verbose:
        log.info('Run in: {:.2f}s'.format(time.time() - started_at))

    return recs, map10
log.warning('(r) Save the R^')
log.warning('(s) Save the similarity matrix')
# TODO: a '(v) Validate the model' option is not wired up yet.
log.warning('(x) Exit')
# Slice instead of indexing: an empty answer (plain Enter) yields '' and
# reaches the 'Wrong option!' branch instead of raising IndexError.
arg = input()[:1]
print()

model = ContentBasedRecommender()

# Hyper-parameters shared by the two branches that fit the model.
fit_params = dict(distance=model.SIM_SPLUS, k=500, shrink=500,
                  alpha=0.75, beta=1, l=0.5, c=0.5)

if arg == 't':
    model.test(distance=model.SIM_SPLUS, k=500, alpha=0.75, beta=1,
               shrink=500, l=0.5, c=0.5)
elif arg == 'r':
    log.info('Wanna save for evaluation (y/n)?')
    # Same slicing as above: an empty answer counts as "no".
    choice = input()[:1] == 'y'
    model.fit(urm=data.get_urm_train_2(), icm=data.get_icm(), **fit_params)
    print('Saving the R^...')
    model.save_r_hat(evaluation=choice)
elif arg == 's':
    model.fit(urm=data.get_urm_train_2(), icm=data.get_icm(), **fit_params)
    print('Saving the similarity matrix...')
    sps.save_npz('raw_data/saved_sim_matrix_evaluation_2/{}'.format(model.name),
                 model.get_sim_matrix())
elif arg == 'x':
    pass
else:
    log.error('Wrong option!')
def run(self, distance, urm_train=None, urm=None, urm_test=None, targetids=None,
        k=100, shrink=10, threshold=0, implicit=True, alpha=None, beta=None,
        l=None, c=None, with_scores=False, export=True, verbose=True):
    """
    Fit the model, recommend for the target users, evaluate and export.

    Parameters
    ----------
    distance : str, distance metric; also appended to the exported file name
    urm_train : csr matrix, URM used for fitting. If None, use: data.get_urm_train()
    urm : csr matrix, URM passed to recommend_batch(). If None, use:
        data.get_urm_train(). This should be the entire URM for which the
        targetids correspond to the row indexes.
    urm_test : csr matrix, URM the recommendations are evaluated against.
        If None, use: data.get_urm_test()
    targetids : list, target user ids. If None, use: data.get_target_playlists()
    k : int, K nearest neighbours to consider
    shrink : float, shrink term used in the normalization
    threshold : float, all the values under this value are cut from the final result
    implicit : bool, if true, treat the URM as implicit, otherwise consider
        explicit ratings (real values) in the URM
    alpha, beta, l, c : forwarded unchanged to fit()
    with_scores : forwarded to recommend_batch()
    export : bool, if true the recommendations are written through exportcsv()
    verbose : bool, forwarded to recommend_batch()/evaluate()/exportcsv();
        also enables the timing log

    Returns
    -------
    recs: (list) recommendations
    map10: (float) MAP10 for the provided recommendations, or None when
        no recommendation was produced
    """
    # The defaults are always loaded, before the timer starts. The train
    # URM is the fallback for both `urm_train` and `urm`.
    default_urm = data.get_urm_train()
    data.get_icm()  # return value is not used in this method
    default_urm_test = data.get_urm_test()
    default_targetids = data.get_target_playlists()

    started_at = time.time()

    if urm_train is None:
        urm_train = default_urm
    if urm is None:
        urm = default_urm
    if urm_test is None:
        urm_test = default_urm_test
    if targetids is None:
        targetids = default_targetids

    self.fit(urm_train, k=k, distance=distance, alpha=alpha, beta=beta, c=c, l=l,
             shrink=shrink, threshold=threshold, implicit=implicit)
    recs = self.recommend_batch(targetids, urm=urm, with_scores=with_scores,
                                verbose=verbose)

    if len(recs) > 0:
        map10 = self.evaluate(recs, test_urm=urm_test, verbose=verbose)
    else:
        map10 = None
        log.warning('No recommendations available, skip evaluation')

    if export:
        export_name = '{}_{}'.format(self.name, distance)
        exportcsv(recs, path='submission', name=export_name, verbose=verbose)

    if verbose:
        log.info('Run in: {:.2f}s'.format(time.time() - started_at))

    return recs, map10
raise ValueError( "Value for 'feature_weighting' not recognized. Acceptable values are {}, provided was '{}'" .format(self.FEATURE_WEIGHTING_VALUES, feature_weighting)) if feature_weighting == "BM25": self.ICM = self.ICM.astype(np.float32) self.ICM = okapi_BM_25(self.ICM) elif feature_weighting == "TF-IDF": self.ICM = self.ICM.astype(np.float32) self.ICM = TF_IDF(self.ICM) similarity = Compute_Similarity(self.ICM.T, shrink=shrink, topK=topK, normalize=normalize, similarity=similarity, **similarity_args) if self.sparse_weights: self.W_sparse = similarity.compute_similarity() else: self.W = similarity.compute_similarity() self.W = self.W.toarray() rec = ItemKNNCBFRecommender(ICM=data.get_icm(), URM_train=data.get_urm_train_1()) rec.fit(feature_weighting='TF-IDF') recs = rec.recommend_batch(userids=data.get_target_playlists(), type='ITEM') rec.evaluate(recs, test_urm=data.get_urm_test_1())