def evaluate_eurm(self, target_pids):
    """
    Evaluate the stored EURM restricted to the given playlists.

    Only usable when ``self.mode`` is ``'offline'`` (enforced with an assert).

    :param target_pids: row selector applied to ``self.eurm``
    :return: whatever ``self.ev.evaluate`` returns for ``return_result='all'``
    """
    assert self.mode == 'offline'

    # Keep only the requested rows, then drop the seed tracks before ranking.
    target_rows = sps.csr_matrix(self.eurm[target_pids])
    seedless = post.eurm_remove_seed(target_rows, self.dr)
    recommendations = post.eurm_to_recommendation_list(seedless)

    return self.ev.evaluate(
        recommendations,
        str(self),
        verbose=self.verbose_ev,
        return_result='all',
    )
def evaluateRecommendationsSpotify(self):
    """
    Score the current similarity model and return the evaluator's metrics.

    The rows of ``self.URM_train`` selected by the module-level
    ``pids_converted`` are multiplied with ``self.W_sparse`` (``dot_product``
    is called with ``k=750``), seed tracks are removed, and up to 500 item ids
    per row are written, best score first, into a fixed 10000 x 500 array.
    Rows with fewer than 500 scored items keep trailing zeros.

    :return: dict with the keys ``prec_t``, ``ndcg_t``, ``clicks_t``,
        ``prec_a``, ``ndcg_a`` and ``clicks_a``
    """
    # print("Recommender: sparsity self.W_sparse:", self.W_sparse.nnz / self.W_sparse.shape[1] / self.W_sparse.shape[0])
    profiles = self.URM_train[pids_converted]

    print("dot product")
    eurm = dot_product(profiles, self.W_sparse, k=750).tocsr()
    eurm = eurm_remove_seed(eurm)

    top_n = 500
    recommendation_list = np.zeros((10000, top_n))
    for row_idx in range(eurm.shape[0]):
        row_vec = eurm[row_idx]
        # Positions of the top_n largest stored scores, highest first.
        best_first = row_vec.data.argsort()[-top_n:][::-1]
        ranked_items = row_vec.indices[best_first]
        recommendation_list[row_idx, :len(ranked_items)] = ranked_items

    run_name = self.configuration + "_epoca" + str(self.currentEpoch)
    prec_t, ndcg_t, clicks_t, prec_a, ndcg_a, clicks_a = ev.evaluate(
        recommendation_list=recommendation_list,
        name=run_name,
        return_overall_mean=True,
        verbose=False,
        show_plot=False,
        do_plot=True,
    )

    return {
        "prec_t": prec_t,
        "ndcg_t": ndcg_t,
        "clicks_t": clicks_t,
        "prec_a": prec_a,
        "ndcg_a": ndcg_a,
        "clicks_a": clicks_a,
    }
def recsys(alpha, beta):
    """
    Build a P3alpha/RP3beta similarity, score the test URM and evaluate it.

    Relies on the module-level ``p_iu``, ``p_ui``, ``pop``, ``t_urm``, ``dr``
    and ``ev`` objects.

    :param alpha: alpha coefficient forwarded to the similarity
    :param beta: beta coefficient forwarded to the similarity
    :return: tuple ``(first three evaluation results, config string)``
    """
    k = 200
    shrink = 100
    config = ('alpha=%.2f beta=%.2f k=%d shrink=%d binary=False' % (alpha, beta, k, shrink))
    #print(config)

    similarity = p3r3.p3alpha_rp3beta_similarity(
        p_iu, p_ui, pop,
        k=k, shrink=shrink, alpha=alpha, beta=beta,
        verbose=True, mode=1,
    )

    # Computing ratings and remove seed
    scores = ss.dot_product(t_urm, similarity, k=750)
    del similarity  # release the model before the next large allocation
    scores = eurm_remove_seed(scores, dr)

    # Evaluation
    outcome = ev.evaluate(eurm_to_recommendation_list(scores), 'ciao', verbose=False)
    del scores
    return outcome[:3], config
def recsys(shrink):
    """
    Build an S-Plus similarity, score the test URM and evaluate it.

    Relies on the module-level ``l``, ``urm``, ``t_urm``, ``dr`` and ``ev``
    objects.

    NOTE(review): the ``shrink`` argument is overwritten with 50 below, so the
    value passed by the caller has no effect, while ``l`` is taken from module
    scope. This looks like a leftover from tuning ``l`` - confirm intent.

    :param shrink: currently ignored (see note above)
    :return: tuple ``(first three evaluation results, config string)``
    """
    t1, t2 = 0.25, 0.65
    c = 0.4
    shrink = 50
    k = 200
    config = ('l=%.2f t1=%.2f t2=%.2f c=%.2f k=%d shrink=%d binary=False' % (l, t1, t2, c, k, shrink))
    #print(config)

    similarity = ss.s_plus_similarity(
        urm.T, urm,
        k=k, t1=t1, t2=t2, c=c, l=l,
        normalization=True, shrink=shrink, binary=False, verbose=True,
    )

    # Computing ratings and remove seed
    scores = ss.dot_product(t_urm, similarity.T, k=750)
    del similarity  # release the model before the next large allocation
    scores = eurm_remove_seed(scores, dr)

    # Evaluation
    outcome = ev.evaluate(eurm_to_recommendation_list(scores), 'ciao', verbose=False)
    del scores
    return outcome[:3], config
def __init__(self, matrices_names, matrices_array, dr, cat, start, end, n_calls=1000, n_random_starts=0.1, n_points=50, step=0.001, verbose=True):
    """
    Set up the optimiser state for one test category.

    Every input matrix has its seed tracks removed, is cut to the row window
    ``[(cat - 1) * 1000, cat * 1000)`` and is normalised with
    ``norm_max_row`` before being stored in ``self.matrices_array``.

    :param matrices_names: names of the matrices, stored as given
    :param matrices_array: iterable of matrices to preprocess and store
    :param dr: datareader used for seed removal and to build the evaluator;
        not kept on the instance afterwards
    :param cat: 1-based category number, selects the 1000-row window
    :param start: stored as ``self.start``
    :param end: stored as ``self.end``
    :param n_calls: number of optimisation calls
    :param n_random_starts: fraction of ``n_calls`` used as random starts
    :param n_points: stored as ``self.n_points``
    :param step: stored as ``self.step``
    :param verbose: stored as ``self.verbose``
    """
    # Optimisation target and best-so-far bookkeeping.
    self.target_metric = 'ndcg'
    self.best_score = 0
    self.best_params = 0
    self.norm = norm_max_row
    self.verbose = verbose

    # A tenth of the available cores, but never fewer than one.
    self.n_cpu = max(1, int(multiprocessing.cpu_count() / 10))

    # Do not edit
    self.start = start
    self.end = end
    self.cat = cat
    self.global_counter = 0
    self.start_index = (cat - 1) * 1000
    self.end_index = cat * 1000
    self.matrices_names = matrices_names
    self.n_calls = n_calls
    self.x0 = None
    self.y0 = None
    self.n_random_starts = int(n_calls * n_random_starts)
    self.n_points = n_points
    self.step = step

    # memory_on_disk= False
    self.memory_on_notebook = True

    self.dr = dr
    self.ev = Evaluator(self.dr)

    lo, hi = self.start_index, self.end_index
    self.matrices_array = [
        self.norm(eurm_remove_seed(matrix, datareader=dr)[lo:hi])
        for matrix in matrices_array
    ]

    # Drop the references that are no longer needed once preprocessing is done.
    del self.dr
    del matrices_array
def evaluate_shrinked(W_sparse, urm_shrinked, pids_shrinked):
    """
    Evaluate a model on a reduced URM using the module-level evaluator ``ev``.

    :param W_sparse: matrix whose rows ``pids_shrinked`` form the left operand
    :param urm_shrinked: right operand of the dot product
    :param pids_shrinked: row selector applied to ``W_sparse``
    :return: None; the evaluation result is not returned
    """
    selected_rows = W_sparse[pids_shrinked]
    scores = dot_product(selected_rows, urm_shrinked, k=750).tocsr()
    scores = eurm_remove_seed(eurm=scores)

    ev.evaluate(
        recommendation_list=eurm_to_recommendation_list(scores),
        name="slim_structure_parametribase_BPR_epoca_0_noepoche",
        return_overall_mean=False,
        show_plot=False,
        do_plot=True,
    )
def compute_rating(self, urm2=None, datareader=None, top_k=750, verbose=False, small=False, mode="offline", remove_seed=True):
    """
    Compute the estimated URM as ``self.urm`` times ``self.model``.

    :param urm2: optional matrix; when given, its rows ``self.pid`` replace
        ``self.urm`` as the left operand
    :param datareader: datareader used for seed removal; when None and
        ``remove_seed`` is true, one is created with ``mode``
    :param top_k: int, forwarded to ``dot`` as ``k``
    :param verbose: boolean, if true print debug information and timing
    :param small: boolean, if true ``self.urm`` is first cut down to the rows
        ``self.pid`` (the target playlists)
    :param mode: mode used only when a Datareader has to be created here
    :param remove_seed: boolean, if true remove seed from eurm
    :return: sparse matrix (CSR), estimated urm
    """
    if small:
        self.urm = sps.csr_matrix(self.urm[self.pid])
    self.urm = sps.csr_matrix(self.urm)
    self.model = sps.csr_matrix(self.model)

    if verbose:
        print("[ Compute ratings ]")
        start_time = time.time()

    # Identity check: `urm2 != None` is evaluated element-wise for dense
    # arrays and would make this `if` raise instead of testing for presence.
    if urm2 is not None:
        self.urm = urm2[self.pid]

    self.eurm = dot(self.urm, self.model, k=top_k)
    print("eurm shape: " + str(self.eurm.shape))

    if remove_seed:
        if datareader is None:
            print(
                '[ WARNING! Datareader is None in "compute rating". mode is set to'
                + mode.upper() + ', creating it again. '
                'A future version will require it. ]')
            # Imported lazily: only needed on this fallback path.
            from utils.datareader import Datareader
            datareader = Datareader(mode=mode, only_load=True)
        self.eurm = eurm_remove_seed(self.eurm, datareader=datareader)

    if verbose:
        print("time: " + str(int(time.time() - start_time) / 60))

    return self.eurm.tocsr()
def recsys(shrink):
    """
    Build a cosine similarity, score the test URM and evaluate it.

    Relies on the module-level ``urm``, ``t_urm``, ``dr`` and ``ev`` objects.

    :param shrink: shrink term forwarded to the similarity
    :return: tuple ``(first three evaluation results, config string)``
    """
    config = ('alpha=0.4 k=200 shrink=%d binary=False' % (shrink))
    print(config)

    similarity = ss.cosine_similarity(
        urm.T, urm,
        k=200, alpha=0.4, shrink=shrink, binary=False, verbose=True,
    )

    # Computing ratings and remove seed
    scores = ss.dot_product(t_urm, similarity.T, k=750)
    del similarity  # release the model before the next large allocation
    scores = eurm_remove_seed(scores, dr)

    # Evaluation
    outcome = ev.evaluate(eurm_to_recommendation_list(scores), 'ciao', verbose=False)
    del scores
    return outcome[:3], config
def track(self):
    """
    Build co-occurrence scores for the category-2 test playlists.

    For each such playlist the first stored track of its ``self.urm_on`` row
    is taken, the playlists listed for that track in ``self.urm_col`` are
    looked up, and their ``self.urm_of`` rows are summed. The 501 highest
    counts are written into the playlist's row.

    :return: copy of the CSR matrix restricted to ``self.dr_of`` test pids,
        with seed tracks removed
    """
    scores = sps.lil_matrix(self.urm_of.shape)
    cat2_pids = self.dr_on.get_test_pids(cat=2)
    all_test_pids = self.dr_of.get_test_pids()

    for pid in tqdm(cat2_pids):
        row_start = self.urm_on.indptr[pid]
        row_end = self.urm_on.indptr[pid + 1]
        seed_track = self.urm_on.indices[row_start:row_end][0]

        col_start = self.urm_col.indptr[seed_track]
        col_end = self.urm_col.indptr[seed_track + 1]
        co_playlists = self.urm_col.indices[col_start:col_end]

        counts = self.urm_of[co_playlists].sum(axis=0).A1.astype(np.int32)
        # 501 highest counts, highest first.
        best_tracks = counts.argsort()[-501:][::-1]
        scores[pid, best_tracks] = counts[best_tracks]

    scores = scores.tocsr()[all_test_pids]
    scores = eurm_remove_seed(scores, self.dr_on)
    print(scores)
    return scores.copy()
def album(self):
    """
    Build album-based scores for the category-2 test playlists.

    For each such playlist the first stored track of its ``self.urm_on`` row
    is mapped to its album, the playlists listed for that album in the album
    UCM are looked up, and their ``self.urm_of`` rows are summed. The 501
    highest counts are written into the playlist's row.

    :return: CSR matrix restricted to ``self.dr_of`` test pids, with seed
        tracks removed
    """
    scores = sps.lil_matrix(self.urm_of.shape)
    cat2_pids = self.dr_on.get_test_pids(cat=2)
    all_test_pids = self.dr_of.get_test_pids()
    album_ucm = self.dr_of.get_ucm_albums().tocsc()
    track_to_album = self.dr_of.get_track_to_album_dict()

    for pid in tqdm(cat2_pids):
        row_start = self.urm_on.indptr[pid]
        row_end = self.urm_on.indptr[pid + 1]
        seed_track = self.urm_on.indices[row_start:row_end][0]

        album_id = track_to_album[seed_track]
        col_start = album_ucm.indptr[album_id]
        col_end = album_ucm.indptr[album_id + 1]
        album_playlists = album_ucm.indices[col_start:col_end]

        counts = self.urm_of[album_playlists].sum(axis=0).A1.astype(np.int32)
        # 501 highest counts, highest first.
        best_tracks = counts.argsort()[-501:][::-1]
        scores[pid, best_tracks] = counts[best_tracks]

    scores = scores.tocsr()[all_test_pids]
    return eurm_remove_seed(scores, self.dr_on)
def recsys(shrink):
    """
    Build a Tversky similarity, score the test URM and evaluate it.

    Relies on the module-level ``urm``, ``t_urm``, ``dr`` and ``ev`` objects.

    :param shrink: shrink term forwarded to the similarity
    :return: tuple ``(first three evaluation results, config string)``
    """
    alpha, beta = 0.25, 0.65
    k = 200
    config = ('alpha=%.2f beta=%.2f k=%d shrink=%d binary=False' % (alpha, beta, k, shrink))
    #print(config)

    similarity = ss.tversky_similarity(
        urm.T, urm,
        k=k, alpha=alpha, beta=beta, shrink=shrink,
        binary=False, verbose=True,
    )

    # Computing ratings and remove seed
    scores = ss.dot_product(t_urm, similarity.T, k=750)
    del similarity  # release the model before the next large allocation
    scores = eurm_remove_seed(scores, dr)

    # Evaluation
    outcome = ev.evaluate(eurm_to_recommendation_list(scores), 'ciao', verbose=False)
    del scores
    return outcome[:3], config
result_dict[song] = result_dict[song]+fs.freq*len(fs.sequence) else: result_dict[song] = fs.freq for song_predicted in result_dict: pred[i,song_predicted] = result_dict[song_predicted] eurm = eurm_remove_seed(pred , dr ) rec_list = eurm_to_recommendation_list(eurm) ev.evaluate(rec_list, "cat2_top",verbose=True, do_plot=True, show_plot=True, save=True, ) # seuences: [15565, 6186, 6288, 6292, 6294, 6295, 6298, 6310, 6334, 6336, 6337, 6339, 6340, 6362, 6380, 6387, 7597, 7603, 7604, 7605, 7606, 7607, 6173, 6077, 6040, 6027, 74, 76, 77, 81, 282, 768, 2163, 2506, 2507, 2508, 7609, 3084, 3166, 3183, 3282, 3283, 3697, 4211, 4420, 4443, 4493, 6019, 3162, 73, 8408, 8460, 15544, 15545, 15546, 15547, 15548, 15549, 15550, 15551, 15552, 15553, 15554, 15555, 15556, 15557, 15558, 15559, 15560, 15561, 15562, 15563, 15564, 15543, 15503, 15152, 14809, 8484, 8940, 10480, 10527, 10820, 11192, 11200, 11482, 11500, 11512, 8409, 12605, 12710, 12714, 12716, 12728, 12794, 13689, 13692, 14467, 14797, 14801, 12610, 51] # seuences: [11500] # # # # [[11500], [12714]], 62 # [[11500], [70]], 62 # [[11500], [64]], 70
for token in tokens: playlists_with_tokens.extend( ucm_csc.indices[ucm_csc.indptr[token]:ucm_csc.indptr[token + 1]]) urm_tmp = urm_csr[playlists_with_tokens] track_total_interactions = np.array(urm_tmp.sum(axis=0)).astype( np.int32)[0, :] # like ravel top_pop = track_total_interactions.argsort()[-750:][::-1] rec_list[i] = top_pop i += 1 np.save("nlp_toketoppop_rec_list_offline", rec_list) eurm = rec_list_to_eurm(rec_list=rec_list) eurm = eurm_remove_seed(eurm, dr) rec_list = eurm_to_recommendation_list(eurm) ev.evaluate( rec_list, "WEILA2_toktoktop_pop", verbose=True, do_plot=True, show_plot=True, save=True, )
x0 = None y0 = None if os.path.isfile(ROOT_DIR + '/bayesian_scikit/' + configuration_name + '/memory/cat'+ str(cat)+'_y0_MEMORY.pkl') and \ os.path.isfile(ROOT_DIR + '/bayesian_scikit/' + configuration_name + '/memory/cat' + str(cat) + '_x0_MEMORY.pkl'): x0 = load_obj('cat' + str(cat) + '_x0_MEMORY', path= ROOT_DIR + '/bayesian_scikit/' + configuration_name + '/memory/') y0 = load_obj('cat' + str(cat) + '_y0_MEMORY', path= ROOT_DIR + '/bayesian_scikit/' + configuration_name + '/memory/') global_counter = len(y0) print("[ CAT"+str(cat)+" : RESUMING FROM RUN", global_counter, "]") print("[ CAT "+str(cat)+": STARTING, NOW LOADING MATRICES ]") matrices_names = read_params_dict(ROOT_DIR+'/bayesian_scikit/'+configuration_name+'/name_settings')[cat-1] file_locations = read_params_dict(ROOT_DIR+'/bayesian_scikit/bayesian_common_files/file_locations_offline') matrices_array = [norm( eurm_remove_seed( sps.load_npz(file_locations[x]), dr)[start_index:end_index]) for x in matrices_names ] del dr start_time=time.time() space = [Real(0, 100, name=x) for x in matrices_names] res = gp_minimize(objective_function, space, base_estimator=None, n_calls=450+len(matrices_array)*calls_constant, n_random_starts=100, acq_func='gp_hedge', acq_optimizer='auto', x0=x0, y0=y0, random_state=None, verbose=False, callback=None, n_points=100, n_restarts_optimizer=10, xi=0.012, kappa=1.96,
# For every test playlist, recommend the tracks that co-occur most often with
# its first stored track. `i` and `rec_list` are initialised before this point.
# Assumes urm_csr / urm_csc are CSR / CSC forms of the same URM - TODO confirm.
for playlist_id in tqdm(test_playlists, desc="shao belo"):
    # Stored track ids of this playlist's row.
    songs = urm_csr.indices[urm_csr.indptr[playlist_id]:urm_csr.indptr[playlist_id + 1]]
    # Playlists listed in the column of the first stored track.
    playlists_with_tokens = urm_csc.indices[
        urm_csc.indptr[songs[0]]:urm_csc.indptr[songs[0] + 1]]
    # Per-track interaction count over those playlists.
    track_total_interactions = urm_csr[playlists_with_tokens].sum(
        axis=0).A1
    # 601 highest counts, highest first.
    top_pop = track_total_interactions.argsort()[-601:][::-1]
    rec_list[i] = top_pop
    i += 1

# Round-trip through an EURM so the seed tracks can be removed before ranking.
eurm = eurm_remove_seed(rec_list_to_eurm(rec_list), dr)
rec_list = eurm_to_recommendation_list(eurm)
ev.evaluate(
    rec_list,
    "cat2_top",
    verbose=True,
    do_plot=True,
    show_plot=True,
    save=True, )
# Persist the seed-free scores; the file name depends on `mode`.
sps.save_npz("top_pop_cat2_" + mode, eurm)
from utils import post_processing as post
import scipy.sparse as sps
from utils.datareader import Datareader
from utils.post_processing import eurm_remove_seed

# Split the seed-free online SLIM EURM into one .npz file per test category.
mode = "online"

# NOTE: only_load must be a real boolean. A non-empty string such as "False"
# is truthy, so it would select the "only load" path anyway while reading as
# the opposite. Presumably Datareader tests this flag by truthiness - confirm.
dr = Datareader(verbose=False, mode=mode, only_load=True)

name = mode + "/slim_online"
eurm = eurm_remove_seed(sps.load_npz(mode + "/slim_online.npz"), dr)
# sps.save_npz(mode+"/online_nlp_knn100_bm25.npz",eurm)

# Categories are numbered 1..10; each gets its own file next to the input.
for i in range(1, 11):
    indices = dr.get_test_pids_indices(cat=i)
    save = eurm[indices]
    sps.save_npz(name + "-cat" + str(i) + ".npz", save)
# Train the recommender `rec` on a binarised URM with the test playlists
# blanked out, then score a URM built from each playlist's last `cut` songs.
pids = dr.get_test_pids()
# Binarise: every stored interaction becomes 1.
urm.data = np.ones(len(urm.data))
ut.inplace_set_rows_zero(
    X=urm, target_rows=pids)  # don't learn from the challenge set
urm.eliminate_zeros()
# L1-normalised URM and L1-normalised transpose.
p_ui = normalize(urm, norm="l1")
p_iu = normalize(urm.T, norm="l1")
# Column sums of the binarised URM.
top = urm.sum(axis=0).A1
# Fitting data
rec.fit(p_ui, p_iu, top, pids)
# Computing similarity/model
rec.compute_model(top_k=knn, shrink=250, alpha=0.5, beta=0.5, verbose=True)
# Injecting a URM that keeps only the last `cut` songs
rec.urm = dr.get_last_n_songs_urm(n=cut)
# Computing ratings
rec.compute_rating(top_k=topk, datareader=dr, verbose=True, small=True)
# NOTE(review): if compute_rating already removes the seed by default, this
# second removal is redundant - verify against the recommender class.
rec.eurm = eurm_remove_seed(rec.eurm, dr)
sps.save_npz(complete_name, rec.eurm)
from utils.post_processing import eurm_to_recommendation_list, eurm_remove_seed
from personal.Ervin.Word2Vec_recommender import W2VRecommender
from personal.Ervin.ItemRank import ItemRank
from personal.Ervin.tf_collaborative_user import TF_collaborative_user
from recommenders.knn_collaborative_item import Knn_collaborative_item

if __name__ == '__main__':
    # Blend an item-KNN EURM with a Word2Vec EURM and evaluate every mixing
    # weight from 0.9 down to 0.1.
    dr = Datareader(only_load=True, mode='offline', test_num='1', verbose=False)
    pid = dr.get_test_playlists().transpose()[0]

    urm = dr.get_urm()
    # Binarise: every stored interaction becomes 1.
    urm.data = np.ones(len(urm.data))

    ev = Evaluator(dr)

    knn_rec = Knn_collaborative_item()
    w2v_rec = W2VRecommender()

    knn_rec.fit(urm, pid)
    w2v_rec.fit(urm, pid)

    knn_rec.compute_model(verbose=True, top_k=850)
    knn_rec.compute_rating(top_k=750, verbose=True, small=True)

    w2v_rec.compute_model(verbose=True, size=50, window=None)
    w2v_rec.compute_rating(verbose=True, small=True, top_k=750)

    # Remove seeds and L1-normalise each row so the two score scales can be
    # mixed linearly.
    knn_rec.eurm = norm_l1_row(eurm_remove_seed(knn_rec.eurm, dr))
    w2v_rec.eurm = norm_l1_row(eurm_remove_seed(w2v_rec.eurm, dr))

    for alpha in np.arange(0.9, 0, -0.1):
        print('[ Alpha = {:.1f}]'.format(alpha))
        blended = alpha * knn_rec.eurm + (1-alpha)*w2v_rec.eurm
        # Seeds were already removed above, hence remove_seed=False.
        ranked = eurm_to_recommendation_list(blended, remove_seed=False, datareader=dr)
        ev.evaluate(recommendation_list=ranked,
                    name="KNNItem_W2V"+str(alpha),
                    old_mode=False,
                    save=True)
file_locations = read_params_dict(ROOT_DIR + '/bayesian_scikit/bayesian_common_files/file_locations_' + mode) # LOAD MATRICES matrices_loaded=dict() all_matrices_names = set() for cat in range(1,11): with open(ROOT_DIR+'/bayesian_scikit/'+configuration_name + '/best_params/cat'+str(cat)+'_params_dict') as f: best_params_dict = json.load(f) for name, value_from_bayesian in best_params_dict.items(): all_matrices_names.add(name) for name in tqdm(all_matrices_names,desc='loading matrices'): if name not in matrices_loaded.keys() and name!='norm': matrices_loaded[name] = eurm_remove_seed(sps.load_npz(file_locations[name]), dr) rec_list = [[] for x in range(10000)] eurms_cutted = [[] for x in range(10)] # BUILDING THE EURM FROM THE PARAMS for cat in tqdm(range(1,11),desc="summing up the matrices"): start_index = (cat - 1) * 1000 end_index = cat * 1000 best_params_dict = read_params_dict(name='cat' + str(cat) + '_params_dict', path=ROOT_DIR + '/bayesian_scikit/' + configuration_name + '/best_params/') norm = best_params_dict['norm']
def get_top_pop_track(self, mode):
    """
    Build the recommendation scores for the category-2 test playlists.

    For each such playlist the first stored track of its URM row is taken,
    the playlists listed for that track are looked up, and their URM rows are
    summed. The 501 highest counts are written into the playlist's row.

    Side effects: stores the datareader and URM on the instance as
    ``dr_on`` / ``urm_on`` (online) or ``dr_of`` / ``urm_of`` (offline), and
    sets ``urm_col`` and ``top_p``.

    :param mode: ``"online"`` or ``"offline"``
    :return: csr_matrix restricted to the test pids, with seed tracks removed
    :raises ValueError: if ``mode`` is neither ``"online"`` nor ``"offline"``
    """
    if mode not in ("online", "offline"):
        # Fail with a clear message rather than an unbound-variable error at
        # the return statement.
        raise ValueError(
            'mode must be "online" or "offline", got %r' % (mode,))

    dr = Datareader(verbose=False, mode=mode, only_load=True)
    urm = dr.get_urm()

    # Keep the mode-specific attributes other methods may read.
    if mode == "online":
        self.dr_on = dr
        self.urm_on = urm
    else:
        self.dr_of = dr
        self.urm_of = urm

    self.urm_col = sps.csc_matrix(urm)
    self.top_p = np.zeros(urm.shape[1])

    eurm = sps.lil_matrix(urm.shape)
    pids = dr.get_test_pids(cat=2)
    pids_all = dr.get_test_pids()

    for row in tqdm(pids):
        # First stored track of the playlist's row.
        track_ind = urm.indices[urm.indptr[row]:urm.indptr[row + 1]][0]
        # Playlists listed in that track's column.
        playlists = self.urm_col.indices[
            self.urm_col.indptr[track_ind]:self.urm_col.indptr[track_ind + 1]]
        top = urm[playlists].sum(axis=0).A1.astype(np.int32)
        # 501 highest counts, highest first.
        track_ind_rec = top.argsort()[-501:][::-1]
        eurm[row, track_ind_rec] = top[track_ind_rec]

    eurm = eurm.tocsr()[pids_all]
    eurm = eurm_remove_seed(eurm, dr)
    return eurm.copy().tocsr()
# TopPop Album album = artists_dic[track_ind] playlists = ucm_album.indices[ucm_album.indptr[album]:ucm_album. indptr[album + 1]] top = urm[playlists].sum(axis=0).A1.astype(np.int32) track_ind_rec = top.argsort()[-501:][::-1] eurm2[row, track_ind_rec] = top[track_ind_rec] eurm1 = eurm1.tocsr()[pids_all] eurm2 = eurm2.tocsr()[pids_all] eurm1 = eurm_remove_seed(eurm1, dr) eurm2 = eurm_remove_seed(eurm2, dr) sps.save_npz("test1.npz", eurm1) rec_list1 = eurm_to_recommendation_list(eurm1) rec_list2 = eurm_to_recommendation_list(eurm2) rec_list3 = append_rec_list(rec_list1 + rec_list2) ev = Evaluator(dr) ev.evaluate(rec_list1, name="enstest", level='track') ev.evaluate(rec_list2, name="enstest", level='track') ev.evaluate(rec_list3, name="enstest", level='track') # rec.append(list(top_p))
arg = load_obj("best/cat" + str(i) + "") w.append(reorder(dict(arg[:len(arg) - 1][0]), name[i - 1])) print("[ Loading matrix name ]") if mode == "offline": matrix_dict = load_obj("matrix_dict", path="") dir = "offline/" if mode == "online": matrix_dict = load_obj("matrix_dict_online", path="") dir = "online/" _name = flatten(name) loaded_matrix = dict( zip(_name, [ eurm_remove_seed(sps.load_npz(directory + matrix_dict[n]), dr) for n in _name ])) matrix = [] if type == "unique": print("[ Loading cat 1 ]") cat = 1 m = list() for n in name[cat - 1]: m.append(loaded_matrix[n][0:1000]) matrix.append(m) print("[ Loading cat 2 ]") cat = 2