def model(self, alpha=1, beta=0, k=200, shrink=0, power=1, threshold=0, rp3_mode=0, target_items=None):
    """Compute the similarity matrix ``self.s`` selected by ``self.similarity``.

    The hyper-parameters are stored on the instance, the similarity is built
    from ``self.m_ui`` and ``self.m_iu`` and its stored values (``self.s.data``)
    are finally raised element-wise to ``power``.

    Args:
        alpha: forwarded to every similarity function.
        beta: forwarded to the ``'tversky'`` and ``'rp3beta'`` similarities;
            ``'p3alpha'`` is only selected when ``beta == 0``.
        k, shrink, threshold: forwarded unchanged to the similarity function.
        power: exponent applied to ``self.s.data``.
        rp3_mode: forwarded as ``mode`` to the ``'rp3beta'`` similarity.
        target_items: forwarded as ``target_items``; when ``None`` it is
            replaced by ``self.dr.get_test_pids()``.

    Raises:
        ValueError: if ``self.similarity`` (combined with ``beta``) selects no
            implemented similarity.
    """
    if target_items is None:
        # Original author's note: work with s*urm
        target_items = self.dr.get_test_pids()

    self.alpha, self.beta = alpha, beta
    self.k = k
    self.power = power
    self.shrink, self.threshold = shrink, threshold
    self.rp3_mode = rp3_mode

    if self.similarity == 'tversky':
        self.s = ss.tversky_similarity(self.m_ui, self.m_iu, k=k, shrink=shrink, alpha=alpha, beta=beta,
                                       threshold=threshold, verbose=self.verbose, target_items=target_items)
    elif self.similarity == 'p3alpha' and beta == 0:
        self.s = ss.p3alpha_similarity(self.m_ui, self.m_iu, k=k, shrink=shrink, alpha=alpha,
                                       threshold=threshold, verbose=self.verbose, target_items=target_items)
    elif self.similarity == 'rp3beta':
        self.s = p3r3.p3alpha_rp3beta_similarity(self.m_ui, self.m_iu, self.pop, k=k, shrink=shrink,
                                                 alpha=alpha, beta=beta, threshold=threshold,
                                                 verbose=self.verbose, mode=rp3_mode,
                                                 target_items=target_items)
    else:
        # Fail loudly: continuing would either hit an unrelated AttributeError
        # on ``self.s`` or silently re-exponentiate a similarity left over from
        # a previous call.
        raise ValueError('ERROR, similarity not implemented: %r (beta=%r)' % (self.similarity, beta))

    self.s.data = np.power(self.s.data, power)
def model(self, alpha=1, beta=0, k=200, shrink=0, threshold=0, rp3_mode=0, target_items=None):
    """Store the hyper-parameters and compute the similarity matrix ``self.s``.

    Selection rule:
      * ``self.mode_t`` truthy -> ``ss.tversky_similarity``
      * otherwise ``beta == 0`` -> ``ss.p3alpha_similarity`` (``beta`` not forwarded)
      * otherwise              -> ``p3r3.p3alpha_rp3beta_similarity`` (also
        receives ``self.pop`` and ``mode=rp3_mode``)

    Every similarity is built from ``self.m_iu`` and ``self.m_ui``.
    Per the original author's note, ``target_items=None`` makes the similarity
    function compute the whole similarity.
    """
    self.alpha = alpha
    self.beta = beta
    self.k = k
    self.shrink = shrink
    self.threshold = threshold
    self.rp3_mode = rp3_mode

    # Keyword arguments common to all three similarity functions.
    shared = dict(k=k, shrink=shrink, alpha=alpha, threshold=threshold, target_items=target_items)

    if self.mode_t:
        self.s = ss.tversky_similarity(self.m_iu, self.m_ui, beta=beta, verbose=self.verbose, **shared)
        return

    if beta == 0:
        self.s = ss.p3alpha_similarity(self.m_iu, self.m_ui, verbose=self.verbose, **shared)
        return

    self.s = p3r3.p3alpha_rp3beta_similarity(self.m_iu, self.m_ui, self.pop, beta=beta,
                                             verbose=self.verbose, mode=rp3_mode, **shared)
def recsys(shrink):
    """Evaluate a Tversky-similarity recommender for one ``shrink`` value.

    Uses the module-level ``urm``, ``t_urm``, ``dr`` and ``ev`` objects.

    Returns:
        A pair ``(res[0:3], config)``: the first three entries of the
        evaluator result and a string describing the configuration used.
    """
    tversky_alpha, tversky_beta, knn = 0.25, 0.65, 200
    config = 'alpha=%.2f beta=%.2f k=%d shrink=%d binary=False' % (tversky_alpha, tversky_beta, knn, shrink)

    similarity = ss.tversky_similarity(
        urm.T, urm,
        k=knn, alpha=tversky_alpha, beta=tversky_beta, shrink=shrink,
        binary=False, verbose=True,
    )

    # Estimated ratings; the similarity is released before post-processing.
    ratings = ss.dot_product(t_urm, similarity.T, k=750)
    del similarity
    ratings = eurm_remove_seed(ratings, dr)

    # Evaluation
    recommendations = eurm_to_recommendation_list(ratings)
    outcome = ev.evaluate(recommendations, 'ciao', verbose=False)
    del ratings
    return outcome[0:3], config
def model(self, alpha=1, beta=1, k=200, shrink=0, power=1, threshold=0, target_items=None):
    """Compute the similarity matrix ``self.s`` selected by ``self.similarity``.

    The hyper-parameters are stored on the instance and the similarity is
    built from ``self.m_ui`` and ``self.m_iu``. When ``power != 1`` the stored
    values (``self.s.data``) are raised element-wise to ``power``.

    Args:
        alpha, beta: forwarded to the ``'tversky'`` similarity only.
        k, shrink, threshold: forwarded unchanged to the similarity function.
        power: exponent applied to ``self.s.data`` (skipped when equal to 1).
        target_items: forwarded as ``target_items``; when ``None`` it is
            replaced by ``self.dr.get_test_pids()``.

    Raises:
        ValueError: if ``self.similarity`` is neither ``'tversky'`` nor ``'dot'``.
    """
    if target_items is None:
        # Original author's note: work with s*urm
        target_items = self.dr.get_test_pids()

    self.alpha, self.beta = alpha, beta
    self.k = k
    self.power = power
    self.shrink, self.threshold = shrink, threshold

    if self.similarity == 'tversky':
        self.s = ss.tversky_similarity(self.m_ui, self.m_iu, k=k, shrink=shrink, alpha=alpha, beta=beta,
                                       threshold=threshold, verbose=self.verbose, target_items=target_items)
    elif self.similarity == 'dot':
        self.s = ss.dot_product_similarity(self.m_ui, self.m_iu, k=k, shrink=shrink, threshold=threshold,
                                           verbose=self.verbose, target_items=target_items)
    else:
        # Fail loudly: continuing would either hit an unrelated AttributeError
        # on ``self.s`` or silently re-exponentiate a similarity left over from
        # a previous call.
        raise ValueError('ERROR, similarity not implemented: %r' % (self.similarity,))

    if power != 1:
        self.s.data = np.power(self.s.data, power)
# Map every track to its artist through the lookup table.
artists = [dictionary[track] for track in tracks]

print('ucm...')
# Playlist x artist matrix with one entry per (playlist, artist) pair,
# restricted to the rows listed in ``pids``.
ucm = sparse.csr_matrix(
    (np.ones(len(playlists)), (playlists, artists)),
    shape=(1049361, len(dr.get_artists())),
).tocsr()[pids]
print(ucm.shape)
ucm = bm25_row(ucm)

print('similarity..')
sim = tversky_similarity(
    ucm, ucm.T, shrink=200, alpha=0.1, beta=1, k=800, verbose=1, binary=False
).tocsr()

# Estimated ratings, keeping only the test playlists' rows.
test_pids = list(dr.get_test_pids())
eurm = dot_product(sim, urm, k=750).tocsr()[test_pids, :]

sparse.save_npz('eurm_artists.npz', eurm)
# Evaluation is currently disabled:
# ev.evaluate(eurm_to_recommendation_list(eurm), name='cbf_user_artist', show_plot=False)
exit()
# INITIALIZATION
dr = Datareader(mode='offline', verbose=False, only_load=True)
ev = Evaluator(dr)
test_pids = dr.get_test_pids()
urm = dr.get_urm()
topk = 750

nlp_strict = NLPStrict(dr)
ucm_strict = nlp_strict.get_UCM()

# TVERSKY
# The similarity does not depend on the exponent being tuned, so it is
# computed once instead of once per loop iteration.
# NOTE(review): assumes tversky_similarity is deterministic and returns a
# scipy-style sparse matrix exposing ``.copy()`` -- confirm.
base_sim = tversky_similarity(ucm_strict, ucm_strict.T, k=450, alpha=0.2, beta=0.5,
                              shrink=150, target_items=test_pids)

for a in [0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.4, 1.7, 2.0]:
    print('---------')
    print('TVERSKY | power =', a)

    # Work on a fresh copy so the base similarity is never modified.
    sim = base_sim.copy()
    sim.data = np.power(sim.data, a)

    # Compute eurm, keeping only the test playlists' rows
    eurm = dot_product(sim, urm, k=topk)
    eurm = eurm.tocsr()
    eurm = eurm[test_pids, :]

    rec_list = eurm_to_recommendation_list(eurm, datareader=dr)
    ev.evaluate(rec_list, name='nlp_strict_tversky_power=' + str(a))
data1 = False

# --- NLP STRICT ---
nlp_strict = NLPStrict(dr)
ucm_strict = nlp_strict.get_UCM().astype(np.float64)
top_pop = dr.get_eurm_top_pop()

# Do not train on challenge set: the transposed operand is built from a copy
# on which ``inplace_set_rows_zero`` is applied for ``test_pids``.
ucm_strict_T = ucm_strict.copy()
inplace_set_rows_zero(ucm_strict_T, test_pids)
ucm_strict_T = ucm_strict_T.T

sim = tversky_similarity(
    ucm_strict, ucm_strict_T,
    k=450, alpha=0.2, beta=0.5, shrink=150,
    target_items=test_pids,
)

# Compute eurm, keeping only the test playlists' rows
eurm = dot_product(sim, urm, k=topk).tocsr()[test_pids, :]

# --- NLP TOKENS ---
nlp = NLP(dr)
ucm = nlp.get_UCM(data1=data1).astype(np.float64)

# Do not train on challenge set
# Stemmer switches forwarded to the NLP constructor.
porter = False
porter2 = True
lanca = False
lanca2 = True
data1 = False

nlp = NLP(
    dr,
    stopwords=[],
    norm=norm,
    work=work,
    split=split,
    date=date,
    skip_words=skip_words,
    porter=porter,
    porter2=porter2,
    lanca=lanca,
    lanca2=lanca2,
)
ucm = nlp.get_UCM(data1=data1)

# TVERSKY: evaluate one model per shrink value.
for s in range(0, 200, 25):
    print('---------')
    print('TVERSKY | shrink =', s)

    sim = tversky_similarity(
        ucm, ucm.T, k=200, alpha=0.9, beta=1.0, shrink=s, target_items=test_pids
    )

    # Compute eurm, keeping only the test playlists' rows
    eurm = dot_product(sim, urm, k=topk).tocsr()[test_pids, :]

    rec_list = eurm_to_recommendation_list(eurm, datareader=dr)
    ev.evaluate(rec_list, name='nlp_tversky_shrink=' + str(s))
if mode == 'offline':
    # Setup
    urm = dr.get_urm()
    test_pids = dr.get_test_pids()

    # Build the NLP object and fetch its UCM
    nlp = NLP(dr)
    ucm = nlp.get_UCM()

    # Similarity (playlists x playlists, per the original author's note)
    sim = tversky_similarity(ucm, ucm.T, k=knn, shrink=0, alpha=1, beta=0.1).tocsr()

    # Recommendation: estimated ratings restricted to the test playlists' rows
    eurm = dot_product(sim, urm, k=topk).tocsr()[test_pids, :]
    rec_list = eurm_to_recommendation_list(eurm, dr)

    if save_eurm:
        sps.save_npz(mode + "_" + name + ".npz", eurm, compressed=False)

    # Submission
# Stemmer switches (not consumed within this fragment).
porter = False
porter2 = True
lanca = False
lanca2 = True
data1 = False

nlp = NLP(dr)
ucm = nlp.get_UCM(data1=data1).astype(np.float64)

# Do not train on challenge set: the transposed operand is built from a copy
# on which ``inplace_set_rows_zero`` is applied for ``test_pids``.
ucm_T = ucm.copy()
inplace_set_rows_zero(ucm_T, test_pids)
ucm_T = ucm_T.T

sim_lele = tversky_similarity(
    ucm, ucm_T, k=200, alpha=0.9, beta=1.0, shrink=0, target_items=test_pids
)

# Compute eurm, keeping only the test playlists' rows
eurm_lele = dot_product(sim_lele, urm, k=topk).tocsr()[test_pids, :]

# Disabled experiment: L1-normalised fusion of the two eurms.
# a = 0.2
# eurm_l1 = norm_l1_row(eurm)
# eurm_lele_l1 = norm_l1_row(eurm_lele)
# nlp_fusion = a * eurm_l1 + (1.0 - a) * eurm_lele_l1
# sparse.save_npz('nlp_fusion_tuned_online.npz', nlp_fusion)
# rec_list = eurm_to_recommendation_list(nlp_fusion, datareader=dr)
# ev.evaluate(rec_list, name='nlp_fusion_l1_a=' + str(a) + '_top_pop')

rec_list = eurm_to_recommendation_list(eurm_lele, datareader=dr)
# Turn the learned feature weights into a diagonal matrix and persist both the
# weights and the weighted UCM.
# NOTE(review): the two file names are spelled differently ("maurizi" vs
# "maurizio") -- confirm whether that is intentional.
weights = sps.diags(cfw.D_best)
sps.save_npz("ucm_weights_maurizi", weights)
UCM_weighted = dot_product(UCM, weights)
sps.save_npz("ucm_fw_maurizio", UCM_weighted)

# Our part (the original marker was "NOI")
urm = dr.get_urm()
pid = dr.get_test_pids()

similarity = tversky_similarity(
    UCM_weighted, UCM_weighted.T, binary=False, shrink=1, alpha=0.9, beta=1
).tocsr()

# eurm: estimated ratings restricted to the test playlists' rows
test_playlists = dr.get_test_pids()
eurm = dot_product(similarity, urm, k=topk).tocsr()[test_playlists, :]
rec_list = eurm_to_recommendation_list(eurm)

# evaluate
ev = Evaluator(dr)
ev.evaluate(rec_list, name='weighter', verbose=True, show_plot=False)