def __init__(self, urm, ucm, binary=False, verbose=True, mode='offline', datareader=None,
             verbose_evaluation=True, bm25=False, similarity='tversky'):
    assert mode in ('offline', 'online')

    if binary:
        ucm.data = np.ones(ucm.data.shape[0])

    self.urm = urm
    self.binary = binary
    self.verbose = verbose
    self.verbose_ev = verbose_evaluation
    self.dr = datareader
    self.mode = mode
    self.similarity = similarity
    self.bm25 = bm25

    # Don't learn from the challenge set
    ucm_aux = ucm.copy()
    ut.inplace_set_rows_zero(X=ucm_aux, target_rows=self.dr.get_test_pids())
    ucm_aux.eliminate_zeros()

    if self.bm25:
        self.m_ui = bm25_row(ucm.copy()).tocsr()
        self.m_iu = bm25_col(ucm_aux.T.copy()).tocsr()
    else:
        self.m_ui = ucm.copy().tocsr()
        self.m_iu = ucm_aux.T.copy().tocsr()

    if mode == 'offline':
        self.ev = Evaluator(self.dr)
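# A minimal sketch of the row-masking step above, assuming ut.inplace_set_rows_zero
# simply zeroes the stored values of the given CSR rows in place; eliminate_zeros()
# then drops the explicit zeros, so the challenge-set playlists contribute nothing
# to the learned similarity.
import numpy as np
import scipy.sparse as sp

def inplace_set_rows_zero_sketch(X, target_rows):
    # Zero out the stored entries of each target row of a CSR matrix, in place.
    for r in target_rows:
        X.data[X.indptr[r]:X.indptr[r + 1]] = 0.0

ucm_demo = sp.csr_matrix(np.arange(1.0, 13.0).reshape(4, 3))
inplace_set_rows_zero_sketch(ucm_demo, target_rows=[1, 3])
ucm_demo.eliminate_zeros()
print(ucm_demo.toarray())  # rows 1 and 3 are now empty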
def position_matrix_boost(pos_matrix, target_rows=None, mode='linear',
                          p1=None, p2=None, v1=None, v2=None, n_steps=None, verbose=True):
    # Replace the position values of the target rows with boost factors computed
    # by the chosen scheme ('linear' or 'steps').
    pos_matrix = sp.csr_matrix(pos_matrix, copy=True, dtype=np.float64)
    if target_rows is None:
        target_rows = np.arange(0, pos_matrix.shape[0], 1)

    data = []
    cols = []
    rows = []
    for id_row in tqdm(target_rows, disable=not verbose):
        row = pos_matrix[id_row]
        if mode == 'linear':
            new_data = linear(row.data, p1, p2, v1, v2)
        elif mode == 'steps':
            new_data = steps(row.data, n_steps, v1, v2)
        else:
            raise ValueError("mode must be 'linear' or 'steps'")
        cols.extend(row.indices.tolist())
        rows.extend(np.full(row.indices.shape[0], id_row).tolist())
        data.extend(new_data)

    m1 = sp.csr_matrix((data, (rows, cols)), shape=pos_matrix.shape)
    ut.inplace_set_rows_zero(X=pos_matrix, target_rows=target_rows)
    return pos_matrix + m1
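# The boost itself is delegated to helpers such as linear() and steps(). A minimal
# sketch of a linear variant, assuming it maps track positions in [p1, p2] to boost
# factors interpolated between v1 and v2 (np.interp clips values outside the range
# to the endpoints):
import numpy as np
import scipy.sparse as sp

def linear_sketch(positions, p1, p2, v1, v2):
    return np.interp(positions, [p1, p2], [v1, v2])

# Toy usage: earlier positions get a stronger boost than later ones.
pos_demo = sp.csr_matrix(np.array([[1.0, 2.0, 5.0, 0.0]]))
print(linear_sketch(pos_demo[0].data, p1=1, p2=5, v1=2.0, v2=1.0))  # [2.   1.75 1.  ]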
def __init__(self, urm, pop=None, binary=False, K1=1.2, B=0.75, verbose=True, mode='offline',
             datareader=None, verbose_evaluation=True, mode_t=False, trick=False):
    assert mode in ('offline', 'online')

    if binary:
        urm.data = np.ones(urm.data.shape[0])

    if pop is None:
        self.pop = urm.sum(axis=0).A1
    else:
        self.pop = pop

    self.dr = datareader
    self.urm = urm

    # Don't learn from the challenge set
    urm_aux = urm.copy()
    ut.inplace_set_rows_zero(X=urm_aux, target_rows=self.dr.get_test_pids())
    urm_aux.eliminate_zeros()

    if mode_t:
        self.m_ui = urm_aux.copy().tocsr()
        self.m_iu = urm_aux.T.copy().tocsr()
    else:
        self.m_ui = pre.bm25_row(urm_aux.copy(), K1=K1, B=B).tocsr()
        self.m_iu = pre.bm25_row(urm_aux.T.copy(), K1=K1, B=B).tocsr()

    self.binary = binary
    self.verbose = verbose
    self.verbose_ev = verbose_evaluation
    self.mode = mode
    self.mode_t = mode_t

    if trick:
        # Higher click metric, higher NDCG; better not to use it
        self.urm = pre.bm25_row(urm).tocsr()

    if mode == 'offline':
        self.ev = Evaluator(self.dr)
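# pre.bm25_row re-weights each row of the interaction matrix with an Okapi BM25
# scheme before the similarity is computed. A minimal sketch of the usual
# formulation (IDF per column, length normalisation per row), assuming the same
# K1 / B parametrisation as above:
import numpy as np
import scipy.sparse as sp

def bm25_row_sketch(X, K1=1.2, B=0.75):
    X = sp.coo_matrix(X, dtype=np.float64)
    n_rows, n_cols = X.shape
    # Inverse document frequency of each column (rarer tracks weigh more).
    idf = np.log(n_rows) - np.log1p(np.bincount(X.col, minlength=n_cols))
    # Length normalisation of each row (playlist).
    row_sums = np.ravel(X.sum(axis=1))
    length_norm = (1.0 - B) + B * row_sums / row_sums.mean()
    X.data = X.data * (K1 + 1.0) / (K1 * length_norm[X.row] + X.data) * idf[X.col]
    return X.tocsr()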
def __init__(self, urm, pop=None, binary=False, verbose=True, mode='offline', datareader=None,
             verbose_evaluation=True, similarity='tversky'):
    assert mode in ('offline', 'online')

    if binary:
        urm.data = np.ones(urm.data.shape[0])

    if pop is None:
        self.pop = urm.sum(axis=1).A1  # maybe better to use follower counts
    else:
        self.pop = pop

    self.urm = urm
    self.binary = binary
    self.verbose = verbose
    self.verbose_ev = verbose_evaluation
    self.dr = datareader
    self.mode = mode
    self.similarity = similarity

    # Don't learn from the challenge set
    urm_aux = urm.copy()
    ut.inplace_set_rows_zero(X=urm_aux, target_rows=self.dr.get_test_pids())
    urm_aux.eliminate_zeros()

    if self.similarity == 'p3alpha':
        self.m_ui = pre.bm25_col(urm.copy()).tocsr()
        self.m_iu = pre.bm25_col(urm_aux.T.copy()).tocsr()
    else:
        self.m_ui = urm.copy().tocsr()
        self.m_iu = urm_aux.T.copy().tocsr()

    if mode == 'offline':
        self.ev = Evaluator(self.dr)
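# With the default similarity='tversky' the matrices are kept un-weighted, since a
# Tversky similarity only needs the raw overlaps between playlists. A minimal
# sketch, assuming the standard definition
# |A ∩ B| / (|A ∩ B| + alpha*|A \ B| + beta*|B \ A|) on binary vectors:
import numpy as np

def tversky_sketch(a, b, alpha=1.0, beta=1.0):
    a, b = np.asarray(a, dtype=bool), np.asarray(b, dtype=bool)
    inter = np.sum(a & b)
    return inter / (inter + alpha * np.sum(a & ~b) + beta * np.sum(~a & b))

print(tversky_sketch([1, 1, 0, 1], [1, 0, 1, 1], alpha=0.5, beta=0.5))  # 2/3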
cluster_4_artist = np.array(cluster_4_artist).ravel()

cluster_1_artist_skip_lines = np.setdiff1d(all_lines, cluster_1_artist)
cluster_2_artist_skip_lines = np.setdiff1d(all_lines, cluster_2_artist)
cluster_3_artist_skip_lines = np.setdiff1d(all_lines, cluster_3_artist)
cluster_4_artist_skip_lines = np.setdiff1d(all_lines, cluster_4_artist)

## Writing the clustered matrices
for path_eurm in filenames:
    eurm = sps.load_npz(path_eurm)
    print(path_eurm)

    eurm_cluster_1_art = csr_matrix(eurm.copy())
    inplace_set_rows_zero(eurm_cluster_1_art, cluster_1_artist_skip_lines)
    sps.save_npz(ar1 + path_eurm.split('/')[-1], eurm_cluster_1_art.tocsr())

    eurm_cluster_2_art = csr_matrix(eurm.copy())
    inplace_set_rows_zero(eurm_cluster_2_art, cluster_2_artist_skip_lines)
    sps.save_npz(ar2 + path_eurm.split('/')[-1], eurm_cluster_2_art.tocsr())

    eurm_cluster_3_art = csr_matrix(eurm.copy())
    inplace_set_rows_zero(eurm_cluster_3_art, cluster_3_artist_skip_lines)
    sps.save_npz(ar3 + path_eurm.split('/')[-1], eurm_cluster_3_art.tocsr())

    eurm_cluster_4_art = csr_matrix(eurm.copy())
    inplace_set_rows_zero(eurm_cluster_4_art, cluster_4_artist_skip_lines)
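# Each *_skip_lines array is the complement of a cluster's row indices: zeroing
# those rows leaves only that cluster's playlists in the saved EURM. A toy check
# of the complement logic (hypothetical indices):
import numpy as np

all_lines_toy = np.arange(6)
cluster_toy = np.array([0, 2, 5])
skip_toy = np.setdiff1d(all_lines_toy, cluster_toy)
print(skip_toy)                               # [1 3 4]

eurm_toy = np.ones((6, 3))
eurm_toy[skip_toy] = 0.0                      # dense analogue of inplace_set_rows_zero
print(np.flatnonzero(eurm_toy.any(axis=1)))   # [0 2 5] -> only the cluster's rows survive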
### Submission ###

# Data initialization
dr = Datareader(verbose=False, mode=mode, only_load=True)

# Recommender algorithm initialization
rec = R_p_3_beta()

# Submitter initialization
sb = Submitter(dr)

# Getting the data ready for the recommender algorithm
urm = dr.get_urm()
pids = dr.get_test_pids()
urm.data = np.ones(len(urm.data))

# Don't learn from the challenge set
ut.inplace_set_rows_zero(X=urm, target_rows=pids)
urm.eliminate_zeros()

p_ui = normalize(urm, norm="l1")
p_iu = normalize(urm.T, norm="l1")
top = urm.sum(axis=0).A1

# Fitting the data
rec.fit(p_ui, p_iu, top, pids)

# Computing the similarity / model
rec.compute_model(top_k=knn, shrink=250, alpha=0.5, beta=0.5, verbose=True)
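# R_p_3_beta builds a graph-based item-item similarity: a two-step random walk
# track -> playlist -> track, with transition probabilities raised to alpha and
# the result penalised by track popularity to the power beta. A minimal sketch of
# that similarity (hypothetical helper; the compute_model call above additionally
# applies the shrink term and top_k pruning):
import numpy as np
import scipy.sparse as sps
from sklearn.preprocessing import normalize

def rp3beta_similarity_sketch(urm, alpha=0.5, beta=0.5):
    p_ui = normalize(urm, norm='l1')           # playlist -> track transition probabilities
    p_iu = normalize(urm.T, norm='l1')         # track -> playlist transition probabilities
    p_ui.data = np.power(p_ui.data, alpha)
    p_iu.data = np.power(p_iu.data, alpha)
    sim = p_iu.dot(p_ui).tocsr()               # two-step random-walk probabilities
    pop = np.asarray(urm.sum(axis=0), dtype=np.float64).ravel()
    penal = np.zeros_like(pop)
    penal[pop > 0] = np.power(pop[pop > 0], -beta)
    return (sim @ sps.diags(penal)).tocsr()    # divide column j by pop[j] ** beta

# e.g. sim = rp3beta_similarity_sketch(urm, alpha=0.5, beta=0.5)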