def __init__(self, urm, ucm=None, stopwords=None, load_ucm=False, save_ucm=False,
             binary=False, verbose=True, mode='offline', datareader=None,
             verbose_evaluation=True):
    """Set up the recommender from a URM and a UCM.

    The UCM comes from one of three places, in this order: the ``ucm``
    argument, a fresh ``NLP(...).get_UCM(...)`` run (when ``ucm`` is None and
    ``load_ucm`` is False), or ``self.load_ucm('ucm_nlp.npz')`` (when ``ucm``
    is None and ``load_ucm`` is True).

    Args:
        urm: sparse matrix exposing a ``.data`` array. NOTE: when ``binary``
            is true its ``.data`` is overwritten in place with ones, so the
            caller's matrix is modified as well.
        ucm: optional pre-built UCM; skips both building and loading.
        stopwords: forwarded to ``NLP``; ``None`` means an empty list.
        load_ucm: load the UCM from ``'ucm_nlp.npz'`` instead of building it.
        save_ucm: call ``self.save_ucm('ucm_nlp.npz')`` once the UCM is set.
        binary: replace every stored URM value with 1.
        verbose: stored as ``self.verbose``.
        mode: ``'offline'`` or ``'online'``; an ``Evaluator`` is created only
            in offline mode.
        datareader: stored as ``self.dr`` and passed to ``NLP``/``Evaluator``.
        verbose_evaluation: stored as ``self.verbose_ev``.

    Raises:
        AssertionError: if ``mode`` is neither ``'offline'`` nor ``'online'``.
    """
    assert mode in ('offline', 'online')

    # A fresh list per call: a `[]` default would be one shared object that
    # any callee mutating it would leak into every later construction.
    if stopwords is None:
        stopwords = []

    if binary:
        urm.data = np.ones(urm.data.shape[0])

    # best: norm, wor, split, skipw, porter2, lanca2
    nlp_flags = dict(
        norm=True,
        work=True,
        split=True,
        date=False,
        skip_words=True,
        porter=False,
        porter2=True,
        lanca=False,
        lanca2=True,
    )
    data1 = False

    self.ucm = ucm
    if self.ucm is None and not load_ucm:
        nlp = NLP(datareader, stopwords=stopwords, **nlp_flags)
        self.ucm = nlp.get_UCM(data1=data1)
    elif self.ucm is None and load_ucm:
        # presumably populates self.ucm from disk -- defined outside this view
        self.load_ucm('ucm_nlp.npz')

    if save_ucm:
        self.save_ucm('ucm_nlp.npz')

    # Weight the UCM once and derive the transposed view from that result
    # instead of running the same bm25_row computation a second time.
    self.m_uc = pre.bm25_row(self.ucm.copy()).tocsr()
    self.m_cu = self.m_uc.T.tocsr()

    self.urm = urm
    self.binary = binary
    self.verbose = verbose
    self.verbose_ev = verbose_evaluation
    self.dr = datareader
    self.mode = mode

    if mode == 'offline':
        self.ev = Evaluator(self.dr)
def __init__(self, urm, pop=None, binary=False, verbose=True, mode='offline',
             datareader=None, verbose_evaluation=True):
    """Store the URM, its column sums and two bm25_row-weighted views of it.

    Args:
        urm: sparse matrix. NOTE: when ``binary`` is true its ``.data`` is
            overwritten in place with ones, so the caller's matrix changes.
        pop: optional precomputed vector stored as ``self.pop``; when None it
            is taken as the column sums of ``urm`` (after binarisation, if
            requested).
        binary: replace every stored URM value with 1.
        verbose: stored as ``self.verbose``.
        mode: ``'offline'`` or ``'online'``; an ``Evaluator`` is created only
            in offline mode.
        datareader: stored as ``self.dr`` and handed to ``Evaluator``.
        verbose_evaluation: stored as ``self.verbose_ev``.

    Raises:
        AssertionError: if ``mode`` is neither ``'offline'`` nor ``'online'``.
    """
    assert mode in ('offline', 'online')

    # Plain configuration first; none of it depends on the matrices.
    self.binary = binary
    self.verbose = verbose
    self.verbose_ev = verbose_evaluation
    self.dr = datareader
    self.mode = mode

    if binary:
        urm.data = np.ones(urm.data.shape[0])

    self.pop = urm.sum(axis=0).A1 if pop is None else pop
    self.urm = urm

    # bm25_row is applied to copies, once on the URM and once on its
    # transpose; the stored self.urm is never passed to it.
    self.m_ui = pre.bm25_row(urm.copy()).tocsr()
    self.m_iu = pre.bm25_row(urm.T.copy()).tocsr()

    if mode == 'offline':
        self.ev = Evaluator(self.dr)
def __init__(self, urm, pop=None, binary=False, K1=1.2, B=0.75, verbose=True,
             mode='offline', datareader=None, verbose_evaluation=True,
             mode_t=False, trick=False):
    """Build the two model matrices from a URM with the test rows blanked out.

    Args:
        urm: sparse matrix. NOTE: when ``binary`` is true its ``.data`` is
            overwritten in place with ones, so the caller's matrix changes.
        pop: optional precomputed vector stored as ``self.pop``; when None it
            is taken as the column sums of ``urm``.
        binary: replace every stored URM value with 1.
        K1, B: forwarded to ``pre.bm25_row`` (unused when ``mode_t`` is true).
        verbose: stored as ``self.verbose``.
        mode: ``'offline'`` or ``'online'``; an ``Evaluator`` is created only
            in offline mode.
        datareader: stored as ``self.dr``. ``get_test_pids()`` is called on it
            unconditionally, so despite the ``None`` default it must be given.
        verbose_evaluation: stored as ``self.verbose_ev``.
        mode_t: when true, keep the raw (unweighted) matrices instead of
            applying ``pre.bm25_row``.
        trick: when true, ``self.urm`` is replaced by ``pre.bm25_row(urm)``.

    Raises:
        AssertionError: if ``mode`` is neither ``'offline'`` nor ``'online'``.
    """
    assert mode in ('offline', 'online')

    if binary:
        urm.data = np.ones(urm.data.shape[0])

    self.pop = urm.sum(axis=0).A1 if pop is None else pop
    self.dr = datareader
    self.urm = urm

    # Don't learn from the challenge set: blank the test rows in a working copy.
    train_urm = urm.copy()
    ut.inplace_set_rows_zero(X=train_urm, target_rows=self.dr.get_test_pids())
    train_urm.eliminate_zeros()

    if mode_t:
        self.m_ui = train_urm.copy().tocsr()
        self.m_iu = train_urm.T.copy().tocsr()
    else:
        self.m_ui = pre.bm25_row(train_urm.copy(), K1=K1, B=B).tocsr()
        self.m_iu = pre.bm25_row(train_urm.T.copy(), K1=K1, B=B).tocsr()

    self.binary = binary
    self.verbose = verbose
    self.verbose_ev = verbose_evaluation
    self.mode = mode
    self.mode_t = mode_t

    if trick:
        # high click, high ndcg; better not to use
        self.urm = pre.bm25_row(urm).tocsr()

    if mode == 'offline':
        self.ev = Evaluator(self.dr)
# Tag the run name when the `data1` option is on.
if data1:
    nome += "data1_"

# Build the UCM with the currently selected text-processing flags.
nlp = NLP2(
    dr,
    stopwords=[],
    norm=norm,
    work=work,
    split=split,
    date=date,
    skip_words=skip_words,
    porter=porter,
    porter2=porter2,
    lanca=lanca,
    lanca2=lanca2,
)
# new_titles, occ_full, occ_single = nlp.fit( verbose=False, workout=True, normalize=True, date=True, lancaster=False,
#                                             porter=False, underscore=True, double_fit=False)
ucm = nlp.get_UCM(data1=data1)
urm = dr.get_urm()
test_playlists = dr.get_test_pids()
print('ucm', ucm.shape)

# Compute similarity on the bm25_row-weighted UCM and report the elapsed time.
print('Computing similarity...')
start = time.time()
ucm = bm25_row(ucm)
similarity = tversky_similarity(
    ucm, binary=False, shrink=1, alpha=0.1, beta=1
).tocsr()
print(time.time() - start)

# Compute the eurm, keeping only the rows of the test playlists.
print('Computing eurm...')
start = time.time()
eurm = dot_product(similarity, urm, k=500).tocsr()[test_playlists, :]
print('eurm', eurm.shape)
print(time.time() - start)
elif feature_type == 'artist': album = False artist = True else: print("invalid type") complete_name = mode + "_" + name + "_" + feature_type + "_knn=" + str( knn) + "_topk=" + str(topk) if mode == "offline": dr = Datareader(verbose=False, mode=mode, only_load=True) urm = dr.get_urm() icm = dr.get_icm(arid=artist, alid=album) pid = dr.get_test_pids() icm_bm25 = pre.bm25_row(icm) cbfi = Knn_content_item() cbfi.fit(urm, icm_bm25, pid) cbfi.compute_model(top_k=knn, sm_type=TVERSKY, shrink=100, alpha=0.1, binary=False, verbose=True) cbfi.compute_rating(top_k=topk, verbose=True, small=True) sps.save_npz(complete_name + ".npz", cbfi.eurm) ev = Evaluator(dr) ev.evaluate(recommendation_list=eurm_to_recommendation_list(cbfi.eurm),
import recommenders.similarity.p3alpha_rp3beta as p3r3 import numpy as np from utils import ensembler import scipy.sparse as sps import gc from sklearn.preprocessing import normalize import sys from utils.pre_processing import bm25_col, bm25_row dr = Datareader(verbose=False, mode='offline', only_load=True) ev = Evaluator(dr) #Getting for the recommender algorithm urm = dr.get_urm() #urm.data = np.ones(len(urm.data)) p_ui = bm25_row(urm.copy()) p_iu = bm25_row(urm.T.copy()) pop = urm.sum(axis=0).A1 pids = dr.get_test_pids() t_urm = sps.csr_matrix(p_ui.copy()[pids]) def recsys(alpha, beta): alpha = alpha beta = beta k = 200 shrink = 100 config = ('alpha=%.2f beta=%.2f k=%d shrink=%d binary=False' % (alpha, beta, k, shrink)) sim = p3r3.p3alpha_rp3beta_similarity(p_iu, p_ui,
else: print("invalid type") complete_name = mode + "_" + name + "_" + feature_type + "_knn=" + str( knn) + "_topk=" + str(topk) if mode == "offline": dr = Datareader(verbose=False, mode=mode, only_load=True) urm = dr.get_urm() icm = dr.get_icm(arid=artist, alid=album) pid = dr.get_test_pids() clustered_icm, cluster_allocation = clusterize_icm(icm, n_clusters=1000) icm_bm25 = pre.bm25_row(clustered_icm) cbfi = Knn_content_item() cbfi.fit(urm, icm_bm25, pid) cbfi.compute_model(top_k=knn, sm_type=TVERSKY, shrink=100, alpha=0.1, binary=False, verbose=True) cbfi.compute_rating(top_k=topk, verbose=True, small=True) sps.save_npz(complete_name + ".npz", cbfi.eurm) ev = Evaluator(dr) ev.evaluate(recommendation_list=eurm_to_recommendation_list(cbfi.eurm),