def __init__(self,
              urm,
              ucm,
              binary=False,
              verbose=True,
              mode='offline',
              datareader=None,
              verbose_evaluation=True,
              bm25=False,
              similarity='tversky'):
     """Store the configuration and build the ``m_ui`` / ``m_iu`` matrices.

     Args:
         urm: matrix stored unchanged on ``self.urm``.
         ucm: sparse matrix the model matrices are derived from
             (presumably a user-content matrix -- confirm with callers).
         binary: when true, every stored value of ``ucm`` is overwritten
             with 1. NOTE: this modifies the caller's ``ucm`` in place.
         verbose: stored on ``self.verbose``.
         mode: ``'offline'`` or ``'online'``; an ``Evaluator`` is created
             only in offline mode.
         datareader: object exposing ``get_test_pids()``. Although it
             defaults to ``None``, it is dereferenced unconditionally.
         verbose_evaluation: stored on ``self.verbose_ev``.
         bm25: when true, ``m_ui`` is passed through ``bm25_row`` and
             ``m_iu`` through ``bm25_col``.
         similarity: stored on ``self.similarity``.
     """
     assert mode in ('offline', 'online')

     if binary:
         ucm.data = np.ones(ucm.data.shape[0])

     self.urm = urm
     self.dr = datareader
     self.mode = mode
     self.binary = binary
     self.bm25 = bm25
     self.similarity = similarity
     self.verbose = verbose
     self.verbose_ev = verbose_evaluation

     # Training copy with the test rows blanked out, so that nothing is
     # learnt from the challenge set.
     train_ucm = ucm.copy()
     ut.inplace_set_rows_zero(X=train_ucm,
                              target_rows=self.dr.get_test_pids())
     train_ucm.eliminate_zeros()

     # m_ui is built from the full ucm; m_iu from the transposed
     # training copy.
     if self.bm25:
         self.m_ui = bm25_row(ucm.copy()).tocsr()
         self.m_iu = bm25_col(train_ucm.T.copy()).tocsr()
     else:
         self.m_ui = ucm.copy().tocsr()
         self.m_iu = train_ucm.T.copy().tocsr()

     if mode == 'offline':
         self.ev = Evaluator(self.dr)
예제 #2
0
def position_matrix_boost(pos_matrix, target_rows=None, mode='linear',
                          p1=None, p2=None, v1=None, v2=None,
                          n_steps=None, verbose=True):
    """Return a copy of ``pos_matrix`` with the target rows' values remapped.

    For every row in ``target_rows`` the stored (non-zero) values are
    replaced by the output of ``linear`` or ``steps``; all other rows are
    returned unchanged. The input matrix is never modified.

    Args:
        pos_matrix: anything accepted by ``scipy.sparse.csr_matrix``.
        target_rows: iterable of row indices to transform; ``None`` means
            every row.
        mode: ``'linear'`` (uses ``p1``, ``p2``, ``v1``, ``v2``) or
            ``'steps'`` (uses ``n_steps``, ``v1``, ``v2``).
        p1, p2, v1, v2, n_steps: forwarded untouched to the selected
            helper; their meaning is defined by ``linear`` / ``steps``.
        verbose: show a tqdm progress bar while iterating the rows.

    Returns:
        A float64 sparse matrix with the same shape as ``pos_matrix``.

    Raises:
        ValueError: if ``mode`` is neither ``'linear'`` nor ``'steps'``.
    """
    # Fail fast: the previous behaviour only printed a message and then
    # crashed with a NameError on the unbound ``new_data``.
    if mode not in ('linear', 'steps'):
        raise ValueError(
            "mode must be 'linear' or 'steps', got {!r}".format(mode))

    # ``np.float`` was removed in NumPy 1.24; float64 is what it aliased.
    pos_matrix = sp.csr_matrix(pos_matrix, copy=True, dtype=np.float64)
    if target_rows is None:
        target_rows = np.arange(0, pos_matrix.shape[0], 1)

    data = []
    cols = []
    rows = []
    for id_row in tqdm(target_rows, disable=not verbose):
        row = pos_matrix[id_row]
        if mode == 'linear':
            new_data = linear(row.data, p1, p2, v1, v2)
        else:
            new_data = steps(row.data, n_steps, v1, v2)
        cols.extend(row.indices.tolist())
        rows.extend(np.full(row.indices.shape[0], id_row).tolist())
        data.extend(new_data)

    # Matrix holding only the transformed rows.
    m1 = sp.csr_matrix((data, (rows, cols)), shape=pos_matrix.shape)
    # Blank the original values of those rows so the sum below replaces
    # them instead of adding to them.
    ut.inplace_set_rows_zero(X=pos_matrix, target_rows=target_rows)
    return pos_matrix + m1
 def __init__(self,
              urm,
              pop=None,
              binary=False,
              K1=1.2,
              B=0.75,
              verbose=True,
              mode='offline',
              datareader=None,
              verbose_evaluation=True,
              mode_t=False,
              trick=False):
     """Store the configuration and build the ``m_ui`` / ``m_iu`` matrices.

     Args:
         urm: sparse matrix the model is built from.
         pop: optional precomputed vector stored on ``self.pop``; when
             ``None`` the column sums of ``urm`` are used.
         binary: when true, every stored value of ``urm`` is overwritten
             with 1. NOTE: this modifies the caller's ``urm`` in place.
         K1, B: forwarded to ``pre.bm25_row``.
         verbose: stored on ``self.verbose``.
         mode: ``'offline'`` or ``'online'``; an ``Evaluator`` is created
             only in offline mode.
         datareader: object exposing ``get_test_pids()``. Although it
             defaults to ``None``, it is dereferenced unconditionally.
         verbose_evaluation: stored on ``self.verbose_ev``.
         mode_t: when true, the BM25 weighting is skipped and the plain
             training matrix (and its transpose) is used.
         trick: when true, ``self.urm`` is replaced by its BM25-weighted
             version.
     """
     assert mode in ('offline', 'online')

     if binary:
         urm.data = np.ones(urm.data.shape[0])

     self.pop = urm.sum(axis=0).A1 if pop is None else pop
     self.dr = datareader
     self.urm = urm

     # Training copy with the test rows blanked out, so that nothing is
     # learnt from the challenge set.
     train_urm = urm.copy()
     ut.inplace_set_rows_zero(X=train_urm,
                              target_rows=self.dr.get_test_pids())
     train_urm.eliminate_zeros()

     if mode_t:
         self.m_ui = train_urm.copy().tocsr()
         self.m_iu = train_urm.T.copy().tocsr()
     else:
         self.m_ui = pre.bm25_row(train_urm.copy(), K1=K1, B=B).tocsr()
         self.m_iu = pre.bm25_row(train_urm.T.copy(), K1=K1, B=B).tocsr()

     self.mode = mode
     self.mode_t = mode_t
     self.binary = binary
     self.verbose = verbose
     self.verbose_ev = verbose_evaluation

     if trick:
         # Original author's note: gives high click and high NDCG, but
         # better not to use it.
         self.urm = pre.bm25_row(urm).tocsr()

     if mode == 'offline':
         self.ev = Evaluator(self.dr)
예제 #4
0
 def __init__(self,
              urm,
              pop=None,
              binary=False,
              verbose=True,
              mode='offline',
              datareader=None,
              verbose_evaluation=True,
              similarity='tversky'):
     """Store the configuration and build the ``m_ui`` / ``m_iu`` matrices.

     Args:
         urm: sparse matrix the model is built from.
         pop: optional precomputed vector stored on ``self.pop``; when
             ``None`` the row sums of ``urm`` are used (the original
             author noted that followers might be a better choice).
         binary: when true, every stored value of ``urm`` is overwritten
             with 1. NOTE: this modifies the caller's ``urm`` in place.
         verbose: stored on ``self.verbose``.
         mode: ``'offline'`` or ``'online'``; an ``Evaluator`` is created
             only in offline mode.
         datareader: object exposing ``get_test_pids()``. Although it
             defaults to ``None``, it is dereferenced unconditionally.
         verbose_evaluation: stored on ``self.verbose_ev``.
         similarity: name stored on ``self.similarity``; the value
             ``'p3alpha'`` additionally applies ``pre.bm25_col`` to both
             model matrices.
     """
     assert mode in ('offline', 'online')

     if binary:
         urm.data = np.ones(urm.data.shape[0])

     self.pop = urm.sum(axis=1).A1 if pop is None else pop

     self.urm = urm
     self.dr = datareader
     self.mode = mode
     self.similarity = similarity
     self.binary = binary
     self.verbose = verbose
     self.verbose_ev = verbose_evaluation

     # Training copy with the test rows blanked out, so that nothing is
     # learnt from the challenge set.
     train_urm = urm.copy()
     ut.inplace_set_rows_zero(X=train_urm,
                              target_rows=self.dr.get_test_pids())
     train_urm.eliminate_zeros()

     # m_ui is built from the full urm; m_iu from the transposed
     # training copy.
     if self.similarity == 'p3alpha':
         self.m_ui = pre.bm25_col(urm.copy()).tocsr()
         self.m_iu = pre.bm25_col(train_urm.T.copy()).tocsr()
     else:
         self.m_ui = urm.copy().tocsr()
         self.m_iu = train_urm.T.copy().tocsr()

     if mode == 'offline':
         self.ev = Evaluator(self.dr)
예제 #5
0
    cluster_4_artist = np.array(cluster_4_artist).ravel()

    cluster_1_artist_skip_lines = np.setdiff1d(all_lines, cluster_1_artist)
    cluster_2_artist_skip_lines = np.setdiff1d(all_lines, cluster_2_artist)
    cluster_3_artist_skip_lines = np.setdiff1d(all_lines, cluster_3_artist)
    cluster_4_artist_skip_lines = np.setdiff1d(all_lines, cluster_4_artist)

    ## writing the clustered matrices

    for path_eurm in filenames:

        eurm = sps.load_npz(path_eurm)
        print(path_eurm)

        eurm_cluster_1_art = csr_matrix(eurm.copy())
        inplace_set_rows_zero(eurm_cluster_1_art, cluster_1_artist_skip_lines)
        sps.save_npz(ar1 + path_eurm.split('/')[-1],
                     eurm_cluster_1_art.tocsr())

        eurm_cluster_2_art = csr_matrix(eurm.copy())
        inplace_set_rows_zero(eurm_cluster_2_art, cluster_2_artist_skip_lines)
        sps.save_npz(ar2 + path_eurm.split('/')[-1],
                     eurm_cluster_2_art.tocsr())

        eurm_cluster_3_art = csr_matrix(eurm.copy())
        inplace_set_rows_zero(eurm_cluster_3_art, cluster_3_artist_skip_lines)
        sps.save_npz(ar3 + path_eurm.split('/')[-1],
                     eurm_cluster_3_art.tocsr())

        eurm_cluster_4_art = csr_matrix(eurm.copy())
        inplace_set_rows_zero(eurm_cluster_4_art, cluster_4_artist_skip_lines)
        ### Submission ###
        #Data initialization
        dr = Datareader(verbose=False, mode=mode, only_load=True)

        #Recommender algorithm initialization
        rec = R_p_3_beta()

        #Submitter initialization
        sb = Submitter(dr)

        #Getting data ready for the recommender algorithm
        urm = dr.get_urm()
        pids = dr.get_test_pids()
        urm.data = np.ones(len(urm.data))

        ut.inplace_set_rows_zero(
            X=urm, target_rows=pids)  #don't learn from challange set
        urm.eliminate_zeros()

        p_ui = normalize(urm, norm="l1")
        p_iu = normalize(urm.T, norm="l1")
        top = urm.sum(axis=0).A1

        # Fitting data
        rec.fit(p_ui, p_iu, top, pids)

        #Computing similarity/model
        rec.compute_model(top_k=knn,
                          shrink=250,
                          alpha=0.5,
                          beta=0.5,
                          verbose=True)