Example #1
    def evaluate_eurm(self, target_pids):
        assert self.mode == 'offline'
        eurm = sps.csr_matrix(self.eurm[target_pids])
        eurm = post.eurm_remove_seed(eurm, self.dr)
        rec_list = post.eurm_to_recommendation_list(eurm)
        res = self.ev.evaluate(rec_list, str(self), verbose=self.verbose_ev, return_result='all')
        return res
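Every example on this page calls `eurm_remove_seed` to drop a playlist's seed tracks from its estimated score matrix (the EURM) before building recommendation lists. The sketch below only illustrates that idea under stated assumptions; it is not the repository's implementation, and `remove_seed_sketch` and `urm_seed` are hypothetical names (the real function takes a Datareader rather than a seed URM).

import scipy.sparse as sps

def remove_seed_sketch(eurm, urm_seed):
    """Hypothetical helper: zero the scores of tracks already present as seeds.

    `eurm` and `urm_seed` are sparse matrices with aligned rows (one row per playlist).
    """
    eurm = sps.lil_matrix(eurm)
    urm_seed = sps.csr_matrix(urm_seed)
    for row in range(eurm.shape[0]):
        # column indices of the tracks already in this playlist
        seed_cols = urm_seed.indices[urm_seed.indptr[row]:urm_seed.indptr[row + 1]]
        if len(seed_cols) > 0:
            eurm[row, seed_cols] = 0  # a seed track must not be recommended again
    eurm = eurm.tocsr()
    eurm.eliminate_zeros()
    return eurm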
Example #2
def evaluateRecommendationsSpotify(self):
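    # Score every target playlist against the learned similarity, drop seed tracks and keep
    # the 500 best tracks per playlist (pids_converted, self.URM_train and self.W_sparse are
    # assumed to be defined by the enclosing class).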
    # print("Recommender: sparsity self.W_sparse:", self.W_sparse.nnz / self.W_sparse.shape[1] / self.W_sparse.shape[0])

    user_profile_batch = self.URM_train[pids_converted]
    print("dot product")
    eurm = dot_product(user_profile_batch, self.W_sparse, k=750).tocsr()
    eurm = eurm_remove_seed(eurm)

    recommendation_list = np.zeros((10000, 500))
    for row in range(eurm.shape[0]):
        val = eurm[row].data
        ind = val.argsort()[-500:][::-1]
        ind = eurm[row].indices[ind]
        recommendation_list[row, 0:len(ind)] = ind

    prec_t, ndcg_t, clicks_t, prec_a, ndcg_a, clicks_a = ev.evaluate(
        recommendation_list=recommendation_list,
        name=self.configuration + "_epoca" + str(self.currentEpoch),
        return_overall_mean=True,
        verbose=False,
        show_plot=False,
        do_plot=True)

    results_run = {}
    results_run["prec_t"] = prec_t
    results_run["ndcg_t"] = ndcg_t
    results_run["clicks_t"] = clicks_t
    results_run["prec_a"] = prec_a
    results_run["ndcg_a"] = ndcg_a
    results_run["clicks_a"] = clicks_a

    return results_run
Example #3
def recsys(alpha, beta):
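    # Objective for tuning the P3alpha/RP3beta hybrid: build the similarity for one
    # (alpha, beta) pair and evaluate it (p_iu, p_ui, pop, t_urm, dr and ev are assumed
    # to be defined at module level).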
    k = 200
    shrink = 100
    config = ('alpha=%.2f beta=%.2f k=%d shrink=%d binary=False' %
              (alpha, beta, k, shrink))
    #print(config)
    sim = p3r3.p3alpha_rp3beta_similarity(p_iu,
                                          p_ui,
                                          pop,
                                          k=k,
                                          shrink=shrink,
                                          alpha=alpha,
                                          beta=beta,
                                          verbose=True,
                                          mode=1)
    #Computing ratings and remove seed
    eurm = ss.dot_product(t_urm, sim, k=750)
    del sim
    eurm = eurm_remove_seed(eurm, dr)
    #evaluation
    res = ev.evaluate(eurm_to_recommendation_list(eurm), 'ciao', verbose=False)
    del eurm
    return res[0:3], config
Example #4
def recsys(shrink):
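    # Objective for tuning the S-Plus similarity shrink term (l, urm, t_urm, dr and ev are
    # assumed to be defined at module level).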
    t1 = 0.25
    t2 = 0.65
    c = 0.4
    k = 200
    config = ('l=%.2f t1=%.2f t2=%.2f c=%.2f k=%d shrink=%d binary=False' %
              (l, t1, t2, c, k, shrink))
    #print(config)
    sim = ss.s_plus_similarity(urm.T,
                               urm,
                               k=k,
                               t1=t1,
                               t2=t2,
                               c=c,
                               l=l,
                               normalization=True,
                               shrink=shrink,
                               binary=False,
                               verbose=True)
    #Computing ratings and remove seed
    eurm = ss.dot_product(t_urm, sim.T, k=750)
    del sim
    eurm = eurm_remove_seed(eurm, dr)
    #evaluation
    res = ev.evaluate(eurm_to_recommendation_list(eurm), 'ciao', verbose=False)
    del eurm
    return res[0:3], config
Example #5
    def __init__(self,
                 matrices_names,
                 matrices_array,
                 dr,
                 cat,
                 start,
                 end,
                 n_calls=1000,
                 n_random_starts=0.1,
                 n_points=50,
                 step=0.001,
                 verbose=True):
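        # Per-category setup for tuning ensemble weights: every input matrix is seed-filtered,
        # sliced to this category's 1000 test playlists and max-row normalised
        # (norm_max_row and Evaluator are assumed to be imported elsewhere).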
        self.target_metric = 'ndcg'
        self.best_score = 0
        self.best_params = 0
        self.norm = norm_max_row
        self.verbose = verbose

        self.n_cpu = int(multiprocessing.cpu_count() / 10)
        if self.n_cpu == 0:
            self.n_cpu = 1
        # Do not edit
        self.start = start
        self.end = end
        self.cat = cat
        self.global_counter = 0
        self.start_index = (cat - 1) * 1000
        self.end_index = cat * 1000
        self.matrices_array = list()
        self.matrices_names = matrices_names
        self.n_calls = n_calls
        self.x0 = None
        self.y0 = None
        self.n_random_starts = int(n_calls * n_random_starts)
        self.n_points = n_points
        self.step = step
        # memory_on_disk= False
        self.memory_on_notebook = True
        self.dr = dr
        self.ev = Evaluator(self.dr)

        for matrix in matrices_array:
            self.matrices_array.append(
                self.norm(
                    eurm_remove_seed(
                        matrix,
                        datareader=dr)[self.start_index:self.end_index]))

        del self.dr, matrices_array
Example #6
def evaluate_shrinked(W_sparse, urm_shrinked, pids_shrinked):

    W_sparse = W_sparse[pids_shrinked]

    eurm = dot_product(W_sparse, urm_shrinked, k=750).tocsr()

    eurm = eurm_remove_seed(eurm=eurm)

    rec_list = eurm_to_recommendation_list(eurm)


    ev.evaluate(recommendation_list=rec_list,
                name="slim_structure_parametribase_BPR_epoca_0_noepoche",
                return_overall_mean=False,
                show_plot=False, do_plot=True)
Example #7
    def compute_rating(self,
                       urm2=None,
                       datareader=None,
                       top_k=750,
                       verbose=False,
                       small=False,
                       mode="offline",
                       remove_seed=True):
        """
        :param urm: sparse matrix
        :param model: sparse matrix
        :param top_k: int, element to take for each row after fitting process
        :param small: boolean, if true return an eurm matrix with just the target playlist
        :param verbose: boolean, if true print debug information
        :param remove_seed: boolean, if true remove seed from eurm
        :return: sparse matrix, estimated urm
        """
        if small:
            self.urm = sps.csr_matrix(self.urm[self.pid])
        self.urm = sps.csr_matrix(self.urm)
        self.model = sps.csr_matrix(self.model)

        if verbose:
            print("[ Compute ratings ]")

            start_time = time.time()

        if urm2 is not None:
            self.urm = urm2[self.pid]
        self.eurm = dot(self.urm, self.model, k=top_k)

        print("eurm shape: " + str(self.eurm.shape))

        if remove_seed:
            if datareader is None:
                print(
                    '[ WARNING! Datareader is None in "compute_rating"; mode is set to '
                    + mode.upper() + ', creating it again. '
                    'A future version will require it. ]')
                from utils.datareader import Datareader
                datareader = Datareader(mode=mode, only_load=True)
            self.eurm = eurm_remove_seed(self.eurm, datareader=datareader)

        if verbose:
            print("time: " + str(int(time.time() - start_time) / 60))

        return self.eurm.tocsr()
Example #8
def recsys(shrink):
    config = ('alpha=0.4 k=200 shrink=%d binary=False' % (shrink))
    print(config)
    sim = ss.cosine_similarity(urm.T,
                               urm,
                               k=200,
                               alpha=0.4,
                               shrink=shrink,
                               binary=False,
                               verbose=True)
    #Computing ratings and remove seed
    eurm = ss.dot_product(t_urm, sim.T, k=750)
    del sim
    eurm = eurm_remove_seed(eurm, dr)
    #evaluation
    res = ev.evaluate(eurm_to_recommendation_list(eurm), 'ciao', verbose=False)
    del eurm
    return res[0:3], config
Example #9
    def track(self):
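        # Category-2 "top pop by track": for each test playlist, rank tracks by their
        # popularity among the playlists that contain the playlist's single seed track.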
        eurm = sps.lil_matrix(self.urm_of.shape)
        pids = self.dr_on.get_test_pids(cat=2)
        pids_all = self.dr_of.get_test_pids()

        for row in tqdm(pids):
            track_ind = self.urm_on.indices[self.urm_on.indptr[row]:self.urm_on.indptr[row + 1]][0]

            playlists = self.urm_col.indices[self.urm_col.indptr[track_ind]:self.urm_col.indptr[track_ind + 1]]

            top = self.urm_of[playlists].sum(axis=0).A1.astype(np.int32)
            track_ind_rec = top.argsort()[-501:][::-1]

            eurm[row, track_ind_rec] = top[track_ind_rec]

        eurm = eurm.tocsr()[pids_all]
        eurm = eurm_remove_seed(eurm, self.dr_on)
        print(eurm)
        return eurm.copy()
Example #10
    def album(self):
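        # Category-2 "top pop by album": rank tracks by their popularity among the
        # playlists that contain tracks from the seed track's album.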
        eurm = sps.lil_matrix(self.urm_of.shape)
        pids = self.dr_on.get_test_pids(cat=2)
        pids_all = self.dr_of.get_test_pids()
        ucm_album = self.dr_of.get_ucm_albums().tocsc()
        album_dic = self.dr_of.get_track_to_album_dict()

        for row in tqdm(pids):
            track_ind = self.urm_on.indices[self.urm_on.indptr[row]:self.urm_on.indptr[row + 1]][0]

            album = album_dic[track_ind]
            playlists = ucm_album.indices[ucm_album.indptr[album]:ucm_album.indptr[album+1]]

            top = self.urm_of[playlists].sum(axis=0).A1.astype(np.int32)
            track_ind_rec = top.argsort()[-501:][::-1]

            eurm[row, track_ind_rec] = top[track_ind_rec]

        eurm = eurm.tocsr()[pids_all]
        eurm = eurm_remove_seed(eurm, self.dr_on)

        return eurm
Example #11
def recsys(shrink):
    alpha = 0.25
    beta = 0.65
    k = 200
    config = ('alpha=%.2f beta=%.2f k=%d shrink=%d binary=False' %
              (alpha, beta, k, shrink))
    #print(config)
    sim = ss.tversky_similarity(urm.T,
                                urm,
                                k=k,
                                alpha=alpha,
                                beta=beta,
                                shrink=shrink,
                                binary=False,
                                verbose=True)
    #Computing ratings and remove seed
    eurm = ss.dot_product(t_urm, sim.T, k=750)
    del sim
    eurm = eurm_remove_seed(eurm, dr)
    #evaluation
    res = ev.evaluate(eurm_to_recommendation_list(eurm), 'ciao', verbose=False)
    del eurm
    return res[0:3], config
Example #12
                    result_dict[song] = result_dict[song] + fs.freq * len(fs.sequence)

                else:
                    result_dict[song] = fs.freq


        for song_predicted in result_dict:

            pred[i, song_predicted] = result_dict[song_predicted]

    eurm = eurm_remove_seed(pred, dr)

    rec_list = eurm_to_recommendation_list(eurm)

    ev.evaluate(rec_list, "cat2_top", verbose=True, do_plot=True, show_plot=True, save=True)



# sequences: [15565, 6186, 6288, 6292, 6294, 6295, 6298, 6310, 6334, 6336, 6337, 6339, 6340, 6362, 6380, 6387, 7597, 7603, 7604, 7605, 7606, 7607, 6173, 6077, 6040, 6027, 74, 76, 77, 81, 282, 768, 2163, 2506, 2507, 2508, 7609, 3084, 3166, 3183, 3282, 3283, 3697, 4211, 4420, 4443, 4493, 6019, 3162, 73, 8408, 8460, 15544, 15545, 15546, 15547, 15548, 15549, 15550, 15551, 15552, 15553, 15554, 15555, 15556, 15557, 15558, 15559, 15560, 15561, 15562, 15563, 15564, 15543, 15503, 15152, 14809, 8484, 8940, 10480, 10527, 10820, 11192, 11200, 11482, 11500, 11512, 8409, 12605, 12710, 12714, 12716, 12728, 12794, 13689, 13692, 14467, 14797, 14801, 12610, 51]
# sequences: [11500]
#
#
#
# [[11500], [12714]], 62
# [[11500], [70]], 62
# [[11500], [64]], 70
Example #13
        for token in tokens:
            playlists_with_tokens.extend(
                ucm_csc.indices[ucm_csc.indptr[token]:ucm_csc.indptr[token + 1]])

        urm_tmp = urm_csr[playlists_with_tokens]

        track_total_interactions = np.array(urm_tmp.sum(axis=0)).astype(
            np.int32)[0, :]  # like ravel

        top_pop = track_total_interactions.argsort()[-750:][::-1]

        rec_list[i] = top_pop
        i += 1

    np.save("nlp_toketoppop_rec_list_offline", rec_list)

    eurm = rec_list_to_eurm(rec_list=rec_list)
    eurm = eurm_remove_seed(eurm, dr)

    rec_list = eurm_to_recommendation_list(eurm)

    ev.evaluate(
        rec_list,
        "WEILA2_toktoktop_pop",
        verbose=True,
        do_plot=True,
        show_plot=True,
        save=True,
    )
Example #14
    x0 = None
    y0 = None

    if os.path.isfile(ROOT_DIR + '/bayesian_scikit/' + configuration_name + '/memory/cat'+ str(cat)+'_y0_MEMORY.pkl') and \
            os.path.isfile(ROOT_DIR + '/bayesian_scikit/' + configuration_name + '/memory/cat' + str(cat) + '_x0_MEMORY.pkl'):
        x0 = load_obj('cat' + str(cat) + '_x0_MEMORY', path= ROOT_DIR + '/bayesian_scikit/' + configuration_name + '/memory/')
        y0 = load_obj('cat' + str(cat) + '_y0_MEMORY', path= ROOT_DIR + '/bayesian_scikit/' + configuration_name + '/memory/')
        global_counter = len(y0)
        print("[ CAT"+str(cat)+" : RESUMING FROM RUN", global_counter, "]")

    print("[ CAT "+str(cat)+": STARTING, NOW LOADING MATRICES ]")
    matrices_names = read_params_dict(ROOT_DIR+'/bayesian_scikit/'+configuration_name+'/name_settings')[cat-1]
    file_locations = read_params_dict(ROOT_DIR+'/bayesian_scikit/bayesian_common_files/file_locations_offline')

    matrices_array = [norm( eurm_remove_seed( sps.load_npz(file_locations[x]), dr)[start_index:end_index]) for x in matrices_names ]

    del dr
    start_time = time.time()

    space = [Real(0, 100, name=x) for x in matrices_names]
    res = gp_minimize(objective_function,  space,
                base_estimator=None,
                n_calls=450+len(matrices_array)*calls_constant, n_random_starts=100,
                acq_func='gp_hedge',
                acq_optimizer='auto',
                x0=x0, y0=y0,
                random_state=None, verbose=False,
                callback=None, n_points=100,
                n_restarts_optimizer=10,
                xi=0.012, kappa=1.96,
Example #15
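    # Category-2 top-pop from co-occurrence: recommend the most popular tracks among the
    # playlists that share the test playlist's first seed track (urm_csr, urm_csc, rec_list,
    # i, dr and ev are assumed to be defined earlier in the script).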
    for playlist_id in tqdm(test_playlists, desc="shao belo"):

        songs = urm_csr.indices[urm_csr.indptr[playlist_id]:urm_csr.indptr[playlist_id + 1]]

        playlists_with_tokens = urm_csc.indices[
            urm_csc.indptr[songs[0]]:urm_csc.indptr[songs[0] + 1]]

        track_total_interactions = urm_csr[playlists_with_tokens].sum(
            axis=0).A1

        top_pop = track_total_interactions.argsort()[-601:][::-1]

        rec_list[i] = top_pop

        i += 1

    eurm = eurm_remove_seed(rec_list_to_eurm(rec_list), dr)

    rec_list = eurm_to_recommendation_list(eurm)

    ev.evaluate(
        rec_list,
        "cat2_top",
        verbose=True,
        do_plot=True,
        show_plot=True,
        save=True,
    )

    sps.save_npz("top_pop_cat2_" + mode, eurm)
from utils import post_processing as post
import scipy.sparse as sps
from utils.datareader import Datareader
from utils.post_processing import eurm_remove_seed

mode = "online"

dr = Datareader(verbose=False, mode=mode, only_load="False")

name = mode + "/slim_online"

eurm = eurm_remove_seed(sps.load_npz(mode + "/slim_online.npz"), dr)
# sps.save_npz(mode+"/online_nlp_knn100_bm25.npz",eurm)

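# Split the seed-filtered EURM into the ten challenge categories and save one .npz file per category.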
for i in range(1, 11):

    indices = dr.get_test_pids_indices(cat=i)
    save = eurm[indices]
    sps.save_npz(name + "-cat" + str(i) + ".npz", save)
Example #17
        pids = dr.get_test_pids()
        urm.data = np.ones(len(urm.data))

        ut.inplace_set_rows_zero(
            X=urm, target_rows=pids)  # don't learn from the challenge set
        urm.eliminate_zeros()

        p_ui = normalize(urm, norm="l1")
        p_iu = normalize(urm.T, norm="l1")
        top = urm.sum(axis=0).A1

        # Fitting data
        rec.fit(p_ui, p_iu, top, pids)

        #Computing similarity/model
        rec.compute_model(top_k=knn,
                          shrink=250,
                          alpha=0.5,
                          beta=0.5,
                          verbose=True)

        # INJECTING URM POS with only last 25 songs
        rec.urm = dr.get_last_n_songs_urm(n=cut)

        #Computing ratings
        rec.compute_rating(top_k=topk, datareader=dr, verbose=True, small=True)

        rec.eurm = eurm_remove_seed(rec.eurm, dr)

        sps.save_npz(complete_name, rec.eurm)
Example #18
import numpy as np
from utils.datareader import Datareader
from utils.evaluator import Evaluator          # assumed module path
from utils.pre_processing import norm_l1_row   # assumed module path
from utils.post_processing import eurm_to_recommendation_list, eurm_remove_seed
from personal.Ervin.Word2Vec_recommender import W2VRecommender
from personal.Ervin.ItemRank import ItemRank
from personal.Ervin.tf_collaborative_user import TF_collaborative_user
from recommenders.knn_collaborative_item import Knn_collaborative_item


if __name__ == '__main__':
    dr = Datareader(only_load=True, mode='offline', test_num='1', verbose=False)
    pid = dr.get_test_playlists().transpose()[0]
    urm = dr.get_urm()
    urm.data = np.ones(len(urm.data))
    ev = Evaluator(dr)

    TFRec = Knn_collaborative_item()
    W2V = W2VRecommender()
    TFRec.fit(urm, pid)
    W2V.fit(urm, pid)

    TFRec.compute_model(verbose=True, top_k=850)
    TFRec.compute_rating(top_k=750, verbose=True, small=True)
    W2V.compute_model(verbose=True, size=50, window=None)
    W2V.compute_rating(verbose=True, small=True, top_k=750)
    TFRec.eurm = norm_l1_row(eurm_remove_seed(TFRec.eurm, dr))
    W2V.eurm = norm_l1_row(eurm_remove_seed(W2V.eurm, dr))

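    # Sweep the mixing weight: blend the two L1-normalised, seed-filtered EURMs and evaluate each blend.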
    for alpha in np.arange(0.9, 0, -0.1):
        print('[ Alpha = {:.1f}]'.format(alpha))
        eurm = alpha * TFRec.eurm + (1 - alpha) * W2V.eurm
        ev.evaluate(recommendation_list=eurm_to_recommendation_list(eurm, remove_seed=False, datareader=dr),
                    name="KNNItem_W2V" + str(alpha), old_mode=False, save=True)
    file_locations = read_params_dict(ROOT_DIR + '/bayesian_scikit/bayesian_common_files/file_locations_' + mode)


    # LOAD MATRICES
    matrices_loaded = dict()
    all_matrices_names = set()
    for cat in range(1, 11):

        with open(ROOT_DIR+'/bayesian_scikit/'+configuration_name + '/best_params/cat'+str(cat)+'_params_dict') as f:
            best_params_dict = json.load(f)

        for name, value_from_bayesian in best_params_dict.items():
            all_matrices_names.add(name)
    for name in tqdm(all_matrices_names, desc='loading matrices'):
        if name not in matrices_loaded.keys() and name != 'norm':
            matrices_loaded[name] = eurm_remove_seed(sps.load_npz(file_locations[name]), dr)

    rec_list = [[] for x in range(10000)]
    eurms_cutted = [[] for x in range(10)]

    # BUILDING THE EURM FROM THE PARAMS
    for cat in tqdm(range(1, 11), desc="summing up the matrices"):

        start_index = (cat - 1) * 1000
        end_index = cat * 1000

        best_params_dict = read_params_dict(name='cat' + str(cat) + '_params_dict',
                 path=ROOT_DIR + '/bayesian_scikit/' + configuration_name + '/best_params/')


        norm = best_params_dict['norm']
Example #20
    def get_top_pop_track(self, mode):
        '''
        :return: csr_matrix filled with the recommendations for the cat 2 following track
        '''
        if mode == "online":
            self.dr_on = Datareader(verbose=False,
                                    mode='online',
                                    only_load=True)
            self.urm_on = self.dr_on.get_urm()
            self.urm_col = sps.csc_matrix(self.urm_on)
            self.top_p = np.zeros(self.urm_on.shape[1])

            eurm = sps.lil_matrix(self.urm_on.shape)
            pids = self.dr_on.get_test_pids(cat=2)
            pids_all = self.dr_on.get_test_pids()

            for row in tqdm(pids):
                track_ind = self.urm_on.indices[self.urm_on.indptr[row]:self.urm_on.indptr[row + 1]][0]

                playlists = self.urm_col.indices[self.urm_col.indptr[track_ind]:self.urm_col.indptr[track_ind + 1]]

                top = self.urm_on[playlists].sum(axis=0).A1.astype(np.int32)
                track_ind_rec = top.argsort()[-501:][::-1]

                eurm[row, track_ind_rec] = top[track_ind_rec]

            eurm = eurm.tocsr()[pids_all]
            eurm = eurm_remove_seed(eurm, self.dr_on)

        elif mode == "offline":
            self.dr_of = Datareader(verbose=False,
                                    mode='offline',
                                    only_load=True)
            self.urm_of = self.dr_of.get_urm()
            self.urm_col = sps.csc_matrix(self.urm_of)
            self.top_p = np.zeros(self.urm_of.shape[1])

            eurm = sps.lil_matrix(self.urm_of.shape)
            pids = self.dr_of.get_test_pids(cat=2)
            pids_all = self.dr_of.get_test_pids()

            for row in tqdm(pids):
                track_ind = self.urm_of.indices[self.urm_of.indptr[row]:self.urm_of.indptr[row + 1]][0]

                playlists = self.urm_col.indices[self.urm_col.indptr[track_ind]:self.urm_col.indptr[track_ind + 1]]

                top = self.urm_of[playlists].sum(axis=0).A1.astype(np.int32)
                track_ind_rec = top.argsort()[-501:][::-1]

                eurm[row, track_ind_rec] = top[track_ind_rec]

            eurm = eurm.tocsr()[pids_all]
            eurm = eurm_remove_seed(eurm, self.dr_of)

        return eurm.copy().tocsr()
Example #21
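    # Continuation of a category-2 script: eurm2 collects album-based top-pop scores
    # (eurm1, urm, ucm_album, artists_dic, pids_all and dr are assumed to be defined above);
    # both matrices are then seed-filtered and evaluated.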
    # TopPop Album
    album = artists_dic[track_ind]
    playlists = ucm_album.indices[ucm_album.indptr[album]:ucm_album.indptr[album + 1]]

    top = urm[playlists].sum(axis=0).A1.astype(np.int32)

    track_ind_rec = top.argsort()[-501:][::-1]

    eurm2[row, track_ind_rec] = top[track_ind_rec]

eurm1 = eurm1.tocsr()[pids_all]
eurm2 = eurm2.tocsr()[pids_all]

eurm1 = eurm_remove_seed(eurm1, dr)
eurm2 = eurm_remove_seed(eurm2, dr)

sps.save_npz("test1.npz", eurm1)

rec_list1 = eurm_to_recommendation_list(eurm1)
rec_list2 = eurm_to_recommendation_list(eurm2)
rec_list3 = append_rec_list(rec_list1 + rec_list2)

ev = Evaluator(dr)
ev.evaluate(rec_list1, name="enstest", level='track')
ev.evaluate(rec_list2, name="enstest", level='track')
ev.evaluate(rec_list3, name="enstest", level='track')

# rec.append(list(top_p))
Example #22
        arg = load_obj("best/cat" + str(i))
        w.append(reorder(dict(arg[:len(arg) - 1][0]), name[i - 1]))

    print("[ Loading matrix name ]")
    if mode == "offline":
        matrix_dict = load_obj("matrix_dict", path="")
        dir = "offline/"

    if mode == "online":
        matrix_dict = load_obj("matrix_dict_online", path="")
        dir = "online/"

    _name = flatten(name)
    loaded_matrix = dict(
        zip(_name, [
            eurm_remove_seed(sps.load_npz(directory + matrix_dict[n]), dr)
            for n in _name
        ]))

    matrix = []

    if type == "unique":
        print("[ Loading cat 1 ]")
        cat = 1
        m = list()
        for n in name[cat - 1]:
            m.append(loaded_matrix[n][0:1000])
        matrix.append(m)

        print("[ Loading cat 2 ]")
        cat = 2