示例#1
0
def prova():
    """Scratch routine: print the artist-to-tracks dictionary, then quit.

    The unconditional ``exit()`` right after the first print stops the
    interpreter, so the NLP / similarity / evaluation experiment written
    below it is never executed. It is kept only as reference code.
    """
    reader = Datareader(mode='offline', only_load=True)
    artist_to_tracks = reader.get_artist_to_tracks_dict()
    print(artist_to_tracks)
    exit()

    # ---- Everything from here on is unreachable (see docstring). ----
    reader = Datareader(mode='offline', only_load=True, verbose=False)
    test_pids = reader.get_test_pids()

    stopwords = STOP_WORDS
    token_weights = np.array(TOKEN_WEIGHTS)

    nlp = NLP(mode='playlists', datareader=reader, stopwords=STOP_WORDS)
    print(nlp.get_ucm().shape)

    evaluator = Evaluator(reader)

    # Multiply a previously saved user similarity with the UCM.
    ucm = nlp.get_ucm()
    user_similarity = sparse.load_npz(
        ROOT_DIR + '/data/cf_user_similarity.npz')

    print('Computing dot...')
    ucm = dot_product(user_similarity, ucm, k=200)
    print('NNZ', ucm.nnz)
    exit()

    urm = reader.get_urm()

    # Similarity between UCM rows, timed.
    print('Computing similarity...')
    t0 = time.time()
    similarity = tversky_similarity(
        ucm, shrink=200, alpha=0.1, beta=1).tocsr()
    print(time.time() - t0)

    # Estimated URM, restricted to the test playlists.
    print('Computing eurm...')
    t0 = time.time()
    eurm_nlp = dot_product(similarity, urm, k=500).tocsr()
    eurm_nlp = eurm_nlp[test_pids, :]

    rec_list = eurm_to_recommendation_list(eurm_nlp)
    evaluator.evaluate(rec_list, name='nlp_enriched')
示例#2
0
def icm():
    """Evaluate offline recommendations built from the NLP item-content matrix.

    Steps, as performed below:
      1. load the offline ``Datareader`` and build an ``Evaluator`` on it;
      2. get the ICM from ``NLP(mode='tracks')`` and pass it through
         ``bm25_row``;
      3. compute ``tversky_similarity`` on the ICM (elapsed seconds are
         printed);
      4. multiply the URM rows of the test playlists by that similarity
         (``dot_product`` with ``k=500``);
      5. convert the result to a recommendation list and evaluate it under
         the name ``'nlp_enriched'``.

    :return: None
    """
    datareader = Datareader(mode='offline', only_load=True)
    evaluator = Evaluator(datareader)

    print('NLP...')
    test_playlists = datareader.get_test_pids()

    nlp = NLP(datareader=datareader, stopwords=[], mode='tracks')
    print('Getting ucm and icm...')
    # Named icm_matrix so the local does not shadow this function's name.
    icm_matrix = nlp.get_icm()
    icm_matrix = bm25_row(icm_matrix)

    print('Computing similarity...')
    start = time.time()
    similarity = tversky_similarity(icm_matrix, shrink=200, alpha=0.1, beta=1)
    similarity = similarity.tocsr()
    print(time.time() - start)

    urm = datareader.get_urm()

    print('Computing eurm...')
    # Only the test playlists' rows of the URM are multiplied.
    eurm_nlp = dot_product(urm[test_playlists, :], similarity, k=500)
    eurm_nlp = eurm_nlp.tocsr()

    evaluator.evaluate(eurm_to_recommendation_list(eurm_nlp),
                       name='nlp_enriched')
示例#3
0
def online():
    """Build the online NLP estimated URM and save it to disk.

    Steps, as performed below:
      1. load the online ``Datareader`` and get the UCM from ``NLP`` with
         an empty stop-word list;
      2. compute ``tversky_similarity`` on the UCM (elapsed seconds are
         printed);
      3. multiply the similarity by the first element returned by
         ``get_urm_shrinked()`` (``dot_product`` with ``k=500``);
      4. keep the last 10000 rows of the result and save them to
         ``ROOT_DIR + '/data/eurm_nlp_no_stop_online.npz'``.

    :return: None
    """
    datareader = Datareader(mode='online', only_load=True)

    print('NLP...')
    nlp = NLP(datareader, stopwords=[])
    ucm = nlp.get_ucm()
    # Disabled experiments, kept for reference:
    #ucm = bm25_row(ucm)
    #inplace_csr_column_scale(ucm, token_weights)

    urm = datareader.get_urm_shrinked()[0]

    print('Computing similarity...')
    start = time.time()
    similarity = tversky_similarity(ucm, shrink=200, alpha=0.1, beta=1)
    similarity = similarity.tocsr()
    print(time.time() - start)

    print('Computing eurm...')
    eurm_nlp = dot_product(similarity, urm, k=500)
    eurm_nlp = eurm_nlp.tocsr()
    print(eurm_nlp.shape)
    # NOTE(review): presumably the test playlists are the last 10000 rows
    # in online mode -- confirm against Datareader.
    eurm_nlp = eurm_nlp[-10000:, :]

    sparse.save_npz(ROOT_DIR + '/data/eurm_nlp_no_stop_online.npz', eurm_nlp)
示例#4
0
def evaluateRecommendationsSpotify(self):
    """Evaluate the current ``self.W_sparse`` and return the mean metrics.

    Reads the module-level names ``pids_converted`` (row selector for
    ``self.URM_train``) and ``ev`` (the evaluator); both must be defined
    before this is called.

    :param self: object exposing ``URM_train``, ``W_sparse``,
                 ``configuration`` and ``currentEpoch``
    :return: dict with keys ``prec_t``, ``ndcg_t``, ``clicks_t``,
             ``prec_a``, ``ndcg_a``, ``clicks_a`` holding the six values
             returned by ``ev.evaluate``
    """
    user_profile_batch = self.URM_train[pids_converted]
    print("dot product")
    eurm = dot_product(user_profile_batch, self.W_sparse, k=750).tocsr()
    eurm = eurm_remove_seed(eurm)

    # NOTE(review): the 10000 x 500 size is hard-coded; an eurm with more
    # than 10000 rows would raise IndexError below -- confirm the input size.
    recommendation_list = np.zeros((10000, 500))
    for row in range(eurm.shape[0]):
        # Slice the sparse row once: each eurm[row] builds a new matrix.
        row_vector = eurm[row]
        # Positions of the (up to) 500 largest stored values, best first.
        best = row_vector.data.argsort()[-500:][::-1]
        ind = row_vector.indices[best]
        # Rows with fewer than 500 stored values stay zero-padded.
        recommendation_list[row, 0:len(ind)] = ind

    prec_t, ndcg_t, clicks_t, prec_a, ndcg_a, clicks_a = ev.evaluate(
        recommendation_list=recommendation_list,
        name=self.configuration + "_epoca" + str(self.currentEpoch),
        return_overall_mean=True,
        verbose=False,
        show_plot=False,
        do_plot=True)

    return {
        "prec_t": prec_t,
        "ndcg_t": ndcg_t,
        "clicks_t": clicks_t,
        "prec_a": prec_a,
        "ndcg_a": ndcg_a,
        "clicks_a": clicks_a,
    }
示例#5
0
def evaluateRecommendationsSpotify_RECOMMENDER(recommender):
    """Evaluate ``recommender.W_sparse`` and return the mean metrics.

    Meant to be called with the recommender itself as argument. Reads the
    module-level names ``pids_converted`` (row selector for
    ``recommender.URM_train``) and ``ev`` (the evaluator).

    :param recommender: object exposing ``URM_train``, ``W_sparse``,
                        ``configuration`` and ``currentEpoch``
    :return: dict with keys ``prec_t``, ``ndcg_t``, ``clicks_t``,
             ``prec_a``, ``ndcg_a``, ``clicks_a`` holding the six values
             returned by ``ev.evaluate``
    """
    user_profile_batch = recommender.URM_train[pids_converted]

    eurm = dot_product(user_profile_batch, recommender.W_sparse, k=500).tocsr()

    # NOTE(review): the 10000 x 500 size is hard-coded; an eurm with more
    # than 10000 rows would raise IndexError below -- confirm the input size.
    recommendation_list = np.zeros((10000, 500))
    for row in tqdm(range(eurm.shape[0]), desc="spotify rec list"):
        # Slice the sparse row once: each eurm[row] builds a new matrix.
        row_vector = eurm[row]
        # Positions of the (up to) 500 largest stored values, best first.
        best = row_vector.data.argsort()[-500:][::-1]
        ind = row_vector.indices[best]
        # Rows with fewer than 500 stored values stay zero-padded.
        recommendation_list[row, 0:len(ind)] = ind

    prec_t, ndcg_t, clicks_t, prec_a, ndcg_a, clicks_a = ev.evaluate(
        recommendation_list=recommendation_list,
        name=recommender.configuration + "epoca" +
        str(recommender.currentEpoch),
        return_overall_mean=True,
        verbose=False,
        show_plot=False,
        do_plot=True)

    return {
        "prec_t": prec_t,
        "ndcg_t": ndcg_t,
        "clicks_t": clicks_t,
        "prec_a": prec_a,
        "ndcg_a": ndcg_a,
        "clicks_a": clicks_a,
    }
示例#6
0
    def fitnessFunction(self, individual):
        """
        Score one individual by the track-level precision its column
        weights achieve.

        The individual is used as the per-column scale passed to
        ``inplace_csr_column_scale`` on a float64 copy of ``self.UCM``; a
        Tversky similarity and an estimated URM are then computed and
        evaluated on the test playlists.

        :param individual: sequence convertible to a numpy array, one value
                           per UCM column
        :return: one-element tuple ``(score,)`` where score is the mean of
                 the ``'prec'`` metric for categories 1 and 2
        """
        # Convert list into a numpy array
        individual = np.array(individual)

        # Scale each column of a private copy of the UCM
        if self.verbose:
            print('Filtering UCM...')
        start = time.time()
        # astype() already returns a new matrix that shares no memory with
        # self.UCM, so a preceding .copy() would only duplicate the work;
        # the in-place scaling below never touches self.UCM.
        UCM_filtered = self.UCM.astype(np.float64)
        inplace_csr_column_scale(UCM_filtered, individual)
        if self.verbose:
            print('UCM filtered in', time.time() - start, 'sec')

        # Compute similarity
        if self.verbose:
            print('Computing similarity...')
        start = time.time()
        similarity = tversky_similarity(UCM_filtered, shrink=200, alpha=0.1,
                                        beta=1, target_items=self.test_playlists_indices,
                                        binary=False)
        similarity = similarity.tocsr()
        if self.verbose:
            print('Similarity computed in', time.time() - start, 'sec')

        # Compute eurm
        if self.verbose:
            print('Computing eurm...')
        start = time.time()
        eurm = dot_product(similarity, self.URM_train, k=500)
        if self.verbose:
            print('eurm computed in', time.time() - start, 'sec')
            print('Converting eurm in csr...')
        start = time.time()
        eurm = eurm.tocsr()
        eurm = eurm[self.test_playlists_indices, :]
        if self.verbose:
            print('eurm converted in', time.time() - start, 'sec')

        # Evaluate
        rec_list = eurm_to_recommendation_list(eurm)
        print('current', self.current)

        score_cat_1 = self.evaluator.evaluate_single_metric(rec_list, name='Genetic', metric='prec',
                                                            level='track', cat=1, verbose=False)
        score_cat_2 = self.evaluator.evaluate_single_metric(rec_list, name='Genetic', metric='prec',
                                                            level='track', cat=2, verbose=False)
        score = (score_cat_1 + score_cat_2) / 2

        # Counter of evaluated individuals, printed above as 'current'
        self.current += 1

        if self.verbose:
            print(score)

        # NOTE(review): the sum equals the number of selected features only
        # if the individual is a 0/1 mask -- confirm.
        print("Numfeatures {}".format(np.sum(individual)))
        print('\n')

        # Returned as a one-element tuple (note the trailing comma).
        return score,
    def get_eurm_from_icm(self, urm, test_playlists):
        """
        Compute the ICM, then the similarity and return the EURM sliced for test playlists.
        :param urm: the user rating matrix; stored on ``self.urm``
        :param test_playlists: the pids of the test playlists (used only
                               when the datareader is not online)
        :return: eurm: the estimated eurm of shape (10K, 2M)
        """
        self.urm = urm
        self.get_similarity_from_icm()

        if self.verbose:
            print('Computing similarity from ucm...')

        # NOTE(review): inside a class body ``__online`` is name-mangled to
        # ``_<ThisClass>__online``, so this lookup likely fails on the
        # datareader -- confirm the intended method name.
        if self.datareader.__online():
            # Slice the LAST 10000 rows. The previous ``[-10000, :]``
            # (no colon) selected only the single row at index -10000.
            self.eurm = dot_product(self.urm[-10000:, :],
                                    self.similarity_icm,
                                    k=500)
        else:
            self.eurm = dot_product(self.urm[test_playlists, :],
                                    self.similarity_icm,
                                    k=500)

        self.eurm = self.eurm.tocsr()
        return self.eurm
示例#8
0
def evaluate_shrinked(W_sparse, urm_shrinked,  pids_shrinked ):
    """Evaluate ``W_sparse`` restricted to the given rows against a URM.

    Reads the module-level evaluator ``ev``.

    :param W_sparse: similarity matrix; only rows ``pids_shrinked`` are used
    :param urm_shrinked: matrix the selected rows are multiplied by
    :param pids_shrinked: row selector applied to ``W_sparse``
    :return: None
    """
    selected_rows = W_sparse[pids_shrinked]

    # Estimated URM, seeds removed, then turned into a recommendation list.
    scores = dot_product(selected_rows, urm_shrinked, k=750).tocsr()
    scores = eurm_remove_seed(eurm=scores)
    recommendations = eurm_to_recommendation_list(scores)

    ev.evaluate(
        recommendation_list=recommendations,
        name="slim_structure_parametribase_BPR_epoca_0_noepoche",
        return_overall_mean=False,
        show_plot=False,
        do_plot=True,
    )
def evaluate_shrinked(W_sparse, urm_shrinked, pids_shrinked):
    """Evaluate ``W_sparse`` on the selected rows of a URM.

    Reads the module-level evaluator ``ev``.

    :param W_sparse: similarity matrix the user profiles are multiplied by
    :param urm_shrinked: URM; only rows ``pids_shrinked`` are used
    :param pids_shrinked: row selector applied to ``urm_shrinked``
    :return: None
    """
    user_profile_batch = urm_shrinked[pids_shrinked]

    eurm = dot_product(user_profile_batch, W_sparse, k=500).tocsr()

    # NOTE(review): the 10000 x 500 size is hard-coded; an eurm with more
    # than 10000 rows would raise IndexError below -- confirm the input size.
    recommendation_list = np.zeros((10000, 500))
    for row in tqdm(range(eurm.shape[0]), desc="spotify rec list shrinked"):
        # Slice the sparse row once: each eurm[row] builds a new matrix.
        row_vector = eurm[row]
        # Positions of the (up to) 500 largest stored values, best first.
        best = row_vector.data.argsort()[-500:][::-1]
        ind = row_vector.indices[best]
        # Rows with fewer than 500 stored values stay zero-padded.
        recommendation_list[row, 0:len(ind)] = ind

    ev.evaluate(recommendation_list=recommendation_list,
                name="slim_structure_parametribase_BPR_epoca_0_noepoche",
                return_overall_mean=False,
                show_plot=False,
                do_plot=True)
示例#10
0
def new():
    """Score test playlists directly against tracks via UCM x ICM^T.

    Steps, as performed below:
      1. load the offline ``Datareader`` and build an ``Evaluator`` on it;
      2. get the UCM and the ICM from ``NLP(mode='both')`` and pass each
         through ``bm25_row``;
      3. multiply the UCM rows of the test playlists by the transposed ICM
         (``dot_product`` with ``k=500``; elapsed seconds are printed);
      4. save the result to
         ``ROOT_DIR + '/data/eurm_nlp_new_method_offline.npz'`` and
         evaluate it under the name ``'nlp_new_method'``.

    :return: None
    """
    datareader = Datareader(mode='offline', only_load=True)
    evaluator = Evaluator(datareader)

    print('NLP...')
    test_playlists = datareader.get_test_pids()

    nlp = NLP(datareader=datareader, stopwords=[], mode='both')
    print('Getting ucm and icm...')
    ucm = nlp.get_ucm()
    ucm = bm25_row(ucm)
    icm = nlp.get_icm()
    icm = bm25_row(icm)
    # Transposed outside the timed section, as before.
    icm_T = icm.T

    print('Computing eurm...')
    start = time.time()
    eurm_nlp = dot_product(ucm[test_playlists, :], icm_T, k=500)
    print(time.time() - start)

    print('Converting to csr...')
    eurm_nlp = eurm_nlp.tocsr()
    print(eurm_nlp.shape)

    sparse.save_npz(ROOT_DIR + '/data/eurm_nlp_new_method_offline.npz',
                    eurm_nlp)
    evaluator.evaluate(eurm_to_recommendation_list(eurm_nlp),
                       name='nlp_new_method',
                       show_plot=False)
# Similarity-based recommendation from `ucm`: weight it with bm25_row,
# compute a Tversky similarity, multiply by the URM, then save and evaluate.
# NOTE(review): `dr`, `ucm`, `urm`, `evaluator` and `nome` are not created in
# this section; they must already exist when these statements run.
test_playlists = dr.get_test_pids()
print('ucm', ucm.shape)
print('Computing similarity...')
start = time.time()
# The timer started above covers both the bm25_row call and the similarity
# computation, because `start` is taken before bm25_row runs.
ucm= bm25_row(ucm)

similarity = tversky_similarity(ucm, binary=False, shrink=1, alpha=0.1, beta=1)
similarity = similarity.tocsr()
print(time.time() - start)


print('Computing eurm...')
start = time.time()
# Estimated URM: similarity times URM (k=500 is passed to dot_product),
# converted to CSR and then restricted to the rows of the test playlists.
eurm = dot_product(similarity, urm, k=500)
eurm = eurm.tocsr()
eurm = eurm[test_playlists, :]
print('eurm', eurm.shape)
print(time.time() - start)


# Turn the estimated URM into a recommendation list.
rec_list = eurm_to_recommendation_list(eurm)

# Both outputs use relative paths, so they land in the current working
# directory; np.save adds the ".npy" extension to the given name.
# NOTE(review): the file names say "online" although the result is passed to
# the evaluator right below -- confirm which mode `dr` was loaded in.
sps.save_npz("nlp_eurm_online_bm25.npz", eurm, compressed=False)
np.save("nlp_rec_list_online_bm25",rec_list)

evaluator.evaluate(rec_list, name='AAANLP_bm25_'+nome, verbose=True, show_plot=False)

# Build a playlist x artist matrix, compute a Tversky similarity on it,
# multiply by the URM and save the rows of the test playlists.
# NOTE(review): `playlists`, `artists`, `pids`, `dr` and `urm` are not
# created in this section; they must already exist when these statements run.
print('ucm...')
# One entry of 1 per (playlists[i], artists[i]) pair; the csr_matrix
# constructor sums repeated pairs.
# NOTE(review): 1049361 is a hard-coded row count -- presumably the total
# number of playlists; confirm.
ucm = sparse.csr_matrix((np.ones(len(playlists)), (playlists, artists)),
                        shape=(1049361, len(dr.get_artists())))
# The matrix is already CSR at this point (built by sparse.csr_matrix above).
ucm = ucm.tocsr()
# Keep only the rows selected by `pids`.
ucm = ucm[pids]
print(ucm.shape)

ucm = bm25_row(ucm)

print('similarity..')
# Similarity of the matrix against its own transpose, with k=800.
sim = tversky_similarity(ucm,
                         ucm.T,
                         shrink=200,
                         alpha=0.1,
                         beta=1,
                         k=800,
                         verbose=1,
                         binary=False)
sim = sim.tocsr()

test_pids = list(dr.get_test_pids())

# Estimated URM (k=750 is passed to dot_product), restricted to the test
# playlists and saved under a relative path (current working directory).
eurm = dot_product(sim, urm, k=750)
eurm = eurm.tocsr()
eurm = eurm[test_pids, :]
sparse.save_npz('eurm_artists.npz', eurm)

# Evaluation is disabled; only the saved matrix is produced.
#ev.evaluate(eurm_to_recommendation_list(eurm), name='cbf_user_artist', show_plot=False)

# Stop the script here; nothing after this line runs.
exit()
示例#13
0
                  lanca2=lanca2)

        ucm = nlp.get_UCM(data1=data1)
        urm = dr.get_urm()
        test_playlists = dr.get_test_pids()
        ucm = bm25_row(ucm)

        similarity = tversky_similarity(ucm,
                                        binary=False,
                                        shrink=1,
                                        alpha=0.1,
                                        beta=1)
        similarity = similarity.tocsr()

        #eurm
        eurm = dot_product(similarity, urm, k=topk)
        eurm = eurm.tocsr()
        eurm = eurm[test_playlists, :]

        rec_list = eurm_to_recommendation_list(eurm)

        sps.save_npz(mode + "_" + name + "_bm25.npz", eurm, compressed=False)
        np.save(mode + "_" + name + "_bm25", rec_list)

        #evaluate
        ev = Evaluator(dr)
        ev.evaluate(rec_list, name=name, verbose=True, show_plot=False)

    if mode == "online":

        nlp = NLP(dr,
示例#14
0
                 lambda_j=lambda_j,
                 learning_rate=learning_rate,
                 topK=topk,
                 sgd_mode='adam',
                 gamma=0.999,
                 beta_1=beta_1,
                 beta_2=beta_2,
                 stop_on_validation=True,
                 lower_validatons_allowed=1,
                 validation_metric="ndcg_t",
                 validation_function=evaluate_for_online,
                 validation_every_n=1)

        # calculating eurm, evaluation, save
        user_profile_batch = slim.URM_train[pids_converted]
        eurm = dot_product(user_profile_batch, slim.W_sparse, k=500).tocsr()
        recommendation_list = eurm_to_recommendation_list(eurm)

        sps.save_npz(ROOT_DIR + "/results/" + complete_name + ".npz",
                     eurm,
                     compressed=False)
        ev.evaluate(recommendation_list=recommendation_list,
                    name=complete_name)

    elif mode == "online":
        ####### DATA INIZIALIZATION ONLINE #################
        dummy_variable = 0
        dr = Datareader(mode="online", only_load=True, verbose=False)
        pids = dr.get_test_pids()

        urm = dr.get_urm()
示例#15
0
        nlp_strict = NLPStrict(dr)

        # Get ucm
        ucm = nlp_strict.get_UCM()

        # Compute similarity (playlists x playlists)
        sim = tversky_similarity(ucm,
                                 ucm.T,
                                 shrink=200,
                                 alpha=0.1,
                                 beta=1,
                                 k=knn)
        sim = sim.tocsr()

        # Recommendation
        eurm = dot_product(sim, urm, k=topk)
        eurm = eurm.tocsr()
        eurm = eurm[test_pids, :]

        rec_list = eurm_to_recommendation_list(eurm, dr)

        if save_eurm:
            sps.save_npz(mode + "_" + name + ".npz", eurm, compressed=False)

        # Submission
        ev.evaluate(rec_list, name=name)

    elif mode == 'online':
        # Setup
        dr = Datareader(mode=mode, verbose=False, only_load=True)
        sb = Submitter(dr)