def offline():
    """Offline boosting pipeline: merge two ensembled EURMs, apply the
    Hole/Tail/Album boosts, save the result and evaluate it."""
    # Initialization
    datareader = Datareader(mode='offline', only_load=True, verbose=False)
    evaluator = Evaluator(datareader)

    # Load the two ensembled estimated URMs and merge them
    eurm_first = sparse.load_npz(
        ROOT_DIR +
        '/data/lele/ensembled_CLUSTERARTISTScat4-5-6-8-10_offline.npz')
    eurm_second = sparse.load_npz(
        ROOT_DIR + '/data/lele/ensembled_SUBCREATIVA_offline.npz')
    eurm = combine_two_eurms(eurm_first,
                             eurm_second,
                             cat_first=[4, 5, 6, 8, 10])

    # Similarity matrix shared by the boosts below
    sim = sparse.load_npz(ROOT_DIR + '/data/sim_offline.npz')

    # TopBoost (disabled)
    # topb = TopBoost(dr, eurm_ens, sim)
    # eurm_ens = topb.boost_eurm(categories=[9], top_k=100, gamma=0.01)

    # HoleBoost: category 8 first, then category 10 on the boosted matrix
    for cats, k in (([8], 300), ([10], 150)):
        hole_booster = HoleBoost(similarity=sim,
                                 eurm=eurm,
                                 datareader=datareader,
                                 norm=norm_l1_row)
        eurm = hole_booster.boost_eurm(categories=cats, k=k, gamma=1)

    # TailBoost
    tail_booster = TailBoost(similarity=sim,
                             eurm=eurm,
                             datareader=datareader,
                             norm=norm_l2_row)
    eurm = tail_booster.boost_eurm(categories=[9, 7, 6, 5],
                                   last_tracks=[10, 3, 3, 3],
                                   k=[100, 80, 100, 100],
                                   gamma=[0.01, 0.01, 0.01, 0.01])

    # AlbumBoost
    album_booster = AlbumBoost(datareader, eurm)
    eurm = album_booster.boost_eurm(categories=[3, 4, 7, 9],
                                    gamma=2,
                                    top_k=[3, 3, 10, 40])

    # MatchBoost (disabled)
    # mb = MatchBoost(datareader=dr, eurm=eurm_ens, top_k_alb=5000, top_k_art=10000)
    # eurm_ens, pids = mb.boost_eurm(categories='all', k_art=300, k_alb=300, gamma_art=0.1, gamma_alb=0.1)

    # Save and evaluate
    rec_list = eurm_to_recommendation_list(eurm, datareader=datareader)
    sparse.save_npz('FINAL.npz', eurm)
    evaluator.evaluate(rec_list, name='LELE_boosts.csv')
Пример #2
0
def icm():
    """NLP content-based pipeline on the track ICM.

    Builds the NLP item-content matrix, weights it with BM25, computes a
    Tversky item-item similarity, derives the estimated URM for the test
    playlists, and evaluates the resulting recommendations.
    """
    datareader = Datareader(mode='offline', only_load=True)
    evaluator = Evaluator(datareader)

    print('NLP...')
    test_playlists = datareader.get_test_pids()

    # Stopwords deliberately disabled (empty list).
    nlp = NLP(datareader=datareader, stopwords=[], mode='tracks')
    print('Getting ucm and icm...')
    icm = nlp.get_icm()
    icm = bm25_row(icm)

    print('Computing similarity...')
    start = time.time()
    similarity = tversky_similarity(icm, shrink=200, alpha=0.1, beta=1)
    similarity = similarity.tocsr()
    print(time.time() - start)

    urm = datareader.get_urm()

    print('Computing eurm...')
    start = time.time()
    eurm_nlp = dot_product(urm[test_playlists, :], similarity, k=500)
    eurm_nlp = eurm_nlp.tocsr()
    # Original timed this step but never printed the elapsed time.
    print(time.time() - start)

    # sparse.save_npz(ROOT_DIR + '/data/eurm_nlp_weighted_offline.npz', eurm_nlp)
    evaluator.evaluate(eurm_to_recommendation_list(eurm_nlp),
                       name='nlp_enriched')
def grid_holeboost():
    """Grid search over the HoleBoost hyperparameters (k, gamma)
    on categories 8 and 10, evaluating each configuration."""
    dr = Datareader(mode='offline', only_load=True, verbose=False)
    evaluator = Evaluator(dr)

    # Load and merge the two ensembled EURMs.
    eurm_a = sparse.load_npz(
        ROOT_DIR +
        '/data/lele/ensembled_CLUSTERARTISTScat4-5-6-8-10_offline.npz')
    eurm_b = sparse.load_npz(
        ROOT_DIR + '/data/lele/ensembled_SUBCREATIVA_offline.npz')
    eurm = combine_two_eurms(eurm_a, eurm_b, cat_first=[4, 5, 6, 8, 10])
    similarity = sparse.load_npz(ROOT_DIR + '/data/sim_offline.npz')

    k_grid = [50, 100, 150, 200, 250, 300, 350, 400]
    gamma_grid = [1, 2, 5, 10]

    for k in k_grid:
        for gamma in gamma_grid:
            # Fresh booster per configuration; the base eurm is never
            # overwritten, so every run starts from the pristine ensemble.
            booster = HoleBoost(similarity=similarity,
                                eurm=eurm,
                                datareader=dr,
                                norm=norm_l1_row)
            boosted = booster.boost_eurm(categories=[8, 10], k=k, gamma=gamma)
            rec_list = eurm_to_recommendation_list(boosted, datareader=dr)

            print(
                '--------------------------------------------------------------------------'
            )
            print('K =', k)
            print('G =', gamma)
            evaluator.evaluate(rec_list, name='hb', save=False)
Пример #4
0
def prova():
    """Scratch/experiment function ("prova" = Italian for "test").

    NOTE(review): the first exit() below makes everything after it
    unreachable; the second exit() cuts the remainder as well. The dead
    code is kept as work-in-progress that the author toggles by moving
    the exit() calls.
    """
    dr = Datareader(mode='offline', only_load=True)
    print(dr.get_artist_to_tracks_dict())
    exit()

    # ---- UNREACHABLE from here down (first exit() above) ----
    dr = Datareader(mode='offline', only_load=True, verbose=False)
    test_playlists = dr.get_test_pids()

    # Assigned but never used below.
    stopwords = STOP_WORDS
    token_weights = np.array(TOKEN_WEIGHTS)

    nlp = NLP(mode='playlists', datareader=dr, stopwords=STOP_WORDS)
    s = nlp.get_ucm()
    print(s.shape)

    evaluator = Evaluator(dr)

    ucm = nlp.get_ucm()
    sim = sparse.load_npz(ROOT_DIR + '/data/cf_user_similarity.npz')

    print('Computing dot...')
    ucm = dot_product(sim, ucm, k=200)
    print('NNZ', ucm.nnz)
    exit()

    # ---- UNREACHABLE from here down (second exit() above) ----
    urm = dr.get_urm()

    # ucm = ucm.astype(np.float64)
    # inplace_csr_column_scale(ucm, token_weights)

    print('Computing similarity...')
    start = time.time()
    # Compute similarity
    similarity = tversky_similarity(ucm, shrink=200, alpha=0.1, beta=1)
    similarity = similarity.tocsr()
    print(time.time() - start)

    print('Computing eurm...')
    start = time.time()
    # Compute eurm
    eurm_nlp = dot_product(similarity, urm, k=500)
    eurm_nlp = eurm_nlp.tocsr()
    eurm_nlp = eurm_nlp[test_playlists, :]

    #sparse.save_npz(ROOT_DIR + '/data/eurm_nlp_weighted_offline.npz', eurm_nlp)
    evaluator.evaluate(eurm_to_recommendation_list(eurm_nlp),
                       name='nlp_enriched')
Пример #5
0
def main(args):
    """Training loop: evaluate once up front, train for ``args.epochs``,
    periodically re-evaluate and checkpoint, and rewrite the loss and
    accuracy curves after every epoch.

    Fix: the original opened two new matplotlib figures per epoch and
    never closed them, leaking figures for the whole run (matplotlib
    warns once more than 20 are open).
    """
    sys.stdout = Logger(args.log_dir)

    train_loader, test_loader = get_data(args)
    model = BaseModel(args)
    evaluator = Evaluator(model=model, data_loader=test_loader)

    # Baseline accuracy before any training.
    best_acc = evaluator.evaluate()

    accuracies = [best_acc]
    losses = []

    for e in range(1, args.epochs + 1):
        epoch_loss = 0
        print("Epoch", e)
        for data in tqdm(train_loader):
            model.set_input(data)
            model.optimize_parameters()
            epoch_loss += model.get_loss()

        print("Epoch finished with loss", epoch_loss)
        losses.append(epoch_loss)

        if e % args.eval_step == 0:
            acc = evaluator.evaluate()
            accuracies.append(acc)
            best_acc = max(acc, best_acc)
            print("[Epoch {}] Accuracy:{:.2f}, Best Accuracy:{:.2f}".format(
                e, acc, best_acc))

        if e % args.save_step == 0:
            model.save_model(e)

        model.update_lr()

        # Rewrite the training-loss curve; close the figure to avoid
        # accumulating one open figure per plot per epoch.
        plt.figure()
        plt.plot(range(len(losses)), losses)
        plt.xlabel('Epochs')
        plt.ylabel('Training Loss')
        plt.savefig(os.path.join(args.exp_dir, 'losses.png'))
        plt.close()

        plt.figure()
        plt.plot(range(len(accuracies)), accuracies)
        plt.xlabel('Epochs')
        plt.ylabel('Test Accuracy')
        plt.savefig(os.path.join(args.exp_dir, 'accuracies.png'))
        plt.close()
def grid_tailboost():
    """Grid search over the TailBoost hyperparameters (last_tracks, k, gamma).

    Fix: the original reassigned ``eurm_ens`` inside the triple loop, so
    every configuration after the first was applied on top of the
    previously boosted matrix instead of the pristine ensemble,
    invalidating the grid-search results (compare ``grid_holeboost``,
    which correctly uses a separate boosted variable).
    """
    datareader = Datareader(mode='offline', only_load=True, verbose=False)
    ev = Evaluator(datareader)

    # LOAD AND COMBINE
    eurm_lele = sparse.load_npz(
        ROOT_DIR +
        '/data/lele/ensembled_CLUSTERARTISTScat4-5-6-8-10_offline.npz')
    eurm_std = sparse.load_npz(ROOT_DIR +
                               '/data/lele/ensembled_SUBCREATIVA_offline.npz')

    eurm_ens = combine_two_eurms(eurm_lele,
                                 eurm_std,
                                 cat_first=[4, 5, 6, 8, 10])
    sim = sparse.load_npz(ROOT_DIR + '/data/sim_offline.npz')

    # TAILBOOST
    for lt in [2, 3, 5, 6, 10]:
        for k in [20, 50, 80, 100, 150]:
            for g in [0.005, 0.01, 0.02, 0.05]:

                tb = TailBoost(similarity=sim,
                               eurm=eurm_ens,
                               datareader=datareader,
                               norm=norm_l2_row)
                # Boost the *original* ensemble; never overwrite eurm_ens.
                eurm_boosted = tb.boost_eurm(categories=[9, 7, 6, 5],
                                             last_tracks=[lt, lt, lt, lt],
                                             k=[k, k, k, k],
                                             gamma=[g, g, g, g])
                rec_list = eurm_to_recommendation_list(eurm_boosted,
                                                       datareader=datareader)

                print(
                    '--------------------------------------------------------------------------'
                )
                print('LT =', lt)
                print('K =', k)
                print('G =', g)
                ev.evaluate(rec_list, name='tb', save=False)
Пример #7
0
def new():
    """NLP pipeline matching playlists to tracks directly (UCM x ICM^T).

    Both content matrices are BM25-weighted; the estimated URM is the dot
    product between the test playlists' rows of the user-content matrix
    and the transposed item-content matrix. The result is saved and
    evaluated. (Removed unused locals ``stopwords``/``token_weights``.)
    """
    datareader = Datareader(mode='offline', only_load=True)
    evaluator = Evaluator(datareader)

    print('NLP...')
    test_playlists = datareader.get_test_pids()

    # Stopwords deliberately disabled (empty list).
    nlp = NLP(datareader=datareader, stopwords=[], mode='both')
    print('Getting ucm and icm...')
    ucm = nlp.get_ucm()
    ucm = bm25_row(ucm)
    icm = nlp.get_icm()
    icm = bm25_row(icm)
    icm_T = icm.T

    print('Computing eurm...')
    start = time.time()
    eurm_nlp = dot_product(ucm[test_playlists, :], icm_T, k=500)
    print(time.time() - start)

    print('Converting to csr...')
    eurm_nlp = eurm_nlp.tocsr()
    print(eurm_nlp.shape)

    sparse.save_npz(ROOT_DIR + '/data/eurm_nlp_new_method_offline.npz',
                    eurm_nlp)
    evaluator.evaluate(eurm_to_recommendation_list(eurm_nlp),
                       name='nlp_new_method',
                       show_plot=False)
class CF_AL_BM25:
    """User-based CF model on a playlist-feature UCM with optional BM25
    weighting and Tversky / dot-product similarity.

    ``m_ui`` holds the full UCM (rows = playlists); ``m_iu`` is the
    transpose of a copy whose test-playlist rows were zeroed so the model
    does not learn from the challenge set. In offline mode an Evaluator
    is attached and the ``tune_*`` helpers grid-search single
    hyperparameters, logging results via TunePrint.

    Fixes: ``tune_alpha`` and ``tune_beta`` logged ``{'beta': k}`` as the
    tuned value (copy-paste bug); they now log the parameter actually
    being tuned.
    """

    def __init__(self,
                 urm,
                 ucm,
                 binary=False,
                 verbose=True,
                 mode='offline',
                 datareader=None,
                 verbose_evaluation=True,
                 bm25=False,
                 similarity='tversky'):
        """Build the row/column matrices (optionally BM25-weighted) and,
        in offline mode, an Evaluator.

        NOTE: when ``binary`` is True the caller's ``ucm`` data is
        overwritten in place with ones.
        """
        assert (mode in ('offline', 'online'))
        if binary: ucm.data = np.ones(ucm.data.shape[0])
        self.urm = urm
        self.binary = binary
        self.verbose = verbose
        self.verbose_ev = verbose_evaluation
        self.dr = datareader
        self.mode = mode
        self.similarity = similarity
        self.bm25 = bm25
        ucm_aux = ucm.copy()
        ut.inplace_set_rows_zero(X=ucm_aux,
                                 target_rows=self.dr.get_test_pids()
                                 )  # don't learn from challenge set
        ucm_aux.eliminate_zeros()
        # NOTE(review): m_ui is built from the *full* ucm while m_iu comes
        # from the test-rows-zeroed copy - confirm this asymmetry is intended.
        if self.bm25: self.m_ui = bm25_row(ucm.copy()).tocsr()
        else: self.m_ui = ucm.copy().tocsr()
        if self.bm25: self.m_iu = bm25_col(ucm_aux.T.copy()).tocsr()
        else: self.m_iu = ucm_aux.T.copy().tocsr()
        if mode == 'offline':
            self.ev = Evaluator(self.dr)

    def model(self,
              alpha=1,
              beta=1,
              k=200,
              shrink=0,
              power=1,
              threshold=0,
              target_items=None):
        """Compute the user-user similarity ``self.s`` with the configured
        similarity measure; optionally raise its values to ``power``."""
        if target_items is None:
            target_items = self.dr.get_test_pids()  # work with s*urm
        self.alpha, self.beta = alpha, beta
        self.k = k
        self.power = power
        self.shrink, self.threshold = shrink, threshold
        if self.similarity == 'tversky':
            self.s = ss.tversky_similarity(self.m_ui,
                                           self.m_iu,
                                           k=k,
                                           shrink=shrink,
                                           alpha=alpha,
                                           beta=beta,
                                           threshold=threshold,
                                           verbose=self.verbose,
                                           target_items=target_items)
        elif self.similarity == 'dot':
            self.s = ss.dot_product_similarity(self.m_ui,
                                               self.m_iu,
                                               k=k,
                                               shrink=shrink,
                                               threshold=threshold,
                                               verbose=self.verbose,
                                               target_items=target_items)
        else:
            print('ERROR, similarity not implemented')
        if power != 1:
            self.s.data = np.power(self.s.data, power)

    def recommend(self, target_pids=None, eurm_k=750):
        """Compute ``self.eurm = s * urm`` (whole EURM if target_pids is None)."""
        self.eurm = ss.dot_product(self.s,
                                   self.urm,
                                   k=eurm_k,
                                   target_items=target_pids,
                                   verbose=self.verbose)
        # TODO: here we can try some postprocessing on eurm if complete (like normalize for column)

    #### METHODS FOR OFFLINE MODE ####
    def fast_recommend(self, target_pids=None, eurm_k=750):
        """Offline-only: recommend for the test playlists by default."""
        assert (self.mode == 'offline')
        if target_pids is None: target_pids = self.dr.get_test_pids()
        self.recommend(target_pids=target_pids, eurm_k=eurm_k)

    def fast_evaluate_eurm(self, target_pids=None):
        """Offline-only: evaluate the current EURM via the fast evaluator path."""
        assert (self.mode == 'offline')
        res = self.ev.fast_evaluate_eurm(self.eurm,
                                         target_pids=target_pids,
                                         verbose=self.verbose_ev)
        return res

    def evaluate_eurm(self, target_pids):
        """Offline-only: remove seed tracks, convert to a recommendation
        list and run the full evaluation."""
        assert (self.mode == 'offline')
        eurm = sps.csr_matrix(self.eurm[target_pids])
        eurm = post.eurm_remove_seed(eurm, self.dr)
        rec_list = post.eurm_to_recommendation_list(eurm)
        res = self.ev.evaluate(rec_list,
                               str(self),
                               verbose=self.verbose_ev,
                               return_result='all')
        return res

    #### UTILITY METHODS ####

    def clear_similarity(self):
        """Free the similarity matrix."""
        del self.s

    def clear_eurm(self):
        """Free the estimated URM."""
        del self.eurm

    def save_similarity(self, name_file, compressed=False):
        """Persist the similarity matrix to ``name_file`` (npz)."""
        sps.save_npz(name_file, self.s, compressed)

    def save_small_eurm(self, name_file, target_pids, compressed=True):
        """Persist only the ``target_pids`` rows of the EURM to ``name_file``."""
        eurm = sps.csr_matrix(self.eurm[target_pids])
        sps.save_npz(name_file, eurm, compressed)

    #### OVERRIDE METHODS ####

    def __str__(self):
        # Used as the run description in evaluation/tuning logs; requires
        # model() to have been called (reads self.alpha etc.).
        name = (
            'CF_AL_BM25: alpha=%.3f, beta=%.3f, k=%d, shrink=%d, power=%.3f, threshold=%.5f, binary=%s, bm25=%s'
            % (self.alpha, self.beta, self.k, self.shrink, self.power,
               self.threshold, str(self.binary), str(self.bm25)))
        return name

    #### TUNING METHODS ####

    def tune_alpha_beta(self,
                        range_alpha=np.arange(0, 1.1, 0.1),
                        range_beta=np.arange(0, 1.1, 0.1),
                        k=200,
                        shrink=0,
                        threshold=0,
                        power=1,
                        verbose_tune=True,
                        filename='tuning_bm25_alpha_beta',
                        overwrite=False,
                        save_mean=True,
                        save_full=True):
        """Grid-search (alpha, beta) jointly; log mean/full results."""
        tp = TunePrint(filename=filename,
                       full=save_full,
                       mean=save_mean,
                       overwrite=overwrite)
        for alpha in range_alpha:
            for beta in range_beta:
                self.model(alpha=alpha,
                           beta=beta,
                           k=k,
                           shrink=shrink,
                           power=power,
                           threshold=threshold)
                self.fast_recommend()
                self.clear_similarity()
                mean, df_all_values = self.fast_evaluate_eurm()
                self.clear_eurm()
                s_mean = 'P = %1.4f, NDCG = %1.4f, CLICK = %1.4f' % (
                    mean[0], mean[1], mean[2])
                if verbose_tune: print(str(self) + '\n' + s_mean)
                if save_mean: tp.print_mean_values(str(self), mean)
                if save_full:
                    tp.print_full_values(description=str(self),
                                         dict_val={
                                             'alpha': alpha,
                                             'beta': beta
                                         },
                                         dataframe=df_all_values)

    # use this tuning method only with beta=0
    def tune_alpha(self,
                   range_alpha=np.arange(0.0, 2, 0.1),
                   beta=0,
                   power=1,
                   k=100,
                   shrink=0,
                   threshold=0,
                   verbose_tune=True,
                   filename='tuning_bm25_k',
                   overwrite=False,
                   save_mean=True,
                   save_full=True):
        """Sweep alpha at fixed beta (intended for beta=0).

        NOTE(review): default filename says 'k' - kept for backward
        compatibility with existing logs.
        """
        tp = TunePrint(filename=filename,
                       full=save_full,
                       mean=save_mean,
                       overwrite=overwrite)
        for alpha in range_alpha:
            self.model(alpha=alpha,
                       beta=beta,
                       k=k,
                       shrink=shrink,
                       power=power,
                       threshold=threshold)
            self.fast_recommend()
            self.clear_similarity()
            mean, df_all_values = self.fast_evaluate_eurm()
            self.clear_eurm()
            s_mean = 'P = %1.4f, NDCG = %1.4f, CLICK = %1.4f' % (
                mean[0], mean[1], mean[2])
            if verbose_tune: print(str(self) + '\n' + s_mean)
            # save values
            if save_mean: tp.print_mean_values(str(self), mean)
            if save_full:
                # FIX: was {'beta': k} - log the alpha actually being tuned.
                tp.print_full_values(description=str(self),
                                     dict_val={'alpha': alpha},
                                     dataframe=df_all_values)
        tp.make_pdf_full()

    def tune_power(self,
                   range_power=np.arange(0.5, 1.5, 0.1),
                   k=100,
                   shrink=0,
                   threshold=0,
                   verbose_tune=False,
                   alpha=1,
                   beta=1,
                   filename='tuning_bm25_alpha',
                   overwrite=False,
                   save_mean=True,
                   save_full=True):
        """Sweep the exponent applied to the similarity values.

        Builds the similarity once with power=1 and re-applies each power
        to the saved data, avoiding a full model rebuild per step.
        """
        tp = TunePrint(filename=filename,
                       full=save_full,
                       mean=save_mean,
                       overwrite=overwrite)
        self.model(
            alpha=alpha,
            beta=beta,
            k=k,
            shrink=shrink,
            power=1,
            threshold=threshold)  # exploit this trick to generate fastest model
        save_data = self.s.data
        for power in range_power:
            self.s.data = save_data
            # np.power returns a new array, so save_data stays pristine.
            self.s.data = np.power(self.s.data, power)
            self.power = power
            self.fast_recommend()
            mean, df_all_values = self.fast_evaluate_eurm()
            self.clear_eurm()
            s_mean = 'P = %1.4f, NDCG = %1.4f, CLICK = %1.4f' % (
                mean[0], mean[1], mean[2])
            if verbose_tune: print(str(self) + '\n' + s_mean)
            if save_mean: tp.print_mean_values(str(self), mean)
            if save_full:
                tp.print_full_values(description=str(self),
                                     dict_val={'power': power},
                                     dataframe=df_all_values)
        tp.make_pdf_full()

    def tune_beta(self,
                  range_beta=np.arange(0.0, 2, 0.1),
                  alpha=1,
                  power=1,
                  k=100,
                  shrink=0,
                  threshold=0,
                  verbose_tune=True,
                  filename='tuning_bm25_k',
                  overwrite=False,
                  save_mean=True,
                  save_full=True):
        """Sweep beta at fixed alpha."""
        tp = TunePrint(filename=filename,
                       full=save_full,
                       mean=save_mean,
                       overwrite=overwrite)
        for beta in range_beta:
            self.model(alpha=alpha,
                       beta=beta,
                       k=k,
                       shrink=shrink,
                       power=power,
                       threshold=threshold)
            self.fast_recommend()
            self.clear_similarity()
            mean, df_all_values = self.fast_evaluate_eurm()
            self.clear_eurm()
            s_mean = 'P = %1.4f, NDCG = %1.4f, CLICK = %1.4f' % (
                mean[0], mean[1], mean[2])
            if verbose_tune: print(str(self) + '\n' + s_mean)
            # save values
            if save_mean: tp.print_mean_values(str(self), mean)
            if save_full:
                # FIX: was {'beta': k} - log the beta actually being tuned.
                tp.print_full_values(description=str(self),
                                     dict_val={'beta': beta},
                                     dataframe=df_all_values)
        tp.make_pdf_full()

    def tune_k(self,
               range_k=np.arange(25, 300, 25),
               alpha=1,
               beta=0,
               power=1,
               shrink=0,
               threshold=0,
               verbose_tune=True,
               filename='tuning_bm25_k',
               overwrite=False,
               save_mean=True,
               save_full=True):
        """Sweep the similarity neighborhood size k."""
        tp = TunePrint(filename=filename,
                       full=save_full,
                       mean=save_mean,
                       overwrite=overwrite)
        for k in range_k:
            self.model(alpha=alpha,
                       beta=beta,
                       k=k,
                       shrink=shrink,
                       power=power,
                       threshold=threshold)
            self.fast_recommend()
            self.clear_similarity()
            mean, df_all_values = self.fast_evaluate_eurm()
            self.clear_eurm()
            s_mean = 'P = %1.4f, NDCG = %1.4f, CLICK = %1.4f' % (
                mean[0], mean[1], mean[2])
            if verbose_tune: print(str(self) + '\n' + s_mean)
            if save_mean: tp.print_mean_values(str(self), mean)
            if save_full:
                tp.print_full_values(description=str(self),
                                     dict_val={'k': k},
                                     dataframe=df_all_values)
        tp.make_pdf_full()

    def tune_shrink(self,
                    range_shrink=np.arange(25, 300, 25),
                    alpha=1,
                    beta=0,
                    power=1,
                    k=200,
                    threshold=0,
                    verbose_tune=True,
                    filename='tuning_bm25_shrink',
                    overwrite=False,
                    save_mean=True,
                    save_full=True):
        """Sweep the shrink term of the similarity."""
        tp = TunePrint(filename=filename,
                       full=save_full,
                       mean=save_mean,
                       overwrite=overwrite)
        for shrink in range_shrink:
            self.model(alpha=alpha,
                       beta=beta,
                       k=k,
                       shrink=shrink,
                       power=power,
                       threshold=threshold)
            self.fast_recommend()
            self.clear_similarity()
            mean, df_all_values = self.fast_evaluate_eurm()
            self.clear_eurm()
            s_mean = 'P = %1.4f, NDCG = %1.4f, CLICK = %1.4f' % (
                mean[0], mean[1], mean[2])
            if verbose_tune: print(str(self) + '\n' + s_mean)
            if save_mean: tp.print_mean_values(str(self), mean)
            if save_full:
                tp.print_full_values(description=str(self),
                                     dict_val={'shrink': shrink},
                                     dataframe=df_all_values)
        tp.make_pdf_full()
Пример #9
0
                                  URM_validation=None)

    cfw.fit()

    weights = sps.diags(cfw.D_best)

    sps.save_npz("ICM_fw_maurizio", weights)

    ICM_weighted = ICM.dot(weights)

    sps.save_npz("ICM_fw_maurizio", ICM_weighted)

    ######## NOI
    urm = dr.get_urm()
    pid = dr.get_test_pids()

    cbfi = Knn_content_item()
    cbfi.fit(urm, ICM_weighted, pid)

    cbfi.compute_model(top_k=knn,
                       sm_type=COSINE,
                       shrink=0,
                       binary=False,
                       verbose=True)
    cbfi.compute_rating(top_k=topk, verbose=True, small=True)

    sps.save_npz(complete_name + ".npz", cbfi.eurm)
    ev = Evaluator(dr)
    ev.evaluate(recommendation_list=eurm_to_recommendation_list(cbfi.eurm),
                name=complete_name)
    import time
    name = "ensemble-" + mode + "-data-" + time.strftime(
        "%x") + "-" + time.strftime("%X")
    name = name.replace("/", "_")
    sps.save_npz("results/" + name + ".npz", res)

    print("[ Initizalizing Datereader ]")
    dr = Datareader(verbose=False, mode=mode, only_load="False")

    res = eurm_to_recommendation_list(res, datareader=dr)

    if mode == "offline":
        print("[ Initizalizing Evaluator ]")
        ev = Evaluator(dr)
        ev.evaluate(res, name="ens")

    if mode == "online":
        print("[ Initizalizing Submitter ]")
        sb = Submitter(dr)
        sb.submit(recommendation_list=res,
                  name=name,
                  track="main",
                  verify=True,
                  gzipped=False)

#
#
#
# if type == "splitted":
#     mode = "offline"
Пример #11
0
                    result_dict[song] = fs.freq


        for song_predicted in result_dict:

            pred[i,song_predicted] = result_dict[song_predicted]





    eurm = eurm_remove_seed(pred , dr )

    rec_list = eurm_to_recommendation_list(eurm)

    ev.evaluate(rec_list, "cat2_top",verbose=True, do_plot=True, show_plot=True, save=True, )



# seuences: [15565, 6186, 6288, 6292, 6294, 6295, 6298, 6310, 6334, 6336, 6337, 6339, 6340, 6362, 6380, 6387, 7597, 7603, 7604, 7605, 7606, 7607, 6173, 6077, 6040, 6027, 74, 76, 77, 81, 282, 768, 2163, 2506, 2507, 2508, 7609, 3084, 3166, 3183, 3282, 3283, 3697, 4211, 4420, 4443, 4493, 6019, 3162, 73, 8408, 8460, 15544, 15545, 15546, 15547, 15548, 15549, 15550, 15551, 15552, 15553, 15554, 15555, 15556, 15557, 15558, 15559, 15560, 15561, 15562, 15563, 15564, 15543, 15503, 15152, 14809, 8484, 8940, 10480, 10527, 10820, 11192, 11200, 11482, 11500, 11512, 8409, 12605, 12710, 12714, 12716, 12728, 12794, 13689, 13692, 14467, 14797, 14801, 12610, 51]
# seuences: [11500]
#
#
#
# [[11500], [12714]], 62
# [[11500], [70]], 62
# [[11500], [64]], 70
# [[11500], [14809]], 71
# [[11500], [13893]], 72
# [[11500], [69]], 81
# [[11500], [69], [68]], 46
Пример #12
0
import numpy as np
import sys


# Content-based user model on the playlist-album UCM with the 100 most
# popular albums removed ("depopularized") before computing similarity.
datareader = Datareader(mode='offline', only_load=True, verbose=False)
evaluator = Evaluator(datareader)

urm = datareader.get_urm()
ucm_album = datareader.get_ucm_albums()

# Album popularity = per-album column sums of the playlist-album matrix.
albums_pop = ucm_album.sum(axis=0).A1
# Indices of the 100 most popular albums (argsort descending, first 100).
mask = np.argsort(albums_pop)[::-1][:100]
# Presumably zeroes those album columns in place - helper defined elsewhere.
ut.inplace_set_cols_zero(ucm_album, mask)

ucm_album = bm25_row(ucm_album)

print('Similarity..')
sim = tversky_similarity(ucm_album, ucm_album.T, shrink=200, alpha=0.1, beta=1, k=800, verbose=1, binary=False)
sim = sim.tocsr()

test_pids = list(datareader.get_test_pids())

# EURM = playlist-playlist similarity times URM, restricted to test playlists.
eurm = dot_product(sim, urm, k=750)
eurm = eurm.tocsr()
eurm = eurm[test_pids, :]
sparse.save_npz('eurm_albums_depop_100_offline.npz', eurm)

# Drop tracks already seeding each playlist before evaluation.
eurm = eurm_remove_seed(eurm, datareader)

evaluator.evaluate(eurm_to_recommendation_list(eurm), name='cbuser_album_depop_100', show_plot=False)
        for token in tokens:
            playlists_with_tokens.extend(
                ucm_csc.indices[ucm_csc.indptr[token]:ucm_csc.indptr[token +
                                                                     1]])

        urm_tmp = urm_csr[playlists_with_tokens]

        track_total_interactions = np.array(urm_tmp.sum(axis=0)).astype(
            np.int32)[0, :]  # like ravel

        top_pop = track_total_interactions.argsort()[-750:][::-1]

        rec_list[i] = top_pop
        i += 1

    np.save("nlp_toketoppop_rec_list_offline", rec_list)

    eurm = rec_list_to_eurm(rec_list=rec_list)
    eurm = eurm_remove_seed(eurm, dr)

    rec_list = eurm_to_recommendation_list(eurm)

    ev.evaluate(
        rec_list,
        "WEILA2_toktoktop_pop",
        verbose=True,
        do_plot=True,
        show_plot=True,
        save=True,
    )
Пример #14
0
    # TopPop Album
    album = artists_dic[track_ind]
    playlists = ucm_album.indices[ucm_album.indptr[album]:ucm_album.
                                  indptr[album + 1]]

    top = urm[playlists].sum(axis=0).A1.astype(np.int32)

    track_ind_rec = top.argsort()[-501:][::-1]

    eurm2[row, track_ind_rec] = top[track_ind_rec]

eurm1 = eurm1.tocsr()[pids_all]
eurm2 = eurm2.tocsr()[pids_all]

eurm1 = eurm_remove_seed(eurm1, dr)
eurm2 = eurm_remove_seed(eurm2, dr)

sps.save_npz("test1.npz", eurm1)

rec_list1 = eurm_to_recommendation_list(eurm1)
rec_list2 = eurm_to_recommendation_list(eurm2)
rec_list3 = append_rec_list(rec_list1 + rec_list2)

ev = Evaluator(dr)
ev.evaluate(rec_list1, name="enstest", level='track')
ev.evaluate(rec_list2, name="enstest", level='track')
ev.evaluate(rec_list3, name="enstest", level='track')

# rec.append(list(top_p))
Пример #15
0
# INITIALIZATION
# Tuning script: sweep the exponent applied to the Tversky similarity
# values of the strict-NLP UCM, evaluating each setting.
dr = Datareader(mode='offline', verbose=False, only_load=True)
ev = Evaluator(dr)
test_pids = dr.get_test_pids()
urm = dr.get_urm()
topk = 750

nlp_strict = NLPStrict(dr)
ucm_strict = nlp_strict.get_UCM()

# TVERSKY
# 'a' is the exponent applied element-wise to the similarity values.
for a in [0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.4, 1.7, 2.0]:

    print('---------')
    print('TVERSKY | power =', a)

    # Similarity is rebuilt each iteration because np.power below mutates it.
    sim = tversky_similarity(ucm_strict, ucm_strict.T, k=450, alpha=0.2, beta=0.5,
                             shrink=150, target_items=test_pids)

    sim.data = np.power(sim.data, a)

    # Compute eurm
    eurm = dot_product(sim, urm, k=topk)
    eurm = eurm.tocsr()
    eurm = eurm[test_pids, :]

    rec_list = eurm_to_recommendation_list(eurm, datareader=dr)

    ev.evaluate(rec_list, name='nlp_strict_tversky_power=' + str(a))

Пример #16
0
start = time.time()
# Compute similarity

similarity = tversky_similarity(ucm, binary=False, shrink=1, alpha=0.1, beta=1)
similarity = similarity.tocsr()
print(time.time() - start)

print('Computing eurm...')
start = time.time()
# Compute eurm
eurm = dot_product(similarity, urm, k=500)
eurm = eurm.tocsr()
eurm = eurm[test_playlists, :]
print('eurm', eurm.shape)
print(time.time() - start)

# Evaluating
rec_list = eurm_to_recommendation_list(eurm)
(prec_t, ndcg_t, clicks_t, prec_a, ndcg_a,
 clicks_a) = evaluator.evaluate(rec_list,
                                return_overall_mean=True,
                                name='AAANLP_' + nome,
                                verbose=True,
                                show_plot=False)

# gc.collect()
# del eurm, rec_list, similarity, nlp, test_playlists, start, nome
# gc.collect()

np.save("ret", [clicks_t])
        #Computing similarity/model
        rec.compute_model(top_k=knn,
                          sm_type=sm.COSINE,
                          shrink=200,
                          alpha=0.1,
                          beta=1,
                          binary=True,
                          verbose=True)

        #Computing ratings
        rec.compute_rating(top_k=topk, verbose=True, small=True)

        #evaluation and saving
        sps.save_npz(complete_name + ".npz", rec.eurm)
        ev.evaluate(recommendation_list=eurm_to_recommendation_list(rec.eurm),
                    name=name)

    if mode == "online":
        """Submission"""
        #Data initialization
        dr = Datareader(verbose=True, mode=mode, only_load=False)

        #Recommender algorithm initialization
        rec = Knn_collaborative_item()

        #Submitter initialization
        sb = Submitter(dr)

        #Getting for the recommender algorithm
        urm = dr.get_urm()
        pid = dr.get_test_pids()
    sequences = load_obj(path=ROOT_DIR+'/data/cat1/', name='sequences_cat1_'+str(i))
    popularity = len(sequences)
    preds_line = np.zeros(2262292)

    for seq in fpgrowth(sequences,supp= -popularity/costante_di_popolarita, target='m'):
        for song in seq[0]:
            preds_line[song]+= seq[1]*(len(seq[0])-1)*(len(seq[0])-1)
    vals = fast_argpart(preds_line)

    pred_lil[i,vals] = preds_line[vals]


eurm = sps.csr_matrix(pred_lil)
eurm = eurm_remove_seed(eurm , dr )
rec_list = eurm_to_recommendation_list(eurm)
ev.evaluate(rec_list, "cat2_spm_max",verbose=True, do_plot=True, show_plot=True, save=True )

exit()

# # parallel association rule.


import gc

target = 'm'
costante_di_pop = 15


# In[9]:

# Example #19
    # Boost an offline per-category ensemble with album/artist similarities
    # (dr/ev come from the enclosing scope, not visible here).
    knn = 500
    topk = 750

    # LOAD EURM
    eurm = sparse.load_npz(
        ROOT_DIR + '/data/ensemble_per_cat_offline_new_data_32_maggio.npz')
    rec_list = eurm_to_recommendation_list(eurm, datareader=dr)

    # SIMILARITIES: precomputed tversky similarities over album/artist UCMs
    # (commented code shows how they were originally built).
    # ucm_album = dr.get_ucm_albums()
    # sim_album = tversky_similarity(ucm_album.T, ucm_album, shrink=200,
    #                                alpha=0.1, beta=1, k=knn, verbose=1, binary=False)
    # sim_album = sim_album.tocsr()
    sim_album = sparse.load_npz(ROOT_DIR + '/data/sim_album.npz')

    # ucm_artist = dr.get_ucm_artists()
    # sim_artist = tversky_similarity(ucm_artist.T, ucm_artist, shrink=200,
    #                                alpha=0.1, beta=1, k=knn, verbose=1, binary=False)
    # sim_artist = sim_artist.tocsr()
    sim_artist = sparse.load_npz(ROOT_DIR + '/data/sim_artist.npz')

    # TWOBOOST: re-rank the lists mixing album/artist similarity signals.
    rec_list_new = two_boost(rec_list,
                             dr,
                             sim_al=sim_album,
                             sim_ar=sim_artist,
                             prob=[0.85, 0.1, 0.05])

    # EVALUATION
    ev.evaluate(rec_list_new, name='toptwo')
class CB_AR_BM25:
    """Content-based item recommender over a BM25-weighted ICM.

    The ICM is BM25-weighted (column-wise) in both orientations and combined
    with a p3alpha-style similarity; estimated ratings are then URM . S^T.
    In 'offline' mode an Evaluator is built from the supplied datareader so
    the fast_* helpers can score recommendations directly.
    """

    def __init__(self,
                 icm,
                 urm,
                 binary=False,
                 verbose=True,
                 mode='offline',
                 datareader=None,
                 verbose_evaluation=True):
        """Store the (optionally binarized) URM and precompute BM25 views.

        icm: item-content matrix (scipy sparse).
        urm: user-rating matrix (scipy sparse); mutated in place when
            binary=True.
        mode: 'offline' (evaluation available) or 'online'.
        datareader: project Datareader; required by the offline helpers.
        """
        assert (mode in ('offline', 'online'))
        # NOTE: binarization mutates the caller's matrix in place.
        if binary: urm.data = np.ones(urm.data.shape[0])
        self.urm = urm
        # BM25 weighting of the ICM in both directions (item->content and
        # content->item), kept as CSR for the similarity kernel.
        self.m_ic = pre.bm25_col(icm.copy()).tocsr()
        self.m_ci = pre.bm25_col(icm.T.copy()).tocsr()
        self.binary = binary
        self.verbose = verbose
        self.verbose_ev = verbose_evaluation
        self.dr = datareader
        self.mode = mode
        if mode == 'offline':
            self.ev = Evaluator(self.dr)

    def model(self, alpha=1, k=200, shrink=0, threshold=0, target_items=None):
        """Build the item-item similarity self.s.

        If target_items is None the whole similarity is computed.  The
        hyperparameters are stored on self so __str__ can report them.
        """
        self.alpha = alpha
        self.k = k
        self.shrink, self.threshold = shrink, threshold
        self.s = ss.p3alpha_similarity(self.m_ic,
                                       self.m_ci,
                                       k=k,
                                       shrink=shrink,
                                       alpha=alpha,
                                       threshold=threshold,
                                       verbose=self.verbose,
                                       target_items=target_items)

    def recommend(self, target_pids=None, eurm_k=750):
        """Compute self.eurm (top eurm_k scores per playlist).

        If target_pids is None the whole eurm is computed.
        """
        # NOTE(review): this class multiplies by self.s.T while the sibling
        # CF_IB_BM25 uses self.s — confirm the orientation is intended.
        self.eurm = ss.dot_product(self.urm,
                                   self.s.T,
                                   k=eurm_k,
                                   target_items=target_pids,
                                   verbose=self.verbose)
        # TODO: here we can try some postprocessing on eurm if complete (like normalize for column)

    #### METHODS FOR OFFLINE MODE ####
    def fast_recommend(self, target_pids=None, eurm_k=750):
        """Recommend for the test playlists only (offline mode)."""
        assert (self.mode == 'offline')
        if target_pids is None: target_pids = self.dr.get_test_pids()
        self.recommend(target_pids=target_pids, eurm_k=eurm_k)

    def fast_evaluate_eurm(self, target_pids=None):
        """Score self.eurm with the Evaluator's fast path (offline mode)."""
        assert (self.mode == 'offline')
        res = self.ev.fast_evaluate_eurm(self.eurm,
                                         target_pids=target_pids,
                                         verbose=self.verbose_ev)
        return res

    def evaluate_eurm(self, target_pids):
        """Full evaluation: remove seeds, build rec lists, run the Evaluator."""
        assert (self.mode == 'offline')
        eurm = sps.csr_matrix(self.eurm[target_pids])
        eurm = post.eurm_remove_seed(eurm, self.dr)
        rec_list = post.eurm_to_recommendation_list(eurm)
        res = self.ev.evaluate(rec_list,
                               str(self),
                               verbose=self.verbose_ev,
                               return_result='all')
        return res

    #### UTILITY METHODS ####

    def clear_similarity(self):
        # Free the similarity matrix to reduce peak memory between runs.
        del self.s

    def clear_eurm(self):
        # Free the eurm to reduce peak memory between runs.
        del self.eurm

    def save_similarity(self, name_file, compressed=False):
        """Persist the similarity matrix to a .npz file."""
        sps.save_npz(name_file, self.s, compressed)

    def save_small_eurm(self, name_file, target_pids, compressed=True):
        """Persist only the target playlists' rows of the eurm."""
        eurm = sps.csr_matrix(self.eurm[target_pids])
        sps.save_npz(name_file, eurm, compressed)

    #### OVERRIDE METHODS ####

    def __str__(self):
        # NOTE(review): reads self.alpha/k/... which are only set by model()
        # (or the tune_* methods) — calling this earlier raises AttributeError.
        name = (
            'CB_AR_BM25: alpha=%.3f, k=%d, shrink=%d, threshold=%.5f, binary=%s'
            % (self.alpha, self.k, self.shrink, self.threshold, str(
                self.binary)))
        return name

    #### TUNING METHODS ####
    def tune_alpha(self,
                   range_alpha=np.arange(0.5, 1.5, 0.1),
                   k=100,
                   shrink=0,
                   threshold=0,
                   verbose_tune=False,
                   filename='tuning_bm25_alpha',
                   overwrite=False,
                   save_mean=True,
                   save_full=True):
        """Sweep alpha, recommending and evaluating at each value.

        Builds the similarity once at alpha=1 and re-powers its stored values
        per alpha instead of rebuilding the model each time.
        """
        tp = TunePrint(filename=filename,
                       full=save_full,
                       mean=save_mean,
                       overwrite=overwrite)
        # Build once at alpha=1 and element-wise power the values below.
        # NOTE(review): only equivalent to rebuilding if p3alpha applies alpha
        # as an element-wise power on the result — confirm.
        self.model(
            alpha=1, k=k, shrink=shrink,
            threshold=threshold)
        # Baseline values; np.power below allocates a new array, so this
        # reference is never mutated across iterations.
        save_data = self.s.data
        for alpha in range_alpha:
            self.s.data = save_data
            self.s.data = np.power(self.s.data, alpha)
            self.alpha = alpha
            self.fast_recommend()
            mean, df_all_values = self.fast_evaluate_eurm()
            self.clear_eurm()
            s_mean = 'P = %1.4f, NDCG = %1.4f, CLICK = %1.4f' % (
                mean[0], mean[1], mean[2])
            if verbose_tune: print(str(self) + '\n' + s_mean)
            if save_mean: tp.print_mean_values(str(self), mean)
            if save_full:
                tp.print_full_values(description=str(self),
                                     dict_val={'alpha': alpha},
                                     dataframe=df_all_values)
        tp.make_pdf_full()

    def tune_k(self,
               range_k=np.arange(25, 300, 25),
               alpha=1,
               shrink=0,
               threshold=0,
               verbose_tune=False,
               filename='tuning_bm25_k',
               overwrite=False,
               save_mean=True,
               save_full=True):
        """Sweep the similarity's k (neighbors), rebuilding the model each time."""
        tp = TunePrint(filename=filename,
                       full=save_full,
                       mean=save_mean,
                       overwrite=overwrite,
                       verbose=verbose_tune)
        for k in range_k:
            self.model(alpha=alpha, k=k, shrink=shrink, threshold=threshold)
            self.fast_recommend()
            self.clear_similarity()
            mean, df_all_values = self.fast_evaluate_eurm()
            self.clear_eurm()
            s_mean = 'P = %1.4f, NDCG = %1.4f, CLICK = %1.4f' % (
                mean[0], mean[1], mean[2])
            if verbose_tune: print(str(self) + '\n' + s_mean)
            # save values
            if save_mean: tp.print_mean_values(str(self), mean)
            if save_full:
                tp.print_full_values(description=str(self),
                                     dict_val={'k': k},
                                     dataframe=df_all_values)
        tp.make_pdf_full()

    def tune_shrink(self,
                    range_shrink=np.arange(25, 300, 25),
                    k=200,
                    alpha=1,
                    threshold=0,
                    verbose_tune=False,
                    filename='tuning_bm25_shrink',
                    overwrite=False,
                    save_mean=True,
                    save_full=True):
        """Sweep the similarity's shrink term, rebuilding the model each time."""
        tp = TunePrint(filename=filename,
                       full=save_full,
                       mean=save_mean,
                       overwrite=overwrite)
        for shrink in range_shrink:
            self.model(alpha=alpha, k=k, shrink=shrink, threshold=threshold)
            self.fast_recommend()
            self.clear_similarity()
            mean, df_all_values = self.fast_evaluate_eurm()
            self.clear_eurm()
            s_mean = 'P = %1.4f, NDCG = %1.4f, CLICK = %1.4f' % (
                mean[0], mean[1], mean[2])
            if verbose_tune: print(str(self) + '\n' + s_mean)
            # save values
            if save_mean: tp.print_mean_values(str(self), mean)
            if save_full:
                tp.print_full_values(description=str(self),
                                     dict_val={'shrink': shrink},
                                     dataframe=df_all_values)
        tp.make_pdf_full()
from scipy import sparse

import utils.pre_processing as pre
from boosts.hole_boost import HoleBoost
from utils.datareader import Datareader
from utils.definitions import ROOT_DIR
from utils.evaluator import Evaluator
from utils.post_processing import eurm_to_recommendation_list

# Initialization
dr = Datareader(mode='offline', only_load=True)
ev = Evaluator(dr)

# Load matrices: rp3beta eurm and the similarity used by HoleBoost.
eurm = sparse.load_npz(ROOT_DIR + '/data/eurm_rp3_offline.npz')
sim = sparse.load_npz(ROOT_DIR + '/data/sim_offline.npz')
print('Loaded')

# Normalization (L2 by row) so boost scores are comparable.
eurm = pre.norm_l2_row(eurm)
sim = pre.norm_l2_row(sim)

# HoleBoost over all categories except 1.

h = HoleBoost(sim, eurm, dr)
eurm_b = h.boost_eurm(categories=[2, 3, 4, 5, 6, 7, 8, 9, 10], k=200, gamma=10)

#sparse.save_npz(ROOT_DIR + '/data/eurm_boosted_online.npz', eurm_b)
rec_list = eurm_to_recommendation_list(eurm_b)

# Evaluation
ev.evaluate(rec_list, name='rp3_l2_all_200_10', save=True, show_plot=False)
    # SVD main: factorize the URM, then evaluate (offline) or submit (online).
    n_factors = 100
    top_k = 750
    mode = 'online'

    if mode == 'offline':
        # Initialization
        dr = Datareader(mode='offline', only_load=True, verbose=False)
        ev = Evaluator(dr)

        # Prediction
        eurm = compute_SVD(dr, n_factors, top_k, save_eurm=True)

        # Evaluation
        print('N_FACTORS =', n_factors)
        ev.evaluate(eurm_to_recommendation_list(eurm, datareader=dr), name='svd_' + str(n_factors))

    elif mode == 'online':
        # Initialization
        dr = Datareader(mode='online', only_load=True, verbose=False)
        sb = Submitter(dr)

        # Prediction
        eurm = compute_SVD(dr, n_factors, top_k, save_eurm=True)

        # Submission
        sb.submit(eurm_to_recommendation_list_submission(eurm, datareader=dr), name='svd_' + str(n_factors))

    else:
        print('Wrong mode!')
# Example #23
class CF_IB_BM25:
    """Item-based collaborative-filtering recommender over a BM25-weighted URM.

    The URM is BM25-weighted (row-wise) in both orientations and combined with
    a p3alpha/rp3beta similarity (popularity-penalized); estimated ratings are
    then URM . S.  In 'offline' mode an Evaluator is built from the supplied
    datareader so the fast_* helpers can score recommendations directly.
    """

    def __init__(self,
                 urm,
                 pop=None,
                 binary=False,
                 verbose=True,
                 mode='offline',
                 datareader=None,
                 verbose_evaluation=True):
        """Store the (optionally binarized) URM and precompute BM25 views.

        urm: user-rating matrix (scipy sparse); mutated in place when
            binary=True.
        pop: per-item popularity vector; defaults to the URM column sums.
        mode: 'offline' (evaluation available) or 'online'.
        datareader: project Datareader; required by the offline helpers.
        """
        assert (mode in ('offline', 'online'))
        # NOTE: binarization mutates the caller's matrix in place.
        if binary: urm.data = np.ones(urm.data.shape[0])
        # Popularity defaults to column sums of the (possibly binarized) URM.
        if pop is None: self.pop = urm.sum(axis=0).A1
        else: self.pop = pop
        self.urm = urm
        # BM25 weighting in both directions (user->item and item->user).
        self.m_ui = pre.bm25_row(urm.copy()).tocsr()
        self.m_iu = pre.bm25_row(urm.T.copy()).tocsr()
        self.binary = binary
        self.verbose = verbose
        self.verbose_ev = verbose_evaluation
        self.dr = datareader
        self.mode = mode
        if mode == 'offline':
            self.ev = Evaluator(self.dr)

    def model(self,
              alpha=1,
              beta=0,
              k=200,
              shrink=0,
              threshold=0,
              rp3_mode=0,
              target_items=None):
        """Build the item-item similarity self.s.

        If target_items is None the whole similarity is computed.  The
        hyperparameters are stored on self so __str__ can report them.
        """
        self.alpha, self.beta = alpha, beta
        self.k = k
        self.shrink, self.threshold = shrink, threshold
        self.rp3_mode = rp3_mode
        self.s = p3r3.p3alpha_rp3beta_similarity(self.m_iu,
                                                 self.m_ui,
                                                 self.pop,
                                                 k=k,
                                                 shrink=shrink,
                                                 alpha=alpha,
                                                 beta=beta,
                                                 threshold=threshold,
                                                 verbose=self.verbose,
                                                 mode=rp3_mode,
                                                 target_items=target_items)

    def recommend(self, target_pids=None, eurm_k=750):
        """Compute self.eurm (top eurm_k scores per playlist).

        If target_pids is None the whole eurm is computed.
        """
        self.eurm = ss.dot_product(self.urm,
                                   self.s,
                                   k=eurm_k,
                                   target_items=target_pids,
                                   verbose=self.verbose)
        # TODO: here we can try some postprocessing on eurm if complete (like normalize for column)

    #### METHODS FOR OFFLINE MODE ####
    def fast_recommend(self, target_pids=None, eurm_k=750):
        """Recommend for the test playlists only (offline mode)."""
        assert (self.mode == 'offline')
        if target_pids is None: target_pids = self.dr.get_test_pids()
        self.recommend(target_pids=target_pids, eurm_k=eurm_k)

    def fast_evaluate_eurm(self, target_pids=None):
        """Score self.eurm with the Evaluator's fast path (offline mode)."""
        assert (self.mode == 'offline')
        res = self.ev.fast_evaluate_eurm(self.eurm,
                                         target_pids=target_pids,
                                         verbose=self.verbose_ev)
        return res

    def evaluate_eurm(self, target_pids):
        """Full evaluation: remove seeds, build rec lists, run the Evaluator."""
        assert (self.mode == 'offline')
        eurm = sps.csr_matrix(self.eurm[target_pids])
        eurm = post.eurm_remove_seed(eurm, self.dr)
        rec_list = post.eurm_to_recommendation_list(eurm)
        res = self.ev.evaluate(rec_list,
                               str(self),
                               verbose=self.verbose_ev,
                               return_result='all')
        return res

    #### UTILITY METHODS ####

    def clear_similarity(self):
        # Free the similarity matrix to reduce peak memory between runs.
        del self.s

    def clear_eurm(self):
        # Free the eurm to reduce peak memory between runs.
        del self.eurm

    def save_similarity(self, name_file, compressed=False):
        """Persist the similarity matrix to a .npz file."""
        sps.save_npz(name_file, self.s, compressed)

    def save_small_eurm(self, name_file, target_pids, compressed=True):
        """Persist only the target playlists' rows of the eurm."""
        eurm = sps.csr_matrix(self.eurm[target_pids])
        sps.save_npz(name_file, eurm, compressed)

    #### OVERRIDE METHODS ####

    def __str__(self):
        # NOTE: reads hyperparameters that are only set by model() — calling
        # this earlier raises AttributeError.
        name = (
            'CF_IB_BM25: alpha=%.3f, beta=%.3f, k=%d, shrink=%d, threshold=%.5f, binary=%s, rp3mode=%d'
            % (self.alpha, self.beta, self.k, self.shrink, self.threshold,
               str(self.binary), self.rp3_mode))
        return name

    #### TUNING METHODS ####

    def tune_alpha_beta(self,
                        range_alpha=np.arange(0, 1.1, 0.1),
                        range_beta=np.arange(0, 1.1, 0.1),
                        k=200,
                        shrink=0,
                        threshold=0,
                        verbose_tune=True,
                        filename='tuning_bm25_alpha_beta',
                        overwrite=False,
                        save_mean=True,
                        save_full=True):
        """Grid-sweep alpha and beta, rebuilding the model at each point."""
        tp = TunePrint(filename=filename,
                       full=save_full,
                       mean=save_mean,
                       overwrite=overwrite)
        for alpha in range_alpha:
            for beta in range_beta:
                self.model(alpha=alpha,
                           beta=beta,
                           k=k,
                           shrink=shrink,
                           threshold=threshold)
                self.fast_recommend()
                self.clear_similarity()
                mean, df_all_values = self.fast_evaluate_eurm()
                self.clear_eurm()
                s_mean = 'P = %1.4f, NDCG = %1.4f, CLICK = %1.4f' % (
                    mean[0], mean[1], mean[2])
                if verbose_tune: print(str(self) + '\n' + s_mean)
                # save values
                if save_mean: tp.print_mean_values(str(self), mean)
                # BUGFIX: was tp.print_full_values(str(self), df_all_values),
                # which passed the dataframe positionally as dict_val (see the
                # keyword API used by the sibling CB_AR_BM25 class).
                if save_full:
                    tp.print_full_values(description=str(self),
                                         dict_val={'alpha': alpha,
                                                   'beta': beta},
                                         dataframe=df_all_values)
        # Render the collected full values (consistent with CB_AR_BM25).
        tp.make_pdf_full()

    def tune_k(self,
               range_k=np.arange(25, 300, 25),
               alpha=1,
               beta=0,
               shrink=0,
               threshold=0,
               verbose_tune=True,
               filename='tuning_bm25_k',
               overwrite=False,
               save_mean=True,
               save_full=True):
        """Sweep the similarity's k (neighbors), rebuilding the model each time."""
        tp = TunePrint(filename=filename,
                       full=save_full,
                       mean=save_mean,
                       overwrite=overwrite)
        for k in range_k:
            self.model(alpha=alpha,
                       beta=beta,
                       k=k,
                       shrink=shrink,
                       threshold=threshold)
            self.fast_recommend()
            self.clear_similarity()
            mean, df_all_values = self.fast_evaluate_eurm()
            self.clear_eurm()
            s_mean = 'P = %1.4f, NDCG = %1.4f, CLICK = %1.4f' % (
                mean[0], mean[1], mean[2])
            if verbose_tune: print(str(self) + '\n' + s_mean)
            # save values
            if save_mean: tp.print_mean_values(str(self), mean)
            # BUGFIX: keyword form; dataframe was being passed as dict_val.
            if save_full:
                tp.print_full_values(description=str(self),
                                     dict_val={'k': k},
                                     dataframe=df_all_values)
        # Render the collected full values (consistent with CB_AR_BM25).
        tp.make_pdf_full()

    def tune_shrink(self,
                    range_shrink=np.arange(25, 300, 25),
                    alpha=1,
                    beta=0,
                    k=200,
                    threshold=0,
                    verbose_tune=True,
                    filename='tuning_bm25_shrink',
                    overwrite=False,
                    save_mean=True,
                    save_full=True):
        """Sweep the similarity's shrink term, rebuilding the model each time."""
        tp = TunePrint(filename=filename,
                       full=save_full,
                       mean=save_mean,
                       overwrite=overwrite)
        for shrink in range_shrink:
            self.model(alpha=alpha,
                       beta=beta,
                       k=k,
                       shrink=shrink,
                       threshold=threshold)
            self.fast_recommend()
            self.clear_similarity()
            mean, df_all_values = self.fast_evaluate_eurm()
            self.clear_eurm()
            s_mean = 'P = %1.4f, NDCG = %1.4f, CLICK = %1.4f' % (
                mean[0], mean[1], mean[2])
            if verbose_tune: print(str(self) + '\n' + s_mean)
            # save values
            if save_mean: tp.print_mean_values(str(self), mean)
            # BUGFIX: keyword form; dataframe was being passed as dict_val.
            if save_full:
                tp.print_full_values(description=str(self),
                                     dict_val={'shrink': shrink},
                                     dataframe=df_all_values)
        # Render the collected full values (consistent with CB_AR_BM25).
        tp.make_pdf_full()
        print(arg)
        best = list(arg[1:].astype(np.float))
        w.append(best)

    for i in tqdm(range(1,11)):
        if mode == "offline":

            CBF_ALBUM = sps.load_npz(mode+"/offline-cbf_item_album-cat"+str(i)+".npz")
            CBF_ARTISTA = sps.load_npz(mode+"/offline-cbf_item_artist-cat"+str(i)+".npz")
            NLP = norm_max_row(sps.load_npz(mode + "/nlp_eurm_offline_bm25-cat" + str(1) + ".npz"))
            RP3BETA = sps.load_npz(mode+"/offline-rp3beta-cat"+str(i)+".npz")
            CF_USER = sps.load_npz(mode + "/cfu_eurm-cat"+str(i)+".npz")
            SLIM = sps.load_npz(mode +"/slim_bpr_completo_test1-cat"+str(i)+".npz")
            CBF_USER_ARTIST = sps.load_npz(mode +"/eurm_cbfu_artists_offline-cat"+str(i)+".npz")


        matrix = [CBF_ALBUM, CBF_ARTISTA, NLP, RP3BETA, CF_USER, SLIM, CBF_USER_ARTIST]

        we = w[i-1]

        res.append(ensembler(matrix, we, normalization_type="lele"))

    ret = sps.vstack(res).tocsr()
    if mode == "offline":
        ev.evaluate(eurm_to_recommendation_list(ret), "best_test", verbose=True)

#    sps.save_npz("ensemble_per_cat_"+mode+"_new_data_28_maggio.npz", ret)
    if mode == "online":
        sb = Submitter(dr)
        sb.submit(recommendation_list=eurm_to_recommendation_list_submission(ret), name="best_test", track="main", verify=True, gzipped=False)
                                 verbose=1,
                                 binary=False)
        sim = sim.tocsr()

        # Prediction
        eurm = dot_product(sim, urm, k=topk)
        eurm = eurm.tocsr()
        eurm = eurm[test_pids, :]

        # Save eurm
        if save_eurm:
            sps.save_npz('eurm_' + name + '_' + mode + '.npz', eurm)

        # Evaluation
        ev.evaluate(recommendation_list=eurm_to_recommendation_list(
            eurm, datareader=dr),
                    name=complete_name)

    elif mode == "online":
        # Initialization
        dr = Datareader(verbose=False, mode=mode, only_load=True)
        test_pids = list(dr.get_test_pids())
        sb = Submitter(dr)
        urm = dr.get_urm()

        # UCM
        ucm_artists = dr.get_ucm_albums()
        ucm_artists = bm25_row(ucm_artists)

        # Do not train on challenge set
        ucm_artists_T = ucm_artists.copy()
# Example #26
# UCMs
# Playlist-content matrix built from albums (duplicates removed).
ucm_album = dr.get_ucm_albums(remove_duplicates=True)
#ucm = dr.get_ucm_followers(n_clusters)

#ucm = sparse.hstack((ucm_album, ucm_followers))
#ucm = bm25_row(ucm)

# Similarity
print('Similarity..')
# Playlist-playlist tversky similarity, restricted to the test playlists.
sim = tversky_similarity(ucm_album,
                         ucm_album.T,
                         shrink=200,
                         target_items=test_pids,
                         alpha=0.1,
                         beta=1,
                         k=knn,
                         verbose=1,
                         binary=False)
sim = sim.tocsr()

# Prediction
eurm = dot_product(sim, urm, k=topk)
eurm = eurm.tocsr()
# Keep only the test playlists' rows.
eurm = eurm[test_pids, :]

# Evaluation
ev.evaluate(recommendation_list=eurm_to_recommendation_list(eurm,
                                                            datareader=dr),
            name='ucm_album_followers')
# Example #27
        # Compute the playlist-playlist similarity/model (tversky).
        rec.compute_model(top_k=knn,
                          sm_type=tversky_similarity,
                          shrink=200,
                          alpha=0.1,
                          beta=1,
                          binary=True,
                          verbose=True)

        # Compute the estimated ratings (top `topk` per playlist).
        rec.compute_rating(top_k=topk, verbose=True, small=True)

        # Save the eurm and evaluate the recommendations.
        sps.save_npz(complete_name + ".npz", rec.eurm)
        ev = Evaluator(dr)
        ev.evaluate(eurm_to_recommendation_list(rec.eurm), name=complete_name)

    elif mode == "online":
        """Submission"""
        # Data initialization
        dr = Datareader(verbose=True, mode=mode, only_load=False)

        # Recommender algorithm initialization.
        # NOTE(review): spelled 'Knn_collabrative_user' — presumably matches
        # the project's (misspelled) class name; confirm before renaming.
        rec = Knn_collabrative_user()

        # Inputs for the recommender algorithm.
        urm = dr.get_urm()
        pid = dr.get_test_pids()

        # Fitting data
        rec.fit(urm, pid)
# Compute similarity
ucm= bm25_row(ucm)

# Playlist-playlist tversky similarity from the BM25-weighted UCM.
similarity = tversky_similarity(ucm, binary=False, shrink=1, alpha=0.1, beta=1)
similarity = similarity.tocsr()
print(time.time() - start)


print('Computing eurm...')
start = time.time()
# Compute eurm
eurm = dot_product(similarity, urm, k=500)
eurm = eurm.tocsr()
# Keep only the test playlists' rows.
eurm = eurm[test_playlists, :]
print('eurm', eurm.shape)
print(time.time() - start)


# Evaluating
rec_list = eurm_to_recommendation_list(eurm)

# Persist both the eurm and the recommendation lists before evaluating.
sps.save_npz("nlp_eurm_online_bm25.npz", eurm, compressed=False)
np.save("nlp_rec_list_online_bm25",rec_list)

evaluator.evaluate(rec_list, name='AAANLP_bm25_'+nome, verbose=True, show_plot=False)





import sys
from scipy import sparse
import numpy as np
import utils.pre_processing as pre
from utils.definitions import *
from utils.datareader import Datareader
from utils.evaluator import Evaluator
from utils.pre_processing import *
from utils.post_processing import *

# Tune the mix of the NLP eurm with a popularity-filtered top eurm for cat 1:
# sweep the top-pop depth k and the mixing weight a, evaluating each combo.
dr = Datareader(mode='offline', only_load=True, verbose=False)
ev = Evaluator(dr)
urm = dr.get_urm(binary=True)
urm_csc = urm.tocsc(copy=True)

sim_nlp = sparse.load_npz(ROOT_DIR + '/data/sim_nlp_lele.npz')

# PERF: the NLP eurm is loop-invariant — load and normalize it once instead
# of re-reading it from disk on every iteration of the k loop (as before).
eurm_nlp = sparse.load_npz(ROOT_DIR + '/data/nlp_fusion_tuned_offline.npz')
eurm_nlp = norm_l1_row(eurm_nlp)

for k in [1, 2, 3, 4, 5]:
    eurm_top = dr.get_eurm_top_pop_filter_cat_1(sim_nlp, k, topk=500)
    eurm_top = norm_l1_row(eurm_top)

    for a in [0.05, 0.10, 0.15, 0.20]:
        # Convex combination of the two (row-L1-normalized) eurms.
        eurm = eurm_nlp * (1.0 - a) + eurm_top * a
        rec_list = eurm_to_recommendation_list(eurm, datareader=dr)
        ev.evaluate(rec_list, name='pop_first_k=' + str(k) + '_a=' + str(a))
# rp3b = sps.load_npz(ROOT_DIR + "/data/sub/EURM-rp3beta-online.npz")
# knn_c_i_al = sps.load_npz(ROOT_DIR + "/data/sub/KNN CONTENT ITEM-album-top_k=850-sm_type=cosine-shrink=100.npz")
# knn_c_i_ar = sps.load_npz(ROOT_DIR + "/data/sub/KNN CONTENT ITEM-artist-top_k=850-sm_type=cosine-shrink=100.npz")
nlp = sps.load_npz(ROOT_DIR + "/data/eurm_nlp_offline.npz")
# cf_u = sps.load_npz(ROOT_DIR + "/data/sub/eurm_cfu_online.npz")

# Pre-built ensemble of the individual models.
eurm_ens = sps.load_npz(ROOT_DIR + "/data/ENSEMBLED.npz")

#matrix = [rp3b, knn_c_i_ar, knn_c_i_al, nlp, cf_u]

#eurm_ens = ensembler(matrix, [0.720, 0.113, 0.177, 0.194, 1.0], normalization_type="max")

# HOLEBOOST on categories 8 and 10.
# NOTE(review): `sim` is not defined in this section — presumably loaded
# earlier in the script; confirm.
hb = HoleBoost(similarity=sim, eurm=eurm_ens, datareader=dr, norm=norm_l1_row)
eurm_ens = hb.boost_eurm(categories=[8, 10], k=300, gamma=5)

# NINEBOOST (a TailBoost driven by the last seed tracks).
nb = TailBoost(similarity=sim, eurm=eurm_ens, datareader=dr, norm=norm_l2_row)
eurm_ens = nb.boost_eurm(last_tracks=10, k=100, gamma=0.01)

rec_list = eurm_to_recommendation_list(eurm_ens)
rec_list_nlp = eurm_to_recommendation_list(nlp)

# For category 1 (presumably the title-only playlists) replace the ensemble
# rows with the pure NLP recommendations.
indices = dr.get_test_pids_indices(cat=1)
for i in indices:
    rec_list[i] = rec_list_nlp[i]

# EVALUATION
ev.evaluate(rec_list, name='ens_with_cfu_nineboosted', show_plot=False)