def online():
    """Assemble the boosted online ensemble and submit it on the creative track.

    Two pre-computed EURMs are read from ``ROOT_DIR + '/data/jess/...'`` and
    merged with ``combine_two_eurms`` (``cat_first=[3, 4, 5, 8, 10]``). The
    merged matrix is then passed through HoleBoost, TailBoost and AlbumBoost,
    in that order, converted with ``eurm_to_recommendation_list_submission``
    and handed to ``Submitter.submit``.
    """
    # Shared helpers, both configured for the online split.
    datareader = Datareader(mode='online', only_load=True, verbose=False)
    submitter = Submitter(datareader)

    # Load the two ensembles and merge them.
    clustered_eurm = sparse.load_npz(
        ROOT_DIR +
        '/data/jess/ensembled_CLUSTERARTISTS_CREATIVA_cat3-4-5-8-10_online.npz')
    standard_eurm = sparse.load_npz(
        ROOT_DIR + '/data/jess/ensembled_creativeFIRE_online.npz')
    boosted = combine_two_eurms(clustered_eurm,
                                standard_eurm,
                                cat_first=[3, 4, 5, 8, 10])

    # Alternative starting point that was left disabled:
    # boosted = sparse.load_npz(ROOT_DIR + '/data/ensembled_creativeFIRE_online.npz')
    similarity = sparse.load_npz(ROOT_DIR + '/data/sim_online.npz')

    # HoleBoost: one pass per category, each pass working on the output of
    # the previous one and using its own k.
    for category, k_value in ((8, 300), (10, 150)):
        hole_boost = HoleBoost(similarity=similarity,
                               eurm=boosted,
                               datareader=datareader,
                               norm=norm_l1_row)
        boosted = hole_boost.boost_eurm(categories=[category],
                                        k=k_value,
                                        gamma=1)

    # TailBoost: the per-category lists are positionally aligned.
    tail_boost = TailBoost(similarity=similarity,
                           eurm=boosted,
                           datareader=datareader,
                           norm=norm_l2_row)
    boosted = tail_boost.boost_eurm(categories=[9, 7, 6, 5],
                                    last_tracks=[10, 3, 3, 3],
                                    k=[100, 80, 100, 100],
                                    gamma=[0.01] * 4)

    # AlbumBoost
    album_boost = AlbumBoost(datareader, boosted)
    boosted = album_boost.boost_eurm(categories=[3, 4, 7, 9],
                                     gamma=2,
                                     top_k=[3, 3, 10, 40])

    # MatchBoost is currently disabled:
    # mb = MatchBoost(datareader=dr, eurm=eurm_ens, top_k_alb=5000, top_k_art=10000)
    # eurm_ens, pids = mb.boost_eurm(categories='all', k_art=20, k_alb=20, gamma_art=1.0, gamma_alb=1.0)

    # Build the recommendation list and submit it.
    recommendations = eurm_to_recommendation_list_submission(
        boosted, datareader=datareader)
    submitter.submit(recommendations,
                     name='ens_30_june_jess+lele_boosts',
                     track='creative')
# Example #2
# 0
# File: run.py  Project: qlan3/Explorer
def main(argv):
    """Configure a batch of cluster jobs and launch them through ``Submitter``.

    Builds the sbatch option mapping and the general submission settings,
    passes ``output/<job-name>`` to ``make_dir`` and then calls
    ``Submitter.submit``.

    :param argv: accepted for the entry-point signature; not read here.
    """
    # Sbatch options, inserted in the same order as they are listed.
    sbatch_cfg = {}
    sbatch_cfg['account'] = 'rrg-whitem'    # account name
    sbatch_cfg['job-name'] = 'catcher'      # job name
    sbatch_cfg['time'] = '0-10:00:00'       # job time
    # NOTE(review): this is the only key carrying a leading '--'; confirm
    # that Submitter expects it in this form.
    sbatch_cfg['--cpus-per-task'] = 1       # GPU/CPU type
    sbatch_cfg['mem-per-cpu'] = '2000M'     # memory
    sbatch_cfg['mail-user'] = '******'      # email address

    # Backup presets for other games, as (job-name, time, mem-per-cpu):
    #   'catcher', '0-10:00:00', '2000M'
    #   'copter',  '0-05:00:00', '2000M'
    #   'lunar',   '0-07:00:00', '2000M'
    #   'minatar', '1-08:00:00', '4000M'

    # Job indexes to submit: 1..30 inclusive.
    first_job, last_job = 1, 30

    general_cfg = {}
    general_cfg['user'] = '******'                  # user name
    general_cfg['script-path'] = './sbatch.sh'      # sbatch script path
    general_cfg['check-time-interval'] = 5          # check interval, in minutes
    general_cfg['clusters'] = {'Cedar': 3000}       # clusters info: {name: capacity}
    general_cfg['job-list'] = list(range(first_job, last_job + 1))

    output_dir = f"output/{sbatch_cfg['job-name']}"
    make_dir(output_dir)

    job_submitter = Submitter(general_cfg, sbatch_cfg)
    job_submitter.submit()
def submission(boost, eurm_ens, sim, name):
    """
    Submit an ensemble eurm, optionally post-processed by the boost chain.
    :param boost: when truthy, HoleBoost, TailBoost and AlbumBoost are applied
                  (in that order) before the submission is built
    :param eurm_ens: eurm from ensemble (10k x 2.2M)
    :param sim: similarity matrix (tracks x tracks)
    :param name: name of the submission
    """
    # Shared helpers, both configured for the online split.
    datareader = Datareader(mode='online', only_load=True, verbose=False)
    submitter = Submitter(datareader)

    if boost:
        # Each booster consumes the eurm produced by the previous step.
        eurm_ens = HoleBoost(
            similarity=sim,
            eurm=eurm_ens,
            datareader=datareader,
            norm=norm_l1_row,
        ).boost_eurm(categories=[8, 10], k=300, gamma=5)

        # The per-category lists below are positionally aligned.
        eurm_ens = TailBoost(
            similarity=sim,
            eurm=eurm_ens,
            datareader=datareader,
            norm=norm_l2_row,
        ).boost_eurm(
            categories=[9, 7, 6, 5],
            last_tracks=[10, 3, 3, 3],
            k=[100, 80, 100, 100],
            gamma=[0.01] * 4,
        )

        eurm_ens = AlbumBoost(datareader, eurm_ens).boost_eurm(
            categories=[3, 4, 7, 9],
            gamma=2,
            top_k=[3, 3, 10, 40],
        )

    # Build the recommendation list and submit it under the given name.
    recommendations = eurm_to_recommendation_list_submission(
        eurm_ens, datareader=datareader)
    submitter.submit(recommendations, name=name)
# Example #4
# 0
        urm = dr.get_urm()
        pid = dr.get_test_pids()

        #Fitting data
        rec.fit(urm, pid)

        #Computing similarity/model
        rec.compute_model(top_k=knn,
                          sm_type=sm.TVERSKY,
                          shrink=200,
                          alpha=0.1,
                          beta=1,
                          binary=True,
                          verbose=True)

        #Computing ratings
        rec.compute_rating(top_k=topk, verbose=True, small=True)

        #submission and saving
        sps.save_npz(complete_name + ".npz", rec.eurm)
        sb = Submitter(dr)
        sb.submit(recommendation_list=eurm_to_recommendation_list_submission(
            rec.eurm),
                  name=complete_name,
                  track="main",
                  verify=True,
                  gzipped=True)

    else:
        print("invalid mode.")
    print("[ Initizalizing Datereader ]")
    dr = Datareader(verbose=False, mode=mode, only_load="False")

    res = eurm_to_recommendation_list(res, datareader=dr)

    if mode == "offline":
        print("[ Initizalizing Evaluator ]")
        ev = Evaluator(dr)
        ev.evaluate(res, name="ens")

    if mode == "online":
        print("[ Initizalizing Submitter ]")
        sb = Submitter(dr)
        sb.submit(recommendation_list=res,
                  name=name,
                  track="main",
                  verify=True,
                  gzipped=False)

#
#
#
# if type == "splitted":
#     mode = "offline"
#
#     print("[ Loading weights ]")
#     w_rprec = []
#     tmp = 0
#     for i in range(1, 11):
#         arg = np.load("rprec/cat" + str(i) + ".npy")
#         tmp += -float(arg[-1])
        print(arg)
        best = list(arg[1:].astype(np.float))
        w.append(best)

    for i in tqdm(range(1,11)):
        if mode == "offline":

            CBF_ALBUM = sps.load_npz(mode+"/offline-cbf_item_album-cat"+str(i)+".npz")
            CBF_ARTISTA = sps.load_npz(mode+"/offline-cbf_item_artist-cat"+str(i)+".npz")
            NLP = norm_max_row(sps.load_npz(mode + "/nlp_eurm_offline_bm25-cat" + str(1) + ".npz"))
            RP3BETA = sps.load_npz(mode+"/offline-rp3beta-cat"+str(i)+".npz")
            CF_USER = sps.load_npz(mode + "/cfu_eurm-cat"+str(i)+".npz")
            SLIM = sps.load_npz(mode +"/slim_bpr_completo_test1-cat"+str(i)+".npz")
            CBF_USER_ARTIST = sps.load_npz(mode +"/eurm_cbfu_artists_offline-cat"+str(i)+".npz")


        matrix = [CBF_ALBUM, CBF_ARTISTA, NLP, RP3BETA, CF_USER, SLIM, CBF_USER_ARTIST]

        we = w[i-1]

        res.append(ensembler(matrix, we, normalization_type="lele"))

    ret = sps.vstack(res).tocsr()
    if mode == "offline":
        ev.evaluate(eurm_to_recommendation_list(ret), "best_test", verbose=True)

#    sps.save_npz("ensemble_per_cat_"+mode+"_new_data_28_maggio.npz", ret)
    if mode == "online":
        sb = Submitter(dr)
        sb.submit(recommendation_list=eurm_to_recommendation_list_submission(ret), name="best_test", track="main", verify=True, gzipped=False)
        # Similarity
        print('Similarity..')
        sim = tversky_similarity(ucm_artists,
                                 ucm_artists_T,
                                 shrink=200,
                                 target_items=test_pids,
                                 alpha=0.1,
                                 beta=1,
                                 k=knn,
                                 verbose=1,
                                 binary=False)
        sim = sim.tocsr()

        # Prediction
        eurm = dot_product(sim, urm, k=topk)
        eurm = eurm.tocsr()
        eurm = eurm[test_pids, :]

        # Save eurm
        if save_eurm:
            sps.save_npz('eurm_' + name + '_' + mode + '.npz', eurm)

        # Submission
        sb.submit(recommendation_list=eurm_to_recommendation_list(
            eurm, datareader=dr),
                  name=complete_name)

    else:
        print("Invalid mode!")
# Example #8
# 0
####### POSTPROCESSING #################################################################
# Merges the clustered-approach eurm with the ensembled one, runs the
# HoleBoost -> TailBoost -> AlbumBoost chain and submits on the creative track.
# `clustered_approach_online`, `ensembled`, `dr` and `sb` must already be defined.

# COMBINE
eurm_ens = combine_two_eurms(
    clustered_approach_online,
    ensembled,
    cat_first=[3, 4, 5, 8, 10],
)
sim = generate_similarity('online')

# HOLEBOOST: one pass per (category, k) pair, each on the previous pass's output
for hb_category, hb_k in ((8, 300), (10, 150)):
    hb = HoleBoost(similarity=sim,
                   eurm=eurm_ens,
                   datareader=dr,
                   norm=norm_l1_row)
    eurm_ens = hb.boost_eurm(categories=[hb_category], k=hb_k, gamma=1)

# TAILBOOST: the per-category lists are positionally aligned
tb = TailBoost(similarity=sim,
               eurm=eurm_ens,
               datareader=dr,
               norm=norm_l2_row)
eurm_ens = tb.boost_eurm(
    categories=[9, 7, 6, 5],
    last_tracks=[10, 3, 3, 3],
    k=[100, 80, 100, 100],
    gamma=[0.01] * 4,
)

# ALBUMBOOST
ab = AlbumBoost(dr, eurm_ens)
eurm_ens = ab.boost_eurm(
    categories=[3, 4, 7, 9],
    gamma=2,
    top_k=[3, 3, 10, 40],
)

# SUBMISSION
rec_list = eurm_to_recommendation_list_submission(eurm_ens, datareader=dr)
sb.submit(rec_list, name='creative_track', track='creative')
####### POSTPROCESSING #################################################################
# Merges the clustered-approach eurm with the ensembled one, runs the
# HoleBoost -> TailBoost -> AlbumBoost chain and submits on the main track.
# `clustered_approach_online`, `ensembled`, `dr` and `sb` must already be defined.

# COMBINE
eurm_ens = combine_two_eurms(
    clustered_approach_online,
    ensembled,
    cat_first=[4, 5, 6, 8, 10],
)
sim = generate_similarity('online')

# HOLEBOOST: one pass per (category, k) pair, each on the previous pass's output
for hb_category, hb_k in ((8, 300), (10, 150)):
    hb = HoleBoost(similarity=sim,
                   eurm=eurm_ens,
                   datareader=dr,
                   norm=norm_l1_row)
    eurm_ens = hb.boost_eurm(categories=[hb_category], k=hb_k, gamma=1)

# TAILBOOST: the per-category lists are positionally aligned
tb = TailBoost(similarity=sim,
               eurm=eurm_ens,
               datareader=dr,
               norm=norm_l2_row)
eurm_ens = tb.boost_eurm(
    categories=[9, 7, 6, 5],
    last_tracks=[10, 3, 3, 3],
    k=[100, 80, 100, 100],
    gamma=[0.01] * 4,
)

# ALBUMBOOST
ab = AlbumBoost(dr, eurm_ens)
eurm_ens = ab.boost_eurm(
    categories=[3, 4, 7, 9],
    gamma=2,
    top_k=[3, 3, 10, 40],
)

# SUBMISSION
rec_list = eurm_to_recommendation_list_submission(eurm_ens, datareader=dr)
sb.submit(rec_list, name='main_track', track='main')
    mode = 'online'

    if mode == 'offline':
        # Initialization
        dr = Datareader(mode='offline', only_load=True, verbose=False)
        ev = Evaluator(dr)

        # Prediction
        eurm = compute_SVD(dr, n_factors, top_k, save_eurm=True)

        # Evaluation
        print('N_FACTORS =', n_factors)
        ev.evaluate(eurm_to_recommendation_list(eurm, datareader=dr), name='svd_' + str(n_factors))

    elif mode == 'online':
        # Initialization
        dr = Datareader(mode='online', only_load=True, verbose=False)
        sb = Submitter(dr)

        # Prediction
        eurm = compute_SVD(dr, n_factors, top_k, save_eurm=True)

        # Submission
        sb.submit(eurm_to_recommendation_list_submission(eurm, datareader=dr), name='svd_' + str(n_factors))

    else:
        print('Wrong mode!')



from personal.Tommaso.Recommenders.top_pop_rec import TopPopRecommender
from utils.datareader import Datareader
from utils.submitter import Submitter
"""
This script shows how to correctly perform a submission.
Initialize a Submitter object (here it is built from a Datareader)
and then call its submit method, which takes as input a numpy array
of recommendations of shape (10000, 500).
"""

# SUBMITTER
dr = Datareader(mode='online', only_load=True)
sb = Submitter(dr)

# TOP POP
t = TopPopRecommender()
t.fit(dr.get_df_train_interactions(), dr.get_df_test_interactions())
# `.values` is used instead of `.as_matrix()`: the latter was deprecated in
# pandas 0.23 and removed in pandas 1.0, while `.values` is available on
# every pandas version and yields the same ndarray of test playlist ids.
# NOTE(review): assumes get_df_test_playlists() returns a pandas DataFrame.
rec_list = t.make_recommendation(dr.get_df_test_playlists()['pid'].values)

# SUBMISSION
# rec_list is an ordered list of recommendations.
# This submission will be rejected because it contains duplicate occurrences.
sb.submit(recommendation_list=rec_list,
          name='top_pop',
          track='main',
          verify=True,
          gzipped=False)
        # Do not train on challenge set
        ucm_T = ucm.copy()
        inplace_set_rows_zero(ucm_T, test_pids).astype(np.float64)
        ucm_T = ucm_T.T

        # Compute similarity (playlists x playlists)
        sim = tversky_similarity(ucm,
                                 ucm_T,
                                 shrink=200,
                                 alpha=0.9,
                                 beta=1,
                                 k=knn)
        sim = sim.tocsr()

        # Recommendation
        eurm = dot_product(sim, urm, k=topk)
        eurm = eurm.tocsr()
        eurm = eurm[test_pids, :]

        rec_list = eurm_to_recommendation_list(eurm, datareader=dr)

        if save_eurm:
            sps.save_npz(mode + "_" + name + ".npz", eurm, compressed=False)

        # Submission
        sb.submit(rec_list, name=name)

    else:
        print('Wrong mode!')
# Example #13
# 0
        urm = urm[pids]

        ucm = bm25_row(ucm)

        similarity = tversky_similarity(ucm,
                                        binary=False,
                                        shrink=1,
                                        alpha=0.1,
                                        beta=1)
        similarity = similarity.tocsr()

        print(similarity.shape, urm.shape)
        eurm = dot_product(similarity, urm, k=topk)
        eurm = eurm.tocsr()
        eurm = eurm[-10000:]

        eurm = eurm_remove_seed(eurm, dr)
        rec_list = eurm_to_recommendation_list(eurm)

        sps.save_npz(mode + "_" + name + "_knn" + str(knn) + "_bm25.npz",
                     eurm,
                     compressed=False)
        np.save(mode + "_" + name + "_knn" + str(knn) + "_bm25", rec_list)

        sb = Submitter(dr)
        sb.submit(rec_list,
                  name=name,
                  track="main",
                  verify=True,
                  gzipped=False)
# Example #14
# 0
        eurm = dot_product(user_profile_batch, slim.W_sparse, k=500).tocsr()
        recommendation_list = eurm_to_recommendation_list(eurm)

        # calculating eurm, evaluation, save
        user_profile_batch = slim.URM_train[pids_converted]
        eurm = dot_product(user_profile_batch, slim.W_sparse, k=500).tocsr()
        recommendation_list = eurm_to_recommendation_list(eurm)

        sps.save_npz(ROOT_DIR + "/results/" + complete_name + ".npz",
                     eurm,
                     compressed=False)

        sb = Submitter(dr)
        sb.submit(
            recommendation_list=eurm_to_recommendation_list_submission(eurm),
            name=name,
            track="main",
            verify=True,
            gzipped=False)

    else:
        print("invalid mode.")

    # ev.evaluate(recommendation_list=recommendation_list,
    #              name="slim ")

    # except Exception as e:
    #     bot.error("Exception "+str(e))
    #
    # bot.end()

        norm = best_params_dict['norm']
        del best_params_dict['norm']
        # cutting and  dot the value from ensemble
        eurms_full = [ value_from_bayesian * norms[norm](matrices_loaded[name][start_index:end_index])
                        for name, value_from_bayesian in best_params_dict.items()]
        # and summing up
        eurms_cutted[cat-1] = sum( [ matrix for matrix in eurms_full] )

        # adding to reclist
        rec_list[start_index:end_index] = eurm_to_recommendation_list(eurm=eurms_cutted[cat-1],
                                                                      cat=cat,
                                                                      verbose=False)[start_index:end_index]

    eurm = eurms_cutted[0]
    for i in range(1,10):
        eurm = sps.vstack([eurm, eurms_cutted[i]])


    sps.save_npz(file='../'+configuration_name+'/ensembled_'+configuration_name+'_'+mode, matrix=eurm)

    if mode=='offline':
        ev = Evaluator(dr)
        ev.evaluate(recommendation_list=rec_list, name=configuration_name)
    else:
        sb = Submitter(dr)
        sb.submit(recommendation_list=rec_list, name=configuration_name)