def online():
    """Run the online creative-track post-processing pipeline and submit.

    Two pre-computed ensemble eurms are loaded from ``ROOT_DIR/data/jess`` and
    merged with ``combine_two_eurms`` (``cat_first=[3, 4, 5, 8, 10]``). The
    merged eurm then goes through HoleBoost, TailBoost and AlbumBoost, in that
    order, before being converted to a recommendation list and submitted on
    the 'creative' track.
    """
    # Data access and submission helpers
    reader = Datareader(mode='online', only_load=True, verbose=False)
    submitter = Submitter(reader)

    # Input ensembles and their combination
    clustered_eurm = sparse.load_npz(
        ROOT_DIR + '/data/jess/ensembled_CLUSTERARTISTS_CREATIVA_cat3-4-5-8-10_online.npz'
    )
    standard_eurm = sparse.load_npz(
        ROOT_DIR + '/data/jess/ensembled_creativeFIRE_online.npz'
    )
    eurm = combine_two_eurms(clustered_eurm, standard_eurm,
                             cat_first=[3, 4, 5, 8, 10])

    # Alternative input kept for reference (a single ensemble instead of the
    # combination above):
    # eurm = sparse.load_npz(ROOT_DIR + '/data/ensembled_creativeFIRE_online.npz')
    similarity = sparse.load_npz(ROOT_DIR + '/data/sim_online.npz')

    # HoleBoost: one pass per category, each with its own k. A new booster is
    # built for every pass so it starts from the eurm produced by the
    # previous one.
    for category, k in ((8, 300), (10, 150)):
        hole_boost = HoleBoost(similarity=similarity, eurm=eurm,
                               datareader=reader, norm=norm_l1_row)
        eurm = hole_boost.boost_eurm(categories=[category], k=k, gamma=1)

    # TailBoost: per-category settings are passed as parallel lists.
    tail_boost = TailBoost(similarity=similarity, eurm=eurm,
                           datareader=reader, norm=norm_l2_row)
    eurm = tail_boost.boost_eurm(
        categories=[9, 7, 6, 5],
        last_tracks=[10, 3, 3, 3],
        k=[100, 80, 100, 100],
        gamma=[0.01, 0.01, 0.01, 0.01],
    )

    # AlbumBoost
    album_boost = AlbumBoost(reader, eurm)
    eurm = album_boost.boost_eurm(categories=[3, 4, 7, 9], gamma=2,
                                  top_k=[3, 3, 10, 40])

    # MatchBoost is currently disabled; settings kept for reference:
    # mb = MatchBoost(datareader=dr, eurm=eurm_ens, top_k_alb=5000, top_k_art=10000)
    # eurm_ens, pids = mb.boost_eurm(categories='all', k_art=20, k_alb=20,
    #                                gamma_art=1.0, gamma_alb=1.0)

    # Submission
    recommendations = eurm_to_recommendation_list_submission(eurm, datareader=reader)
    submitter.submit(recommendations, name='ens_30_june_jess+lele_boosts',
                     track='creative')
def main(argv):
    """Configure a batch of cluster jobs and hand them to ``Submitter``.

    Builds the sbatch option table and the general run settings, creates the
    ``output/<job-name>`` directory through ``make_dir``, then constructs a
    ``Submitter`` from both tables and calls its ``submit`` method.

    :param argv: command-line arguments; not read by this function.
    """
    # Presets previously used for the different games
    # (job-name, time, mem-per-cpu):
    #   'catcher', '0-10:00:00', '2000M'
    #   'copter',  '0-05:00:00', '2000M'
    #   'lunar',   '0-07:00:00', '2000M'
    #   'minatar', '1-08:00:00', '4000M'
    slurm_options = {
        'account': 'rrg-whitem',      # account name
        'job-name': 'catcher',        # job name
        'time': '0-10:00:00',         # job time
        # GPU/CPU type.
        # NOTE(review): this is the only key written with leading dashes;
        # confirm that is what Submitter expects.
        '--cpus-per-task': 1,
        'mem-per-cpu': '2000M',       # memory
        'mail-user': '******',        # email address
    }

    run_options = {
        'user': '******',                  # user name
        'script-path': './sbatch.sh',      # sbatch script path
        'check-time-interval': 5,          # check time interval in minutes
        'clusters': {'Cedar': 3000},       # clusters info: {name: capacity}
        'job-list': list(range(1, 31)),    # job indexes 1..30
    }

    # The output directory is created before the submitter is built.
    make_dir(f"output/{slurm_options['job-name']}")
    Submitter(run_options, slurm_options).submit()
def submission(boost, eurm_ens, sim, name):
    """
    Turn an ensemble eurm into a submission, optionally boosting it first.

    :param boost: when truthy, HoleBoost, TailBoost and AlbumBoost are applied
                  (in this order) before the submission is created
    :param eurm_ens: eurm from ensemble (10k x 2.2M)
    :param sim: similarity matrix (tracks x tracks)
    :param name: name of the submission
    """
    # Reader and submitter are always built in online mode.
    reader = Datareader(mode='online', only_load=True, verbose=False)
    submitter = Submitter(reader)

    eurm = eurm_ens
    if boost:
        # HoleBoost on categories 8 and 10 in a single pass
        hole_boost = HoleBoost(similarity=sim, eurm=eurm, datareader=reader,
                               norm=norm_l1_row)
        eurm = hole_boost.boost_eurm(categories=[8, 10], k=300, gamma=5)

        # TailBoost: per-category settings are passed as parallel lists
        tail_boost = TailBoost(similarity=sim, eurm=eurm, datareader=reader,
                               norm=norm_l2_row)
        eurm = tail_boost.boost_eurm(
            categories=[9, 7, 6, 5],
            last_tracks=[10, 3, 3, 3],
            k=[100, 80, 100, 100],
            gamma=[0.01, 0.01, 0.01, 0.01],
        )

        # AlbumBoost
        album_boost = AlbumBoost(reader, eurm)
        eurm = album_boost.boost_eurm(categories=[3, 4, 7, 9], gamma=2,
                                      top_k=[3, 3, 10, 40])

    # Convert to a recommendation list and submit under the given name.
    recommendations = eurm_to_recommendation_list_submission(eurm, datareader=reader)
    submitter.submit(recommendations, name=name)
urm = dr.get_urm() pid = dr.get_test_pids() #Fitting data rec.fit(urm, pid) #Computing similarity/model rec.compute_model(top_k=knn, sm_type=sm.TVERSKY, shrink=200, alpha=0.1, beta=1, binary=True, verbose=True) #Computing ratings rec.compute_rating(top_k=topk, verbose=True, small=True) #submission and saving sps.save_npz(complete_name + ".npz", rec.eurm) sb = Submitter(dr) sb.submit(recommendation_list=eurm_to_recommendation_list_submission( rec.eurm), name=complete_name, track="main", verify=True, gzipped=True) else: print("invalid mode.")
# Build the datareader for the current mode and convert `res` into a
# recommendation list with eurm_to_recommendation_list.
print("[ Initizalizing Datereader ]")
# NOTE(review): only_load is the *string* "False", which is truthy in Python.
# Confirm whether a boolean was intended and, if so, which one.
dr = Datareader(verbose=False, mode=mode, only_load="False")
res = eurm_to_recommendation_list(res, datareader=dr)

# Offline mode: pass the recommendation list to the Evaluator.
if mode == "offline":
    print("[ Initizalizing Evaluator ]")
    ev = Evaluator(dr)
    ev.evaluate(res, name="ens")

# Online mode: submit the recommendation list on the "main" track.
if mode == "online":
    print("[ Initizalizing Submitter ]")
    sb = Submitter(dr)
    sb.submit(recommendation_list=res, name=name, track="main", verify=True, gzipped=False)

#
#
#
# if type == "splitted":
#     mode = "offline"
#
#     print("[ Loading weights ]")
#     w_rprec = []
#     tmp = 0
#     for i in range(1, 11):
#         arg = np.load("rprec/cat" + str(i) + ".npy")
#         tmp += -float(arg[-1])
print(arg) best = list(arg[1:].astype(np.float)) w.append(best) for i in tqdm(range(1,11)): if mode == "offline": CBF_ALBUM = sps.load_npz(mode+"/offline-cbf_item_album-cat"+str(i)+".npz") CBF_ARTISTA = sps.load_npz(mode+"/offline-cbf_item_artist-cat"+str(i)+".npz") NLP = norm_max_row(sps.load_npz(mode + "/nlp_eurm_offline_bm25-cat" + str(1) + ".npz")) RP3BETA = sps.load_npz(mode+"/offline-rp3beta-cat"+str(i)+".npz") CF_USER = sps.load_npz(mode + "/cfu_eurm-cat"+str(i)+".npz") SLIM = sps.load_npz(mode +"/slim_bpr_completo_test1-cat"+str(i)+".npz") CBF_USER_ARTIST = sps.load_npz(mode +"/eurm_cbfu_artists_offline-cat"+str(i)+".npz") matrix = [CBF_ALBUM, CBF_ARTISTA, NLP, RP3BETA, CF_USER, SLIM, CBF_USER_ARTIST] we = w[i-1] res.append(ensembler(matrix, we, normalization_type="lele")) ret = sps.vstack(res).tocsr() if mode == "offline": ev.evaluate(eurm_to_recommendation_list(ret), "best_test", verbose=True) # sps.save_npz("ensemble_per_cat_"+mode+"_new_data_28_maggio.npz", ret) if mode == "online": sb = Submitter(dr) sb.submit(recommendation_list=eurm_to_recommendation_list_submission(ret), name="best_test", track="main", verify=True, gzipped=False)
# Similarity print('Similarity..') sim = tversky_similarity(ucm_artists, ucm_artists_T, shrink=200, target_items=test_pids, alpha=0.1, beta=1, k=knn, verbose=1, binary=False) sim = sim.tocsr() # Prediction eurm = dot_product(sim, urm, k=topk) eurm = eurm.tocsr() eurm = eurm[test_pids, :] # Save eurm if save_eurm: sps.save_npz('eurm_' + name + '_' + mode + '.npz', eurm) # Submission sb.submit(recommendation_list=eurm_to_recommendation_list( eurm, datareader=dr), name=complete_name) else: print("Invalid mode!")
####### POSTPROCESSING ################################################################# # COMBINE eurm_ens = combine_two_eurms(clustered_approach_online, ensembled, cat_first=[3, 4, 5, 8, 10]) sim = generate_similarity('online') # HOLEBOOST hb = HoleBoost(similarity=sim, eurm=eurm_ens, datareader=dr, norm=norm_l1_row) eurm_ens = hb.boost_eurm(categories=[8], k=300, gamma=1) hb = HoleBoost(similarity=sim, eurm=eurm_ens, datareader=dr, norm=norm_l1_row) eurm_ens = hb.boost_eurm(categories=[10], k=150, gamma=1) # TAILBOOST tb = TailBoost(similarity=sim, eurm=eurm_ens, datareader=dr, norm=norm_l2_row) eurm_ens = tb.boost_eurm(categories=[9, 7, 6, 5], last_tracks=[10, 3, 3, 3], k=[100, 80, 100, 100], gamma=[0.01, 0.01, 0.01, 0.01]) # ALBUMBOOST ab = AlbumBoost(dr, eurm_ens) eurm_ens = ab.boost_eurm(categories=[3, 4, 7, 9], gamma=2, top_k=[3, 3, 10, 40]) # SUBMISSION rec_list = eurm_to_recommendation_list_submission(eurm_ens, datareader=dr) sb.submit(rec_list, name='creative_track', track='creative')
####### POSTPROCESSING ################################################################# # COMBINE eurm_ens = combine_two_eurms(clustered_approach_online, ensembled, cat_first=[4, 5, 6, 8, 10]) sim = generate_similarity('online') # HOLEBOOST hb = HoleBoost(similarity=sim, eurm=eurm_ens, datareader=dr, norm=norm_l1_row) eurm_ens = hb.boost_eurm(categories=[8], k=300, gamma=1) hb = HoleBoost(similarity=sim, eurm=eurm_ens, datareader=dr, norm=norm_l1_row) eurm_ens = hb.boost_eurm(categories=[10], k=150, gamma=1) # TAILBOOST tb = TailBoost(similarity=sim, eurm=eurm_ens, datareader=dr, norm=norm_l2_row) eurm_ens = tb.boost_eurm(categories=[9, 7, 6, 5], last_tracks=[10, 3, 3, 3], k=[100, 80, 100, 100], gamma=[0.01, 0.01, 0.01, 0.01]) # ALBUMBOOST ab = AlbumBoost(dr, eurm_ens) eurm_ens = ab.boost_eurm(categories=[3, 4, 7, 9], gamma=2, top_k=[3, 3, 10, 40]) # SUBMISSION rec_list = eurm_to_recommendation_list_submission(eurm_ens, datareader=dr) sb.submit(rec_list, name='main_track', track='main')
mode = 'online'

# Run the SVD pipeline for the selected mode. Both known modes call
# compute_SVD with save_eurm=True; they differ only in whether the result is
# submitted (online) or evaluated (offline). Any other value is reported.
if mode == 'online':
    # Reader and submitter
    dr = Datareader(mode='online', only_load=True, verbose=False)
    sb = Submitter(dr)

    # Prediction
    eurm = compute_SVD(dr, n_factors, top_k, save_eurm=True)

    # Submission, named after the number of factors
    sb.submit(
        eurm_to_recommendation_list_submission(eurm, datareader=dr),
        name='svd_' + str(n_factors),
    )
elif mode == 'offline':
    # Reader and evaluator
    dr = Datareader(mode='offline', only_load=True, verbose=False)
    ev = Evaluator(dr)

    # Prediction
    eurm = compute_SVD(dr, n_factors, top_k, save_eurm=True)

    # Evaluation, named after the number of factors
    print('N_FACTORS =', n_factors)
    ev.evaluate(
        eurm_to_recommendation_list(eurm, datareader=dr),
        name='svd_' + str(n_factors),
    )
else:
    print('Wrong mode!')
from personal.Tommaso.Recommenders.top_pop_rec import TopPopRecommender
from utils.datareader import Datareader
from utils.submitter import Submitter

"""
This script shows how to perform correctly a submission.
Basically you have to initialize a Submitter object with csv files and then
call the method submit which takes in input a numpy array of recommendations
of shape (10.000, 500).
"""

# SUBMITTER
# The submitter is built on top of an online-mode datareader.
dr = Datareader(mode='online', only_load=True)
sb = Submitter(dr)

# TOP POP
# Fit the top-popular recommender on the train/test interactions, then ask for
# recommendations for every test playlist id.
t = TopPopRecommender()
t.fit(dr.get_df_train_interactions(), dr.get_df_test_interactions())
# `.values` replaces `.as_matrix()`, which was deprecated in pandas 0.23 and
# removed in pandas 1.0; it returns the same numpy array of pids.
rec_list = t.make_recommendation(dr.get_df_test_playlists()['pid'].values)

# SUBMISSION
# rec_list is an ordered list of recommendations
# This submission will be rejected due to duplicate occurrences.
sb.submit(recommendation_list=rec_list, name='top_pop', track='main', verify=True, gzipped=False)
# Do not train on challenge set ucm_T = ucm.copy() inplace_set_rows_zero(ucm_T, test_pids).astype(np.float64) ucm_T = ucm_T.T # Compute similarity (playlists x playlists) sim = tversky_similarity(ucm, ucm_T, shrink=200, alpha=0.9, beta=1, k=knn) sim = sim.tocsr() # Recommendation eurm = dot_product(sim, urm, k=topk) eurm = eurm.tocsr() eurm = eurm[test_pids, :] rec_list = eurm_to_recommendation_list(eurm, datareader=dr) if save_eurm: sps.save_npz(mode + "_" + name + ".npz", eurm, compressed=False) # Submission sb.submit(rec_list, name=name) else: print('Wrong mode!')
# Computes an eurm from a bm25-weighted UCM through Tversky similarity, saves
# the eurm and the recommendation list, then submits on the "main" track.
# `urm`, `ucm`, `pids`, `topk`, `knn`, `mode`, `name` and `dr` are defined
# outside this section.

# Keep only the URM rows selected by `pids`.
urm = urm[pids]

# Re-weight the UCM with bm25_row, then compute the similarity on it.
ucm = bm25_row(ucm)
# NOTE(review): `knn` appears in the output file names below but is not passed
# to tversky_similarity in this call — confirm that is intended.
similarity = tversky_similarity(ucm, binary=False, shrink=1, alpha=0.1, beta=1)
similarity = similarity.tocsr()
print(similarity.shape, urm.shape)

# Prediction: similarity x URM with k=topk.
eurm = dot_product(similarity, urm, k=topk)
eurm = eurm.tocsr()
# Only the last 10000 rows are kept — presumably the test playlists; confirm.
eurm = eurm[-10000:]
eurm = eurm_remove_seed(eurm, dr)

rec_list = eurm_to_recommendation_list(eurm)

# Save the eurm (uncompressed .npz) and the recommendation list (.npy) under
# the same file-name prefix.
sps.save_npz(mode + "_" + name + "_knn" + str(knn) + "_bm25.npz", eurm, compressed=False)
np.save(mode + "_" + name + "_knn" + str(knn) + "_bm25", rec_list)

# Submission
sb = Submitter(dr)
sb.submit(rec_list, name=name, track="main", verify=True, gzipped=False)
eurm = dot_product(user_profile_batch, slim.W_sparse, k=500).tocsr() recommendation_list = eurm_to_recommendation_list(eurm) # calculating eurm, evaluation, save user_profile_batch = slim.URM_train[pids_converted] eurm = dot_product(user_profile_batch, slim.W_sparse, k=500).tocsr() recommendation_list = eurm_to_recommendation_list(eurm) sps.save_npz(ROOT_DIR + "/results/" + complete_name + ".npz", eurm, compressed=False) sb = Submitter(dr) sb.submit( recommendation_list=eurm_to_recommendation_list_submission(eurm), name=name, track="main", verify=True, gzipped=False) else: print("invalid mode.") # ev.evaluate(recommendation_list=recommendation_list, # name="slim ") # except Exception as e: # bot.error("Exception "+str(e)) # # bot.end()
norm = best_params_dict['norm'] del best_params_dict['norm'] # cutting and dot the value from ensemble eurms_full = [ value_from_bayesian * norms[norm](matrices_loaded[name][start_index:end_index]) for name, value_from_bayesian in best_params_dict.items()] # and summing up eurms_cutted[cat-1] = sum( [ matrix for matrix in eurms_full] ) # adding to reclist rec_list[start_index:end_index] = eurm_to_recommendation_list(eurm=eurms_cutted[cat-1], cat=cat, verbose=False)[start_index:end_index] eurm = eurms_cutted[0] for i in range(1,10): eurm = sps.vstack([eurm, eurms_cutted[i]]) sps.save_npz(file='../'+configuration_name+'/ensembled_'+configuration_name+'_'+mode, matrix=eurm) if mode=='offline': ev = Evaluator(dr) ev.evaluate(recommendation_list=rec_list, name=configuration_name) else: sb = Submitter(dr) sb.submit(recommendation_list=rec_list, name=configuration_name)