# Settings for the "maurizio" feature-weighting run.
mode = "offline"
knn = 100
topk = 750
# Run label derived from the settings above (not referenced again in the
# visible part of this script).
complete_name = "maurizio_" + mode + "__knn=" + str(knn) + "_topk=" + str(topk)

if __name__ == '__main__':
    # Precomputed similarity matrix used as the fitting target for the weights.
    sim = sps.load_npz(ROOT_DIR + "/similarities/offline-similarity_rp3beta_knn100.npz")
    dr = Datareader(mode=mode, only_load=True)

    ######### MAURIZ
    ICM = dr.get_icm(alid=True)
    # The model receives a copy of the ICM; the original is reused below to
    # build the weighted matrix.
    cfw = CFW_D_Similarity_Linalg(URM_train=dr.get_urm(),
                                  ICM=ICM.copy(),
                                  S_matrix_target=sim,
                                  URM_validation=None)
    cfw.fit()

    # Diagonal matrix holding the weights exposed by the fitted model as D_best.
    weights = sps.diags(cfw.D_best)
    # Saved under its own name: previously both save_npz calls used
    # "ICM_fw_maurizio", so this matrix was overwritten by the weighted ICM
    # right after being written.
    sps.save_npz("ICM_fw_maurizio_weights", weights)

    # Scale every ICM column by its weight and persist the result.
    ICM_weighted = ICM.dot(weights)
    sps.save_npz("ICM_fw_maurizio", ICM_weighted)
# Fragment of a content-based KNN script, collapsed onto a single line.
# It opens inside an if/elif/else chain on `feature_type` whose first `if`
# header is outside this view; the visible branches set the `album` / `artist`
# booleans that are later passed to get_icm(alid=..., arid=...).
# After the chain the line:
#   * builds `complete_name` from mode, name, feature_type, knn and topk;
#   * when mode == "offline": loads the URM, the selected ICM and the test
#     pids through Datareader, passes the ICM through pre.bm25_row, fits a
#     Knn_content_item on (urm, icm_bm25, pid), calls compute_model with
#     sm_type=TVERSKY, shrink=100, alpha=0.1, then compute_rating with
#     top_k=topk, and saves cbfi.eurm to complete_name + ".npz".
# NOTE(review): the `else` branch only prints "invalid type"; unless `album`
# and `artist` are assigned earlier (not visible here), the later get_icm call
# would fail with NameError -- confirm and consider aborting instead.
# NOTE(review): the original nesting cannot be recovered from this collapsed
# line; presumably every statement after `if mode == "offline":` belongs to
# that branch -- verify against the original file.
album = True artist = False elif feature_type == 'artist': album = False artist = True else: print("invalid type") complete_name = mode + "_" + name + "_" + feature_type + "_knn=" + str( knn) + "_topk=" + str(topk) if mode == "offline": dr = Datareader(verbose=False, mode=mode, only_load=True) urm = dr.get_urm() icm = dr.get_icm(arid=artist, alid=album) pid = dr.get_test_pids() icm_bm25 = pre.bm25_row(icm) cbfi = Knn_content_item() cbfi.fit(urm, icm_bm25, pid) cbfi.compute_model(top_k=knn, sm_type=TVERSKY, shrink=100, alpha=0.1, binary=False, verbose=True) cbfi.compute_rating(top_k=topk, verbose=True, small=True) sps.save_npz(complete_name + ".npz", cbfi.eurm)
# NOTE: the import order is kept exactly as in the original because the star
# imports below may shadow names bound by earlier imports.
import numpy as np
import sys
from utils.datareader import Datareader
from utils.definitions import *
from utils.evaluator import Evaluator
from sklearn.utils.sparsefuncs import inplace_csr_column_scale
from recommenders.similarity.dot_product import dot_product
from recommenders.similarity.s_plus import tversky_similarity
import time
from utils.post_processing import *
from scipy import sparse
from utils.pre_processing import *
from utils.submitter import Submitter
from fbpca import pca
from fbpca import set_matrix_mult

# Experiment: reduce the artist ICM to 100 components with randomized PCA.
datareader = Datareader(mode='offline', only_load=True)
evaluator = Evaluator(datareader)
urm = datareader.get_urm()
icm = datareader.get_icm(arid=True)

print('PCA...')
# fbpca.pca returns the factors (U, s, Va) of a rank-k approximation
# U @ diag(s) @ Va of its input.
u, s, v = pca(icm, k=100)

print('Dot...')
# The original called set_matrix_mult() here. That function is fbpca's setter
# for the matrix-multiply routine: it requires the new routine as an argument
# and returns nothing, so the call raised TypeError and could never produce a
# matrix. Build the reduced representation from the computed factors instead:
# scaling the columns of U by the singular values gives the rows of the input
# projected onto the principal directions.
# NOTE(review): confirm this projection is the intended "dot" step.
icm_new = u * s
print(icm_new)
# Extract the (track, playlist) index pairs from the current dataframe, then
# drop the dataframe.
trs = df['tid'].values
n = len(df)
pids = df['new_pid'].values
del df

n_pids = pids.max() + 1  # index starts from 0

# create partial icm: one row per track, one column per re-indexed playlist id
icm_ = sp.csr_matrix((np.ones(n), (trs, pids)), shape=(n_tracks, n_pids), dtype=np.int32)
icm_cat8 = sp.hstack([icm_cat8, icm_])
icm_cat10 = sp.hstack([icm_cat10, icm_])

# Feature blocks shared by both hybrid matrices.
urm = dr.get_urm()
icm_pl = urm.copy().T  # transposed URM used as a feature block
icm_al = dr.get_icm(arid=False, alid=True)
icm_ar = dr.get_icm(arid=True, alid=False)

# Each hybrid matrix is assembled with a single hstack over the full block
# list. The original appended icm_pl in a loop, re-copying the whole growing
# matrix on every iteration; the column layout produced here is the same.

# hybrid cat8: album and artist blocks followed by 5 copies of icm_pl
icm_cat8 = sp.hstack([icm_cat8, icm_al, icm_ar] + [icm_pl] * 5)

# hybrid cat10: album and artist blocks followed by 3 copies of icm_pl
icm_cat10 = sp.hstack([icm_cat10, icm_al, icm_ar] + [icm_pl] * 3)

# dump the icms
sp.save_npz(ROOT_DIR + '/data/hybrid_icm_cat8_' + mode + '.npz', icm_cat8)