def __init__(self, tracks_data, at=10, k_cbf=10, shrinkage_cbf=10,
             k_i_i=700, shrinkage_i_i=200, k_u_u=200, shrinkage_u_u=50,
             similarity='cosine', tf_idf=True):
    """Record the hyper-parameters and instantiate the component models.

    Args:
        tracks_data: Forwarded unchanged as the first argument of ``CbfRS``.
        at: Forwarded as the leading numeric argument of every
            neighbourhood model (presumably the recommendation list
            length -- confirm against those classes).
        k_cbf, shrinkage_cbf: Forwarded to ``CbfRS``.
        k_i_i, shrinkage_i_i: Forwarded to ``ColBfIIRS``.
        k_u_u, shrinkage_u_u: Forwarded to ``ColBfUURS``.
        similarity: Only stored on the instance; no model receives it here.
        tf_idf: Forwarded as the ``tf_idf`` keyword of the three
            neighbourhood models.
    """
    # Plain bookkeeping of the configuration on the instance.
    self.at = at
    self.similarity = similarity
    self.tf_idf = tf_idf
    self.k_cbf, self.shrinkage_cbf = k_cbf, shrinkage_cbf
    self.k_i_i, self.shrinkage_i_i = k_i_i, shrinkage_i_i
    self.k_u_u, self.shrinkage_u_u = k_u_u, shrinkage_u_u

    # Neighbourhood models share the (at, k, shrinkage) positional layout.
    self.cbf_recommender = CbfRS(tracks_data, at, k_cbf, shrinkage_cbf,
                                 tf_idf=tf_idf)
    self.col_i_i_recommender = ColBfIIRS(at, k_i_i, shrinkage_i_i,
                                         tf_idf=tf_idf)
    self.col_u_u_recommender = ColBfUURS(at, k_u_u, shrinkage_u_u,
                                         tf_idf=tf_idf)

    # The ALS model uses a fixed configuration that is not exposed as
    # constructor arguments.
    self.als_recommender = IALS_numpy(num_factors=250, reg=100)
def __init__(self, tracks_data, at=10, k_cbf=10, shrinkage_cbf=10,
             k_i_i=700, shrinkage_i_i=200, similarity='cosine', tf_idf=True):
    """Record the hyper-parameters and instantiate the two component models.

    Args:
        tracks_data: Forwarded unchanged as the first argument of ``CbfRS``.
        at: Forwarded as the leading numeric argument of both models
            (presumably the recommendation list length -- confirm against
            those classes).
        k_cbf, shrinkage_cbf: Forwarded to ``CbfRS``.
        k_i_i, shrinkage_i_i: Forwarded to ``ColBfIIRS``.
        similarity: Only stored on the instance; no model receives it here.
        tf_idf: Forwarded as the ``tf_idf`` keyword of both models.
    """
    # Plain bookkeeping of the configuration on the instance.
    self.at = at
    self.similarity = similarity
    self.tf_idf = tf_idf
    self.k_cbf, self.shrinkage_cbf = k_cbf, shrinkage_cbf
    self.k_i_i, self.shrinkage_i_i = k_i_i, shrinkage_i_i

    # Both models share the (at, k, shrinkage) positional layout.
    self.cbf_recommender = CbfRS(tracks_data, at, k_cbf, shrinkage_cbf,
                                 tf_idf=tf_idf)
    self.col_i_i_recommender = ColBfIIRS(at, k_i_i, shrinkage_i_i,
                                         tf_idf=tf_idf)
import matplotlib.pyplot as plt

# Grid search over the CbfRS neighbourhood size (knn) and shrinkage value.
# Every combination is fitted on train_data, asked for recommendations for
# the target playlists, and scored with evaluator.evaluate against test_data;
# the score is recorded in the 'map' column.
evaluator = Evaluator()
result_columns = ['knn', 'map', 'shr']

# Both frames start with an all-zero seed row.
df = pd.DataFrame([[0, 0, 0]], columns=result_columns)
top_50 = pd.DataFrame([[0, 0, 0]], columns=result_columns)

plot_graph = False

# NOTE(review): the visible fragment never advanced `shrinkage`, so the
# original `while shrinkage < 50` loop could not terminate. The step below
# mirrors the knn step and is an assumption -- confirm the intended grid.
shrinkage_step = 10

for shrinkage in range(0, 50, shrinkage_step):
    map_list = []
    knn_list = []
    for k in range(10, 100, 10):
        rs = CbfRS(tracks_data, 10, k, shrinkage, tf_idf=False, bm25=True)
        rs.fit(train_data)
        print('knn: ', k, ' shrinkage: ', shrinkage)
        predictions = rs.recommend(target_data['playlist_id'])
        map_ = evaluator.evaluate(predictions, test_data)
        map_list.append(map_)
        knn_list.append(k)
        # DataFrame.append was removed in pandas 2.0; pd.concat builds the
        # same frame (row labels are kept as-is, exactly like append did).
        df = pd.concat(
            [df, pd.DataFrame([[k, map_, shrinkage]], columns=result_columns)])

    # Ascending sort + tail keeps the 50 best-scoring rows seen so far.
    top_50 = df.sort_values(by=['map']).tail(50)
    print(top_50)
    if plot_graph:
        plt.plot(knn_list, map_list, 'bs')
class HybridRS:
    """Hybrid recommender that blends the scores of several fitted models.

    ``recommend`` adds up the weighted estimated-rating matrices of
    ``CbfRS``, ``ColBfIIRS``, ``ColBfUURS``, ``SLIM_BPR_Cython`` and
    ``SLIMElasticNetRecommender`` and, per playlist, adds the weighted
    ``PureSVDRecommender`` scores before ranking.
    """

    # Class-level placeholder; none of the methods below read it. Kept so
    # existing references to HybridRS.train_data keep working.
    train_data = pd.DataFrame()

    def __init__(self, tracks_data, at=10, k_cbf=35, shrinkage_cbf=150,
                 k_i_i=700, shrinkage_i_i=200, k_u_u=200, shrinkage_u_u=50,
                 similarity='cosine', tf_idf=True):
        """Store the hyper-parameters and create the neighbourhood models.

        Args:
            tracks_data: Forwarded unchanged to ``CbfRS``.
            at: Number of tracks kept per playlist by ``recommend``; also
                forwarded to every neighbourhood model.
            k_cbf, shrinkage_cbf: Forwarded to ``CbfRS``.
            k_i_i, shrinkage_i_i: Forwarded to ``ColBfIIRS``.
            k_u_u, shrinkage_u_u: Forwarded to ``ColBfUURS``.
            similarity: Only stored on the instance.
            tf_idf: Forwarded to the two collaborative models. ``CbfRS`` is
                built without it, so it uses its own default.
        """
        self.k_cbf = k_cbf
        self.k_i_i = k_i_i
        self.k_u_u = k_u_u
        self.at = at
        self.shrinkage_cbf = shrinkage_cbf
        self.shrinkage_i_i = shrinkage_i_i
        self.shrinkage_u_u = shrinkage_u_u
        self.similarity = similarity
        self.tf_idf = tf_idf
        self.cbf_recommender = CbfRS(tracks_data, self.at, self.k_cbf,
                                     self.shrinkage_cbf)
        self.col_i_i_recommender = ColBfIIRS(self.at, self.k_i_i,
                                             self.shrinkage_i_i,
                                             tf_idf=self.tf_idf)
        self.col_u_u_recommender = ColBfUURS(self.at, self.k_u_u,
                                             self.shrinkage_u_u,
                                             tf_idf=self.tf_idf)

    def fit(self, train_data, lambda_i=0.001, lambda_j=0.001, topK_bpr=200,
            l1_ratio=0.1, topK_elasticNet=300, alpha_elasticNet=0.0002,
            sgd_mode='sgd'):
        """Fit every component model on ``train_data``.

        Args:
            train_data: Interaction table; must expose a ``track_id``
                column. Passed to ``buildURMMatrix`` and to every model.
            lambda_i, lambda_j, topK_bpr, sgd_mode: Forwarded to
                ``SLIM_BPR_Cython.fit``.
            l1_ratio, topK_elasticNet, alpha_elasticNet: Forwarded to
                ``SLIMElasticNetRecommender.fit``.
        """
        print('Fitting...')
        self.urm = buildURMMatrix(train_data)
        # The 20 most frequent tracks, used as filler candidates.
        self.top_pop_songs = (
            train_data['track_id'].value_counts().head(20).index.values)

        self.col_i_i_recommender.fit(train_data)
        self.col_u_u_recommender.fit(train_data)
        self.cbf_recommender.fit(train_data)

        self.slim_recommender = SLIM_BPR_Cython(train_data)
        self.slim_recommender.fit(lambda_i=lambda_i, lambda_j=lambda_j,
                                  topK=topK_bpr, sgd_mode=sgd_mode)

        self.slim_elasticNet_recommender = SLIMElasticNetRecommender(train_data)
        self.slim_elasticNet_recommender.fit(l1_ratio=l1_ratio,
                                             topK=topK_elasticNet,
                                             alpha=alpha_elasticNet)

        self.pureSVD = PureSVDRecommender(train_data)
        self.pureSVD.fit()

    def recommend(self, playlist_ids, alpha=0.1, beta=1, gamma=1, delta=2,
                  omega=30, eta=0.8, filter_top_pop=False):
        """Recommend up to ``self.at`` tracks for every requested playlist.

        Args:
            playlist_ids: Iterable of playlist ids, used as row indices of
                the blended score matrix and of ``self.urm``.
            alpha: Weight of the ``ColBfUURS`` scores.
            beta: Weight of the ``ColBfIIRS`` scores.
            gamma: Weight of the ``CbfRS`` scores.
            delta: Weight of the ``SLIM_BPR_Cython`` scores.
            omega: Weight of the ``SLIMElasticNetRecommender`` scores.
            eta: Weight of the per-playlist ``PureSVDRecommender`` scores.
            filter_top_pop: When true, the most popular tracks are removed
                from the candidates through ``filter_seen_array``.

        Returns:
            ``pandas.DataFrame`` with columns ``playlist_id`` and
            ``track_ids`` (space-separated ids). Playlists whose lookup
            raises ``IndexError`` are reported and left out.
        """
        print("Recommending... Am I filtering top_top songs?", filter_top_pop)
        final_prediction = {}

        # e_r_ stands for estimated rating
        e_r_cbf = self.cbf_recommender.get_estimated_ratings()
        e_r_col_i_i = self.col_i_i_recommender.get_estimated_ratings()
        e_r_col_u_u = self.col_u_u_recommender.get_estimated_ratings()
        e_r_slim_bpr = self.slim_recommender.get_estimated_ratings()
        e_r_slim_elasticNet = (
            self.slim_elasticNet_recommender.get_estimated_ratings())

        estimated_ratings_final = (
            e_r_col_u_u.multiply(alpha)
            + e_r_col_i_i.multiply(beta)
            + e_r_cbf.multiply(gamma)
            + e_r_slim_bpr.multiply(delta)
            + e_r_slim_elasticNet.multiply(omega)
        )
        print('after sum..')

        for counter, k in enumerate(playlist_ids):
            try:
                row = estimated_ratings_final[k]
                # Add the weighted PureSVD scores of this playlist.
                mf_row = sparse.csr_matrix(
                    self.pureSVD.compute_score_SVD(k)).multiply(eta)
                row = row + mf_row

                # Column indices of the stored scores, best score first.
                indx = row.data.argsort()[::-1]
                aux = row.indices[indx]
                user_playlist = self.urm[k]
                # Popular tracks go last so they only fill remaining slots.
                aux = np.concatenate((aux, self.top_pop_songs), axis=None)
                top_songs = filter_seen(aux, user_playlist)
                if filter_top_pop:
                    top_songs = filter_seen_array(top_songs, self.top_pop_songs)
                top_songs = top_songs[:self.at]

                # Compare against the configured length, not a literal 10,
                # so the warning stays correct when `at` is not 10.
                if len(top_songs) < self.at:
                    print("Francisco was right once")
                final_prediction[k] = ' '.join(str(e) for e in top_songs)
            except IndexError:
                print("I don't have a value in the test_data")
            # NOTE: the total in this progress message is a fixed literal.
            if counter % 1000 == 0:
                print("Playlist num", counter, "/10000")

        return pd.DataFrame(list(final_prediction.items()),
                            columns=['playlist_id', 'track_ids'])
# Set-up for comparing recommenders: fits the models and creates one empty
# result list per model.
evaluator = Evaluator()

# Differences between consecutive indptr entries of URM_train
# (per-row stored-entry counts if URM_train is CSR -- TODO confirm).
profile_length = np.ediff1d(URM_train.indptr)
# 5% of the number of profiles, truncated to an int.
block_size = int(len(profile_length) * 0.05)
# Profile indices ordered from shortest to longest profile.
sorted_users = np.argsort(profile_length)

rs_i_i_cf = ColBfIIRS(10, 750, 50, tf_idf=True)
rs_i_i_cf.fit(train_data)
# predictions_item_item = rs_i_i_cf.recommend(target_data['playlist_id'])
map_item_item = []

# Disabled experiment (was wrapped in a string literal):
# rs_u_u_cf = ColBfUURS(10, 200, 50, tf_idf=True)
# rs_u_u_cf.fit(train_data)
# predictions_user_user = rs_u_u_cf.recommend(target_data['playlist_id'])
# map_user_user = []

rs_content = CbfRS(tracks_data, 10, 10, 10, tf_idf=True)
ICM_all = buildICMMatrix(tracks_data, 1, 1, use_tracks_duration=False)
rs_content.fit(train_data)
predictions_content = rs_content.recommend(target_data['playlist_id'])
# NOTE(review): the returned score is not stored anywhere in this fragment.
evaluator.evaluate(predictions_content, test_data)
map_content_based = []

# Disabled experiments. NOTE(review): these sat inside a triple-quoted string
# that is never closed within the visible fragment; they are kept as comments
# so the live code above parses. Confirm where the disabled region ended.
# rs_pureSVD = PureSVDRecommender(train_data)
# rs_pureSVD.fit()
# predictions_pureSVD = rs_pureSVD.recommend(target_data['playlist_id'])
# map_pureSVD = []
# rs_mf_skl = MfNnz(train_data)
# rs_mf_skl.fit()
# prediction_mf_skl = rs_mf_skl.recommend(target_data['playlist_id'])
# map_mf_sl = []
class HybridRS:
    """Weighted blend of five recommenders' estimated-rating matrices.

    The blended models are ``CbfRS``, ``ColBfIIRS``, ``ColBfUURS``,
    ``SLIM_BPR_Cython`` and ``SLIMElasticNetRecommender``.
    """

    # Class-level placeholder; none of the methods below read it.
    train_data = pd.DataFrame()

    def __init__(self, tracks_data, at=10, k_cbf=10, shrinkage_cbf=10,
                 k_i_i=700, shrinkage_i_i=200, k_u_u=200, shrinkage_u_u=50,
                 similarity='cosine', tf_idf=True):
        """Store the hyper-parameters and create the neighbourhood models.

        Args:
            tracks_data: Forwarded unchanged to ``CbfRS``.
            at: Number of tracks kept per playlist by ``recommend``; also
                forwarded to every neighbourhood model.
            k_cbf, shrinkage_cbf: Forwarded to ``CbfRS``.
            k_i_i, shrinkage_i_i: Forwarded to ``ColBfIIRS``.
            k_u_u, shrinkage_u_u: Forwarded to ``ColBfUURS``.
            similarity: Only stored on the instance.
            tf_idf: Forwarded as ``tf_idf`` to the three models.
        """
        self.at = at
        self.similarity = similarity
        self.tf_idf = tf_idf
        self.k_cbf, self.shrinkage_cbf = k_cbf, shrinkage_cbf
        self.k_i_i, self.shrinkage_i_i = k_i_i, shrinkage_i_i
        self.k_u_u, self.shrinkage_u_u = k_u_u, shrinkage_u_u

        self.cbf_recommender = CbfRS(tracks_data, at, k_cbf, shrinkage_cbf,
                                     tf_idf=tf_idf)
        self.col_i_i_recommender = ColBfIIRS(at, k_i_i, shrinkage_i_i,
                                             tf_idf=tf_idf)
        self.col_u_u_recommender = ColBfUURS(at, k_u_u, shrinkage_u_u,
                                             tf_idf=tf_idf)

    def fit(self, train_data, lambda_i=0.001, lambda_j=0.001, topK_bpr=200,
            l1_ratio=0.1, topK_elasticNet=300, alpha_elasticNet=0.0002,
            sgd_mode='sgd'):
        """Fit every component model on ``train_data``.

        Args:
            train_data: Interaction table; must expose a ``track_id``
                column. Passed to ``buildURMMatrix`` and to every model.
            lambda_i, lambda_j, topK_bpr, sgd_mode: Forwarded to
                ``SLIM_BPR_Cython.fit``.
            l1_ratio, topK_elasticNet, alpha_elasticNet: Forwarded to
                ``SLIMElasticNetRecommender.fit``.
        """
        print('Fitting...')
        self.urm = buildURMMatrix(train_data)

        # The 20 most frequent tracks, used as filler candidates.
        track_counts = train_data['track_id'].value_counts()
        self.top_pop_songs = track_counts.head(20).index.values

        for model in (self.col_i_i_recommender,
                      self.col_u_u_recommender,
                      self.cbf_recommender):
            model.fit(train_data)

        self.slim_recommender = SLIM_BPR_Cython(train_data)
        self.slim_recommender.fit(lambda_i=lambda_i, lambda_j=lambda_j,
                                  topK=topK_bpr, sgd_mode=sgd_mode)

        self.slim_elasticNet_recommender = SLIMElasticNetRecommender(train_data)
        self.slim_elasticNet_recommender.fit(l1_ratio=l1_ratio,
                                             topK=topK_elasticNet,
                                             alpha=alpha_elasticNet)

    def recommend(self, playlist_ids, alpha=0.1, beta=1, gamma=1, delta=2,
                  omega=30, filter_top_pop=False):
        """Recommend up to ``self.at`` tracks for every requested playlist.

        Args:
            playlist_ids: Iterable of playlist ids, used as row indices of
                the blended score matrix and of ``self.urm``.
            alpha: Weight of the ``ColBfUURS`` scores.
            beta: Weight of the ``ColBfIIRS`` scores.
            gamma: Weight of the ``CbfRS`` scores.
            delta: Weight of the ``SLIM_BPR_Cython`` scores.
            omega: Weight of the ``SLIMElasticNetRecommender`` scores.
            filter_top_pop: Only echoed in the start-up message; no
                filtering depends on it in this method.

        Returns:
            ``pandas.DataFrame`` with columns ``playlist_id`` and
            ``track_ids`` (space-separated ids). Playlists whose lookup
            raises ``IndexError`` are reported and left out.
        """
        print("Recommending... Am I filtering top_top songs?", filter_top_pop)

        ratings_cbf = self.cbf_recommender.get_estimated_ratings()
        ratings_item = self.col_i_i_recommender.get_estimated_ratings()
        ratings_user = self.col_u_u_recommender.get_estimated_ratings()
        ratings_bpr = self.slim_recommender.get_estimated_ratings()
        ratings_enet = self.slim_elasticNet_recommender.get_estimated_ratings()

        # Left-to-right weighted accumulation of the five score matrices.
        blended = ratings_user.multiply(alpha)
        for scores, weight in ((ratings_item, beta),
                               (ratings_cbf, gamma),
                               (ratings_bpr, delta),
                               (ratings_enet, omega)):
            blended = blended + scores.multiply(weight)

        recommendations = {}
        for position, playlist in enumerate(playlist_ids):
            try:
                playlist_scores = blended[playlist]
                # Column indices of the stored scores, best score first.
                best_first = playlist_scores.data.argsort()[::-1]
                ranked_tracks = playlist_scores.indices[best_first]
                already_in_playlist = self.urm[playlist]
                # Popular tracks go last so they only fill remaining slots.
                candidates = np.concatenate(
                    (ranked_tracks, self.top_pop_songs), axis=None)
                chosen = filter_seen(candidates, already_in_playlist)[:self.at]
                recommendations[playlist] = ' '.join(map(str, chosen))
            except IndexError:
                print("I don't have a value in the test_data")
            if position % 1000 == 0:
                print("Playlist num", position, "/10000")

        return pd.DataFrame(list(recommendations.items()),
                            columns=['playlist_id', 'track_ids'])
class HybridRS:
    """Weighted blend of three recommenders' estimated-rating matrices.

    The blended models are ``CbfRS``, ``ColBfIIRS`` and ``ColBfUURS``.
    """

    def __init__(self, tracks_data, at=10, k_cbf=10, shrinkage_cbf=10,
                 k_i_i=700, shrinkage_i_i=200, k_u_u=200, shrinkage_u_u=50,
                 similarity='cosine', tf_idf=True):
        """Store the hyper-parameters and create the three models.

        Args:
            tracks_data: Forwarded unchanged to ``CbfRS``.
            at: Number of tracks kept per playlist by ``recommend``; also
                forwarded to every model.
            k_cbf, shrinkage_cbf: Forwarded to ``CbfRS``.
            k_i_i, shrinkage_i_i: Forwarded to ``ColBfIIRS``.
            k_u_u, shrinkage_u_u: Forwarded to ``ColBfUURS``.
            similarity: Only stored on the instance.
            tf_idf: Forwarded as ``tf_idf`` to the three models.
        """
        self.at = at
        self.similarity = similarity
        self.tf_idf = tf_idf
        self.k_cbf, self.shrinkage_cbf = k_cbf, shrinkage_cbf
        self.k_i_i, self.shrinkage_i_i = k_i_i, shrinkage_i_i
        self.k_u_u, self.shrinkage_u_u = k_u_u, shrinkage_u_u

        self.cbf_recommender = CbfRS(tracks_data, at, k_cbf, shrinkage_cbf,
                                     tf_idf=tf_idf)
        self.col_i_i_recommender = ColBfIIRS(at, k_i_i, shrinkage_i_i,
                                             tf_idf=tf_idf)
        self.col_u_u_recommender = ColBfUURS(at, k_u_u, shrinkage_u_u,
                                             tf_idf=tf_idf)

    def fit(self, train_data):
        """Fit the three models on ``train_data``.

        Args:
            train_data: Interaction table; must expose a ``track_id``
                column. Passed to ``buildURMMatrix`` and to every model.
        """
        self.urm = buildURMMatrix(train_data)

        # The 20 most frequent tracks, used as filler candidates.
        track_counts = train_data['track_id'].value_counts()
        self.top_pop_songs = track_counts.head(20).index.values

        for model in (self.col_i_i_recommender,
                      self.col_u_u_recommender,
                      self.cbf_recommender):
            model.fit(train_data)
        print("All systems are fitted")

    def recommend(self, playlist_ids, alpha=1, beta=5, gamma=7):
        """Recommend up to ``self.at`` tracks for every requested playlist.

        Args:
            playlist_ids: Iterable of playlist ids, used as row indices of
                the blended score matrix and of ``self.urm``.
            alpha: Weight of the ``ColBfUURS`` scores.
            beta: Weight of the ``ColBfIIRS`` scores.
            gamma: Weight of the ``CbfRS`` scores.

        Returns:
            ``pandas.DataFrame`` with columns ``playlist_id`` and
            ``track_ids`` (space-separated ids). Playlists whose lookup
            raises ``IndexError`` are reported and left out.
        """
        print("Recommending...")

        ratings_cbf = self.cbf_recommender.get_estimated_ratings()
        ratings_item = self.col_i_i_recommender.get_estimated_ratings()
        ratings_user = self.col_u_u_recommender.get_estimated_ratings()

        # Left-to-right weighted accumulation of the three score matrices.
        blended = ratings_user.multiply(alpha)
        for scores, weight in ((ratings_item, beta), (ratings_cbf, gamma)):
            blended = blended + scores.multiply(weight)

        recommendations = {}
        for position, playlist in enumerate(playlist_ids):
            try:
                playlist_scores = blended[playlist]
                # Column indices of the stored scores, best score first.
                best_first = playlist_scores.data.argsort()[::-1]
                ranked_tracks = playlist_scores.indices[best_first]
                already_in_playlist = self.urm[playlist]
                # Popular tracks go last so they only fill remaining slots.
                candidates = np.concatenate(
                    (ranked_tracks, self.top_pop_songs), axis=None)
                chosen = filter_seen(candidates, already_in_playlist)[:self.at]
                recommendations[playlist] = ' '.join(map(str, chosen))
            except IndexError:
                print("I don't have a value in the test_data")
            if position % 1000 == 0:
                print("Playlist num", position, "/10000")

        return pd.DataFrame(list(recommendations.items()),
                            columns=['playlist_id', 'track_ids'])
class HybridRS:
    """Hybrid recommender built from blended similarity matrices.

    ``recommend`` sums the weighted similarity matrices of six models,
    multiplies the URM by that sum, adds the weighted ``ColBfUURS``
    estimated ratings and, per playlist, the weighted ``PureSVDRecommender``
    scores.
    """

    # Class-level placeholder; none of the methods below read it.
    train_data = pd.DataFrame()

    def __init__(self, tracks_data, at=10, k_cbf=10, shrinkage_cbf=10,
                 k_i_i=700, shrinkage_i_i=200, k_u_u=200, shrinkage_u_u=50,
                 similarity='cosine', tf_idf=True, bm_25=False):
        """Store the hyper-parameters and create the neighbourhood models.

        Args:
            tracks_data: Forwarded unchanged to ``CbfRS``.
            at: Number of tracks kept per playlist by ``recommend``; also
                forwarded to every neighbourhood model.
            k_cbf, shrinkage_cbf: Forwarded to ``CbfRS``.
            k_i_i, shrinkage_i_i: Forwarded to ``ColBfIIRS``.
            k_u_u, shrinkage_u_u: Forwarded to ``ColBfUURS``.
            similarity: Only stored on the instance.
            tf_idf: Forwarded as ``tf_idf`` to the three models.
            bm_25: Accepted but neither stored nor forwarded here.
        """
        self.at = at
        self.similarity = similarity
        self.tf_idf = tf_idf
        self.k_cbf, self.shrinkage_cbf = k_cbf, shrinkage_cbf
        self.k_i_i, self.shrinkage_i_i = k_i_i, shrinkage_i_i
        self.k_u_u, self.shrinkage_u_u = k_u_u, shrinkage_u_u

        self.cbf_recommender = CbfRS(tracks_data, at, k_cbf, shrinkage_cbf,
                                     tf_idf=tf_idf)
        self.col_i_i_recommender = ColBfIIRS(at, k_i_i, shrinkage_i_i,
                                             tf_idf=tf_idf)
        self.col_u_u_recommender = ColBfUURS(at, k_u_u, shrinkage_u_u,
                                             tf_idf=tf_idf)

    def fit(self, train_data, lambda_i=0.001, lambda_j=0.001, topK_bpr=200,
            l1_ratio=0.1, topK_elasticNet=300, alpha_elasticNet=0.0002,
            sgd_mode='sgd'):
        """Fit every component model on ``train_data``.

        Args:
            train_data: Interaction table; must expose a ``track_id``
                column. Passed to ``buildURMMatrix`` and to every model.
            lambda_i, lambda_j, topK_bpr, sgd_mode: Forwarded to
                ``SLIM_BPR_Cython.fit``.
            l1_ratio, topK_elasticNet, alpha_elasticNet: Forwarded to
                ``SLIMElasticNetRecommender.fit``.
        """
        print('Fitting...')
        self.urm = buildURMMatrix(train_data)

        # The 20 most frequent tracks, used as filler candidates.
        track_counts = train_data['track_id'].value_counts()
        self.top_pop_songs = track_counts.head(20).index.values

        for model in (self.col_i_i_recommender,
                      self.col_u_u_recommender,
                      self.cbf_recommender):
            model.fit(train_data)

        self.pureSVD = PureSVDRecommender(train_data)
        self.pureSVD.fit()

        self.p3alpha = P3alphaRecommender(train_data)
        self.p3alpha.fit()

        self.rp3beta = RP3betaRecommender(train_data)
        self.rp3beta.fit()

        self.slim_recommender = SLIM_BPR_Cython(train_data)
        self.slim_recommender.fit(lambda_i=lambda_i, lambda_j=lambda_j,
                                  topK=topK_bpr, sgd_mode=sgd_mode)

        self.slim_elasticNet_recommender = SLIMElasticNetRecommender(train_data)
        self.slim_elasticNet_recommender.fit(l1_ratio=l1_ratio,
                                             topK=topK_elasticNet,
                                             alpha=alpha_elasticNet)

    def recommend(self, playlist_ids, alpha=0.2, beta=10, gamma=1, delta=2,
                  omega=30, eta=10, theta=30, sigma=1, filter_top_pop=False):
        """Recommend up to ``self.at`` tracks for every requested playlist.

        Args:
            playlist_ids: Iterable of playlist ids, used as row indices of
                the final score matrix and of ``self.urm``.
            alpha: Weight of the ``ColBfUURS`` estimated ratings.
            beta: Passed to ``ColBfIIRS.get_sym_matrix``.
            gamma: Passed to ``CbfRS.get_sym_matrix``.
            delta: Passed to ``SLIM_BPR_Cython.get_sym_matrix``.
            omega: Passed to ``SLIMElasticNetRecommender.get_sym_matrix``.
            eta: Weight of the per-playlist ``PureSVDRecommender`` scores.
            theta: Passed to ``P3alphaRecommender.get_sym_matrix``.
            sigma: Passed to ``RP3betaRecommender.get_sym_matrix``.
            filter_top_pop: Only echoed in the start-up message; no
                filtering depends on it in this method.

        Returns:
            ``pandas.DataFrame`` with columns ``playlist_id`` and
            ``track_ids`` (space-separated ids). Playlists whose lookup
            raises ``IndexError`` are reported and left out.
        """
        print("Recommending... Am I filtering top_top songs?", filter_top_pop)

        sim_cbf = self.cbf_recommender.get_sym_matrix(gamma)
        sim_item_cf = self.col_i_i_recommender.get_sym_matrix(beta)
        sim_p3alpha = self.p3alpha.get_sym_matrix(theta)
        sim_rp3beta = self.rp3beta.get_sym_matrix(sigma)
        sim_slim_bpr = self.slim_recommender.get_sym_matrix(delta)
        sim_elastic_net = self.slim_elasticNet_recommender.get_sym_matrix(omega)

        combined_similarity = (sim_cbf + sim_item_cf + sim_p3alpha
                               + sim_slim_bpr + sim_elastic_net + sim_rp3beta)

        # Scores from the combined similarity: URM times similarity.
        item_based_scores = self.urm * combined_similarity
        user_based_scores = self.col_u_u_recommender.get_estimated_ratings()
        blended = user_based_scores.multiply(alpha) + item_based_scores

        recommendations = {}
        for playlist in tqdm(playlist_ids):
            try:
                # Dense score vector: blended row plus weighted SVD scores.
                svd_scores = self.pureSVD.compute_score_SVD(playlist) * eta
                playlist_scores = blended[playlist].toarray()[0] + svd_scores
                # Every column index, best score first.
                ranked_tracks = playlist_scores.argsort()[::-1]
                already_in_playlist = self.urm[playlist]
                # Popular tracks go last so they only fill remaining slots.
                candidates = np.concatenate(
                    (ranked_tracks, self.top_pop_songs), axis=None)
                chosen = filter_seen(candidates, already_in_playlist)[:self.at]
                recommendations[playlist] = ' '.join(map(str, chosen))
            except IndexError:
                print("I don't have a value in the test_data")

        return pd.DataFrame(list(recommendations.items()),
                            columns=['playlist_id', 'track_ids'])