def calculate_metrics(df):
    """Compute ranking metrics for the p2v and d2v recommenders and persist results.

    For each cutoff k in 1..10 and {20, 30, 40, 50, 100, 200, 300, 500}, adds
    four metric columns (average precision, recall, reciprocal rank, NDCG) per
    model to ``df``, pickles the per-paper frame, then writes the column means
    to a TSV.

    Args:
        df: DataFrame with columns 'p2v_binary', 'd2v_binary' (binary relevance
            lists of the recommendations) and 'ground_truth'.

    Side effects:
        Writes a pickle of the per-paper metrics and a TSV of the mean metrics
        to hard-coded paths.
    """
    klist = list(range(1, 11))
    klist.extend([20, 30, 40, 50, 100, 200, 300, 500])
    print(klist)
    # 18 cutoffs x 4 metrics x 2 models = 144 columns added.
    # NOTE: the original duplicated this body per model and mis-capitalised one
    # column ('reciprocal_rank_D2v_'); looping over the model prefix fixes both.
    for k in tqdm(klist):
        for model in ('p2v', 'd2v'):
            binary_col = '{}_binary'.format(model)
            df['average_precision_{}_{}'.format(model, k)] = df[binary_col].apply(
                lambda x: average_precision(x, k))
            # recall needs the ground-truth count, so operate row-wise.
            df['recall_{}_{}'.format(model, k)] = df[[binary_col, 'ground_truth']].apply(
                lambda row: recall_at_k(row[binary_col], row['ground_truth'], k), axis=1)
            df['reciprocal_rank_{}_{}'.format(model, k)] = df[binary_col].apply(
                lambda x: reciprocal_rank(x, k))
            df['ndcg_{}_{}'.format(model, k)] = df[binary_col].apply(
                lambda x: ndcg(x, k))
    df.to_pickle('/home/ashwath/Programs/MAGCS/Pickles/paperwisemetrics_mag50_d2v_p2v_may23_df.pickle')
    print("METRICS CALCULATED, time to calculate the means")
    # Drop the non-numeric source columns so only metric columns are averaged.
    df = df.drop(['p2v_recommendations', 'p2v_binary', 'd2v_recommendations',
                  'd2v_binary', 'ground_truth'], axis=1)
    mean_series = df.mean()
    mean_series.to_csv('/home/ashwath/Programs/MAGCS/Evaluation/meanmetrics_mag50_d2v_p2v_may21.tsv',
                       sep='\t', index=True, header=False)
    print("C'est fini.")
def get_scores(ds_bios, plays_full, plays_train, norm_plays_full, norm_plays_train,cf_model, tfIdfRecommender,artist_index, index_artist, methodKeys,kk):
    """Evaluate CF, content-based, hybrid, and random recommenders over a
    sample of users, accumulating per-user metrics at each cutoff in ``kk``.

    Args:
        ds_bios: artist-biography dataset passed through to get_cb_rank.
        plays_full / plays_train: user x artist play-count matrices
            (presumably scipy sparse — TODO confirm; indexed as [user, artist]).
        norm_plays_full / norm_plays_train: normalised counterparts.
        cf_model: collaborative-filtering model exposing .recommend().
        tfIdfRecommender: content-based recommender used by get_cb_rank.
        artist_index / index_artist: name<->id mapping dicts.
        methodKeys: method identifiers to score (e.g. 'cf', 'cb', 'hb').
        kk: iterable of cutoff values k.

    Returns:
        The module-level accumulators (rnd_baselines, upper_bounds,
        diversities, precisions, mrrs, ndcgs) — NOTE(review): these and
        metrics/get_rnd_rank/get_cb_rank/mix/print_progress are defined
        elsewhere in the module; this function appends into them in place.
    """
    NUSERS,NARTISTS = plays_full.shape
    # Checkpointing hook: remembers the last user processed so a rerun could
    # resume (see the commented-out range below).
    global the_user_id
    completed = 0
    # NOTE(review): `completed` is never incremented, so print_progress always
    # receives 0 — looks like a latent bug; confirm against print_progress.
    new_completed = 0
    # Evaluate on a random sample of 100 users rather than all NUSERS.
    lightUsers = get_rnd_rank(NUSERS,[],100)
    ranks = {}
    for user_id in lightUsers: #range(the_user_id,NUSERS):
        the_user_id = user_id
        print_progress(completed, user_id, NUSERS)
        # Colaborative filtering rank: keep only the item ids, drop the scores.
        ranks['cf'] =[i for i,x in cf_model.recommend(user_id, norm_plays_train,N=max(kk) ) ]
        # get history of artistid: indices of artists this user played > 1 time
        # (nonzero()[1] takes the column indices of the boolean row).
        user_history_indexs = (plays_train[user_id] > 1).nonzero()[1]
        # mapped to artistnames from user artist history
        user_history = [index_artist[artistid] for artistid in user_history_indexs]
        # Content based rank
        ranks['cb'] = get_cb_rank(ds_bios, user_history, tfIdfRecommender, artist_index,max(kk))
        # Hybrid mixed rank: interleave cf and cb, truncated to the largest k.
        ranks['hb'] = mix(ranks['cf'], ranks['cb'])[:max(kk)]
        # Random baseline rank (excludes the user's known history).
        rnd_rank = get_rnd_rank(NARTISTS,user_history_indexs, max(kk))
        scores = {}
        relevants={}
        rnd_relevants = []
        upper_bound = 0
        # Calculate relevants and scores for each method.
        # An item is "relevant" iff the user played it more than once in the
        # full matrix; scores carry the normalised play count for NDCG.
        for method in methodKeys:
            scores[method] = []
            relevants[method] = []
            for artist_id in ranks[method]:
                ground_truth = plays_full[user_id,artist_id]
                relevants[method].append(1 if ground_truth > 1 else 0)
                norm_ground_truth = norm_plays_full[user_id,artist_id]
                scores[method].append(norm_ground_truth)
        # Rnd Baseline: same relevance rule; bare except treats any lookup
        # failure (e.g. out-of-range id) as "not played".
        for artist_id in rnd_rank:
            try:
                ground_truth = plays_full[user_id,artist_id]
            except:
                ground_truth = 0
            finally:
                rnd_relevants.append(1 if ground_truth > 1 else 0)
        # Upper Bound: count items relevant in the full data but absent from
        # training — the best any recommender could newly surface.
        # nonzero() on the row yields (row_indices, col_indices); only the
        # column (artist) indices are used.
        x, nonzero = plays_full[user_id].nonzero()
        for artist_id in nonzero:
            ground_truth = plays_full[user_id,artist_id]
            try:
                train = plays_train[user_id,artist_id]
            except:
                train = 0
            finally:
                if(train == 0 and ground_truth > 1):
                    upper_bound += 1
        # save user metrics into the module-level per-k accumulators
        for k in kk:
            rnd_baselines[k].append(sum(rnd_relevants[:k])/k)
            # upper bound expressed as precision@k, capped at 1.
            upper_bounds[k].append(1 if upper_bound/k > 1 else upper_bound/k)
        for method in methodKeys:
            for k in kk:
                # diversity: set of distinct artists recommended across users.
                diversities[method][k].update(ranks[method][:k])
                precisions[method][k].append(sum(relevants[method][:k])/k)
                ndcgs[method][k].append(metrics.ndcg_at_k(scores[method][:k], k))
                mrrs[method][k].append(metrics.reciprocal_rank(relevants[method][:k]))
    return rnd_baselines, upper_bounds, diversities, precisions, mrrs, ndcgs