# Example #1
# 0
def calculate_metrics(df):
    """Calculate ranking metrics at multiple cutoffs k.

    For each k in 1..10 plus {20, 30, 40, 50, 100, 200, 300, 500}, adds
    average-precision, recall, reciprocal-rank and NDCG columns for both
    the p2v and d2v recommendation lists (8 new columns per k), pickles
    the per-paper metrics DataFrame, then writes the mean of every metric
    column to a TSV file.

    Args:
        df: DataFrame with 'p2v_binary' and 'd2v_binary' binary-relevance
            list columns, a 'ground_truth' column, and the corresponding
            '*_recommendations' columns (dropped before averaging).
    """
    klist = list(range(1, 11))
    klist.extend([20, 30, 40, 50, 100, 200, 300, 500])
    print(klist)
    for k in tqdm(klist):
        # Same four metrics for both recommendation models; looping over the
        # model prefix removes the copy-paste duplication that previously
        # produced an inconsistently capitalised 'reciprocal_rank_D2v_*'
        # column (now uniformly lower-case 'd2v').
        for model in ('p2v', 'd2v'):
            binary_col = '{}_binary'.format(model)
            df['average_precision_{}_{}'.format(model, k)] = df[binary_col].apply(
                lambda x: average_precision(x, k))
            df['recall_{}_{}'.format(model, k)] = df[[binary_col, 'ground_truth']].apply(
                lambda x: recall_at_k(x[binary_col], x.ground_truth, k), axis=1)
            df['reciprocal_rank_{}_{}'.format(model, k)] = df[binary_col].apply(
                lambda x: reciprocal_rank(x, k))
            df['ndcg_{}_{}'.format(model, k)] = df[binary_col].apply(
                lambda x: ndcg(x, k))

    df.to_pickle('/home/ashwath/Programs/MAGCS/Pickles/paperwisemetrics_mag50_d2v_p2v_may23_df.pickle')
    print("METRICS CALCULATED, time to calculate the means")
    # Drop the raw recommendation/relevance columns so only the metric
    # columns contribute to the column-wise means.
    df = df.drop(['p2v_recommendations', 'p2v_binary', 'd2v_recommendations', 'd2v_binary', 'ground_truth'], axis=1)
    mean_series = df.mean()
    mean_series.to_csv('/home/ashwath/Programs/MAGCS/Evaluation/meanmetrics_mag50_d2v_p2v_may21.tsv', sep='\t', index=True, header=False)
    print("C'est fini.")
def get_scores(ds_bios, plays_full, plays_train, norm_plays_full, norm_plays_train,cf_model, tfIdfRecommender,artist_index, index_artist, methodKeys,kk):
    """Evaluate the recommenders on a random sample of users.

    For each sampled user, builds top-N rankings from the collaborative
    filtering model ('cf'), the content-based TF-IDF recommender ('cb')
    and their mixed hybrid ('hb'), plus a random baseline ranking, then
    accumulates per-user precision, reciprocal rank, NDCG, diversity,
    random-baseline precision and an upper bound at every cutoff in
    ``kk``.

    Args:
        ds_bios: artist-biography dataset consumed by ``get_cb_rank``.
        plays_full: user x artist play-count matrix (full data);
            indexed as ``plays_full[user_id, artist_id]``.
        plays_train: user x artist play-count matrix (training split).
        norm_plays_full: normalised counterpart of ``plays_full``.
        norm_plays_train: normalised counterpart of ``plays_train``,
            fed to ``cf_model.recommend``.
        cf_model: collaborative-filtering model exposing
            ``recommend(user_id, matrix, N=...)``.
        tfIdfRecommender: content-based recommender passed through to
            ``get_cb_rank``.
        artist_index: mapping artist name -> matrix column index.
        index_artist: mapping matrix column index -> artist name.
        methodKeys: method identifiers to score; the loop fills
            ``ranks['cf']``, ``ranks['cb']`` and ``ranks['hb']``, so keys
            are presumably a subset of those — TODO confirm at call site.
        kk: iterable of cutoff values k; ``max(kk)`` bounds ranking length.

    Returns:
        Tuple ``(rnd_baselines, upper_bounds, diversities, precisions,
        mrrs, ndcgs)``.  NOTE(review): none of these names is defined in
        this function, so they must be module-level accumulator dicts
        (keyed by k, or by method then k) mutated in place here.
    """
    
    NUSERS,NARTISTS = plays_full.shape    

    # Tracks the last user processed; together with the commented-out
    # range() below this looks like resume support for interrupted runs.
    global the_user_id
   
    completed = 0
    new_completed = 0

    # Random sample of user ids to evaluate (third argument is 100, so
    # presumably 100 users with no exclusions — confirm get_rnd_rank).
    lightUsers = get_rnd_rank(NUSERS,[],100)

    ranks = {}

    for user_id in lightUsers: #range(the_user_id,NUSERS):
        the_user_id = user_id
        print_progress(completed, user_id, NUSERS)

        # Colaborative filtering rank
        ranks['cf'] =[i for i,x in cf_model.recommend(user_id, norm_plays_train,N=max(kk) ) ]
        # get history of artistid
        # NOTE(review): threshold is "> 1", so artists played exactly once
        # are excluded from the history (and from relevance below).
        user_history_indexs = (plays_train[user_id] > 1).nonzero()[1] 

        # mapped to artistnames from user artist history
        user_history =  [index_artist[artistid] for artistid in user_history_indexs]
        
        # Content based rank 
        ranks['cb'] = get_cb_rank(ds_bios, user_history, tfIdfRecommender, artist_index,max(kk))

        # Hybrid mixed rank
        ranks['hb'] = mix(ranks['cf'], ranks['cb'])[:max(kk)]

        # Random baseline rank: random artists excluding the user's history.
        rnd_rank = get_rnd_rank(NARTISTS,user_history_indexs, max(kk))

        scores = {}
        relevants={}
        rnd_relevants = []
        upper_bound = 0

        # Calculate relevants and scores for each method:
        # relevants = 0/1 relevance per ranked artist (played > 1 time),
        # scores = normalised play counts used later for NDCG.
        for method in methodKeys:
                scores[method] = []
                relevants[method] = []
                for artist_id in ranks[method]:
                        ground_truth = plays_full[user_id,artist_id]
                        relevants[method].append(1 if ground_truth > 1 else 0)
                        
                        norm_ground_truth = norm_plays_full[user_id,artist_id]
                        scores[method].append(norm_ground_truth)
       
        # Rnd Baseline: out-of-range ids count as not relevant.
        for artist_id in rnd_rank:
                try: 
                        ground_truth = plays_full[user_id,artist_id]
                except:
                        ground_truth = 0
                finally:
                        rnd_relevants.append(1 if ground_truth > 1 else 0) 
        
        # Upper Bound: number of relevant artists absent from the training
        # split — the most hits any recommender could possibly achieve.
        # NOTE(review): nonzero() returns (row, col) arrays; the column
        # indices land in `nonzero`, `x` is discarded.
        x, nonzero = plays_full[user_id].nonzero()
        for artist_id in nonzero:
                ground_truth = plays_full[user_id,artist_id]
                try:
                        train = plays_train[user_id,artist_id]
                except:
                        train = 0
                finally:
                        if(train == 0 and ground_truth > 1):
                                upper_bound += 1

        # save user metrics into the module-level accumulators
        for k in kk:
                rnd_baselines[k].append(sum(rnd_relevants[:k])/k)
                # upper_bound/k capped at 1 so it stays a valid precision.
                upper_bounds[k].append(1 if upper_bound/k > 1 else upper_bound/k)

        for method in methodKeys:
            for k in kk:
                diversities[method][k].update(ranks[method][:k])
                precisions[method][k].append(sum(relevants[method][:k])/k)
                ndcgs[method][k].append(metrics.ndcg_at_k(scores[method][:k], k))
                mrrs[method][k].append(metrics.reciprocal_rank(relevants[method][:k]))

    return rnd_baselines, upper_bounds, diversities, precisions, mrrs, ndcgs