示例#1
0
 def fit(self):
     """Collect the list of per-column scores for every sub in ``self.dfs_subname``.

     For each entry ``d`` the sub name is ``d[1]``. If
     ``scores/scores_<name>.pkl`` already exists under ``self.data_directory``
     the list is loaded from it. Otherwise ``self.generate_column_subs(d)``
     is called, every ``scores/item_<name>_<n>.csv`` (n = 1..24) is scored
     against ``self.gt_csv``, the resulting list is pickled for later runs
     and the intermediate CSV files are deleted.

     Note: the 'ground_truth.csv' file is the test WITH the references that
     have been removed. Ask @teomore to get it.

     Returns:
         dict: sub name -> list of scores (one for each impression column).
         The same dict is stored in ``self.dict_sub_scores``.
     """
     # Column indices for which an 'item_<name>_<n>.csv' file is scored and
     # then removed; generate_column_subs is expected to have written them.
     columns = range(1, 25)

     self.dict_sub_scores = {}
     for d in self.dfs_subname:
         sub_name = d[1]
         print(f'Getting scores for {sub_name}...')
         cache_file = self.data_directory.joinpath(f'scores/scores_{sub_name}.pkl')

         if cache_file.is_file():
             # Scores were computed by a previous run: simply load them.
             # NOTE(review): pickle is only safe as long as this cache is
             # produced by this code and not by an untrusted source.
             with cache_file.open('rb') as file:
                 self.dict_sub_scores[sub_name] = pickle.load(file)
             continue

         # No cached scores:
         # first: generate a sub for each impression "column"
         # second: score each sub
         # third: store the scores and cache them on disk
         self.generate_column_subs(d)
         item_files = [
             self.data_directory.joinpath(f'scores/item_{sub_name}_{n}.csv')
             for n in columns
         ]
         scores = [
             f.score_submissions(subm_csv, self.gt_csv, f.get_reciprocal_ranks)
             for subm_csv in tqdm(item_files)
         ]
         self.dict_sub_scores[sub_name] = scores

         print('Saving list...')
         with cache_file.open('wb') as file:
             pickle.dump(scores, file)

         # The per-column subs are only needed for scoring: remove them.
         for item_file in tqdm(item_files):
             item_file.unlink()
     return self.dict_sub_scores
 def __init__(self, sub_name):
     """Prepare the paths for the sub ``sub_name`` and score it once.

     Every path is built relative to the directory that contains this file.
     The overall score is computed right away by ``f.score_submissions``,
     which receives ``<sub_name>.csv`` and ``ground_truth.csv`` from
     ``../submissions/evaluator``.

     Args:
         sub_name: name of the sub; used for the csv file name here and
             stored as ``self.sub_name``.
     """
     here = Path(__file__).absolute().parent
     evaluator_directory = here / '..' / 'submissions/evaluator'

     self.sub_name = sub_name
     # Filled later with the per-column scores.
     self.sub_scores = []

     self.current_directory = here
     self.data_directory = here / '..' / 'submissions/hybrid/scores'
     self.gt_csv = evaluator_directory / 'ground_truth.csv'
     self.subm_csv = evaluator_directory / f'{sub_name}.csv'

     # Score of the whole sub, computed at construction time.
     self.overall_score = f.score_submissions(
         self.subm_csv,
         self.gt_csv,
         f.get_reciprocal_ranks,
     )
示例#3
0
 def score_sub(self, submission):
     """Score ``submission`` against ``self.gt_csv`` and print the result.

     The scoring is delegated to ``f.score_submissions`` (utils/functions.py)
     with ``f.get_reciprocal_ranks`` and ``subm_csv_is_file=False``.
     NOTE(review): the flag presumably means ``submission`` is in-memory data
     rather than a file path -- confirm against utils/functions.py.

     Returns:
         The value returned by ``f.score_submissions``.
     """
     scoring_options = {'subm_csv_is_file': False}
     score = f.score_submissions(
         submission, self.gt_csv, f.get_reciprocal_ranks, **scoring_options
     )
     print(f'Score: {score}')
     return score
 def score_sub(self, sub_path, gt_csv, total_score=-1):
     """Score the sub at ``sub_path`` against ``gt_csv`` and print a coloured report.

     Args:
         sub_path: sub file, resolved relative to ``self.data_directory``.
         gt_csv: ground truth passed as-is to ``f.score_submissions``; its
             base name without extension is printed as the cluster name.
         total_score: reference score to compare with. The default ``-1``
             means "no reference": the score is printed without a verdict.

     Returns:
         The value returned by ``f.score_submissions``.
     """
     cluster_name = os.path.splitext(
         os.path.basename(self.data_directory.joinpath(gt_csv))
     )[0]
     print("Computing score for"+f"\033[1;35;40m {sub_path}"+ '\033[0;37;40m' +" with cluster" + f"\033[1;35;40m {cluster_name}"+ '\033[0;37;40m')

     subm_csv = self.data_directory.joinpath(sub_path)
     mrr = f.score_submissions(subm_csv, gt_csv, f.get_reciprocal_ranks)

     if total_score == -1:
         # No reference score: plain report.
         print('\033[1;40m Score: '+ f'\033[1;32;40m {mrr}'+ '\033[0;37;40m')
         return mrr

     if mrr > total_score:
         # Better than the reference.
         print('\033[1;40m Score: '+ f'\033[1;32;40m {mrr}'+ '\033[1;40m    :)' + '\033[0;37;40m')
     elif mrr <= total_score - 0.04:
         # Drop of 0.04 or more. The boundary is inclusive so that a drop of
         # exactly 0.04 is not reported as neutral. The backslash is escaped
         # explicitly: '\_' is an invalid escape sequence.
         print('\033[1;40m Score: '+ f'\033[1;31;40m {mrr}'+ '\033[1;40m    ¯\\_(⊙︿⊙)_/¯     <--------- PROBLEM HERE!' + '\033[0;37;40m')
     elif mrr < total_score - 0.02:
         # Drop between 0.02 and 0.04 (both excluded).
         print('\033[1;40m Score: '+ f'\033[1;31;40m {mrr}'+ '\033[1;40m    :(' + '\033[0;37;40m')
     else:
         # Within 0.02 below the reference (or equal to it).
         print('\033[1;40m Score: '+ f'\033[1;33;40m {mrr}'+ '\033[1;40m     :|' + '\033[0;37;40m')
     return mrr
 def get_scores(self):
     """Fill ``self.sub_scores`` with the per-column scores of ``self.sub_name``.

     When ``scores_<sub_name>.pkl`` exists under ``self.data_directory`` the
     list is loaded from it. Otherwise ``self.generate_column_subs()`` is
     called, each ``item_<sub_name>_<n>.csv`` (n = 1..24) is scored against
     ``self.gt_csv``, the list is pickled and the item files are removed.
     """
     name = self.sub_name
     print(f'Getting scores for {name}...')
     cache_file = self.data_directory.joinpath(f'scores_{name}.pkl')

     if not os.path.isfile(cache_file):
         # Nothing cached yet:
         #   1. generate a sub for each impression "column"
         #   2. score every generated sub
         #   3. keep the scores and save them for the next run
         self.generate_column_subs()
         computed = [
             f.score_submissions(
                 self.data_directory.joinpath(f'item_{name}_{n}.csv'),
                 self.gt_csv,
                 f.get_reciprocal_ranks,
             )
             for n in tqdm(range(1, 25))
         ]
         self.sub_scores = computed
         print('Saving list...')
         with open(cache_file, 'wb') as file:
             pickle.dump(computed, file)
         # The generated per-column subs are no longer needed.
         for n in tqdm(range(1, 25)):
             os.remove(self.data_directory.joinpath(f'item_{name}_{n}.csv'))
     else:
         # Scores computed by a previous run: just load them.
         with open(cache_file, 'rb') as file:
             self.sub_scores = pickle.load(file)