def fit(self):
    """Collect the per-column score list of every submission in ``self.dfs_subname``.

    For each entry ``d`` of ``self.dfs_subname`` (``d[1]`` is used as the
    submission name):

    * if ``scores/scores_<name>.pkl`` exists under ``self.data_directory``,
      the previously computed list is loaded from it;
    * otherwise ``self.generate_column_subs(d)`` is called, the 24 files
      ``scores/item_<name>_<n>.csv`` (n = 1..24) are scored against
      ``self.gt_csv``, the resulting list is pickled, and the temporary
      CSV files are deleted.

    Returns:
        dict: ``self.dict_sub_scores``, mapping each submission name to its
        list of scores (one per impression column).
    """
    # Note: the 'ground_truth.csv' file is the test set WITH the references
    # that have been removed. Ask @teomore to have it.
    #
    # NOTE(review): the original also evaluated
    # ``self.data_directory.joinpath(self.mode)`` into a local that was never
    # used; every path was (and still is) built from ``self.data_directory``.
    # Confirm whether a per-mode directory was actually intended.
    scores_dir = self.data_directory.joinpath('scores')

    # Holds, for each sub, the list of the scores (one for each impression).
    self.dict_sub_scores = {}

    for d in self.dfs_subname:
        sub_name = d[1]
        print(f'Getting scores for {sub_name}...')
        cache_path = scores_dir.joinpath(f'scores_{sub_name}.pkl')

        if os.path.isfile(cache_path):
            # Scores were previously computed: simply load them.
            # NOTE: pickle is only safe on files this code wrote itself.
            with open(cache_path, 'rb') as file:
                self.dict_sub_scores[sub_name] = pickle.load(file)
            continue

        # No cached scores:
        #   first: generate a sub for each impression "column"
        #   second: score each sub
        #   third: collect each score into the scores' list
        column_csvs = [
            scores_dir.joinpath(f'item_{sub_name}_{n}.csv')
            for n in range(1, 25)
        ]
        try:
            self.generate_column_subs(d)
            scores = [
                f.score_submissions(subm_csv, self.gt_csv, f.get_reciprocal_ranks)
                for subm_csv in tqdm(column_csvs)
            ]
            self.dict_sub_scores[sub_name] = scores

            print('Saving list...')
            with open(cache_path, 'wb') as file:
                pickle.dump(scores, file)
        finally:
            # Remove the temporary per-column files even when generation,
            # scoring or saving failed, so a crash does not leave them behind.
            for subm_csv in tqdm(column_csvs):
                try:
                    os.remove(subm_csv)
                except FileNotFoundError:
                    # The file was never generated; nothing to clean up.
                    pass

    return self.dict_sub_scores
def __init__(self, sub_name):
    """Resolve the working paths for ``sub_name`` and score the whole submission.

    All paths are built relative to the directory containing this file:

    * ``data_directory``: ``../submissions/hybrid/scores``
    * ``gt_csv``:         ``../submissions/evaluator/ground_truth.csv``
    * ``subm_csv``:       ``../submissions/evaluator/<sub_name>.csv``

    ``overall_score`` is computed immediately by scoring ``subm_csv`` against
    ``gt_csv`` with ``f.score_submissions``; ``sub_scores`` starts empty.

    Args:
        sub_name: name of the submission (file name without ``.csv``).
    """
    here = Path(__file__).absolute().parent
    evaluator_parts = ('..', 'submissions/evaluator')

    self.sub_name = sub_name
    self.sub_scores = []

    self.current_directory = here
    self.data_directory = here.joinpath('..', 'submissions/hybrid/scores')
    self.gt_csv = here.joinpath(*evaluator_parts, 'ground_truth.csv')
    self.subm_csv = here.joinpath(*evaluator_parts, f'{sub_name}.csv')

    # Score of the complete submission, computed once at construction time.
    self.overall_score = f.score_submissions(
        self.subm_csv,
        self.gt_csv,
        f.get_reciprocal_ranks,
    )
def score_sub(self, submission):
    """Score ``submission`` against ``self.gt_csv``, print the score and return it.

    The computation is delegated to ``f.score_submissions`` (utils/functions.py)
    with ``f.get_reciprocal_ranks`` as the ranking function.  The call passes
    ``subm_csv_is_file=False`` -- presumably meaning ``submission`` is handed
    over as in-memory data rather than a CSV path; confirm against
    ``f.score_submissions``.

    Args:
        submission: the submission to evaluate.

    Returns:
        The value returned by ``f.score_submissions``.
    """
    result = f.score_submissions(
        submission,
        self.gt_csv,
        f.get_reciprocal_ranks,
        subm_csv_is_file=False,
    )
    print(f'Score: {result}')
    return result
def score_sub(self, sub_path, gt_csv, total_score=-1):
    """Score one submission file against a cluster ground truth and print a verdict.

    The submission is read from ``self.data_directory.joinpath(sub_path)`` and
    scored with ``f.score_submissions`` / ``f.get_reciprocal_ranks``.  The
    cluster name shown in the log is the file name of ``gt_csv`` without its
    extension.

    When ``total_score`` is given, the score is printed with a colored verdict:

    * above ``total_score``                      -> green,  ``:)``
    * more than 0.04 below                       -> red,    "PROBLEM HERE!"
    * more than 0.02 (and at most 0.04) below    -> red,    ``:(``
    * anything else (within 0.02 below, or equal) -> yellow, ``:|``

    Args:
        sub_path: submission path, relative to ``self.data_directory``.
        gt_csv: path of the ground-truth CSV to score against.
        total_score: reference score to compare with; ``-1`` (the default)
            disables the comparison and the score is simply printed in green.

    Returns:
        The score returned by ``f.score_submissions``.
    """
    # ANSI escape sequences used for the colored terminal output.
    reset = '\033[0;37;40m'
    bold = '\033[1;40m'
    magenta = '\033[1;35;40m'
    green = '\033[1;32;40m'
    red = '\033[1;31;40m'
    yellow = '\033[1;33;40m'

    cluster_file = os.path.basename(self.data_directory.joinpath(gt_csv))
    cluster_name = os.path.splitext(cluster_file)[0]
    print(
        f"Computing score for{magenta} {sub_path}{reset}"
        f" with cluster{magenta} {cluster_name}{reset}"
    )

    subm_csv = self.data_directory.joinpath(sub_path)
    mrr = f.score_submissions(subm_csv, gt_csv, f.get_reciprocal_ranks)

    if total_score == -1:
        # No reference score supplied: just report the value.
        print(f'{bold} Score: {green} {mrr}{reset}')
        return mrr

    if mrr > total_score:
        color, verdict = green, ':)'
    elif mrr < total_score - 0.04:
        # The backslash is escaped explicitly: a bare "\_" is an invalid
        # escape sequence and triggers a warning on recent Python versions.
        color, verdict = red, '¯\\_(⊙︿⊙)_/¯ <--------- PROBLEM HERE!'
    elif mrr < total_score - 0.02:
        # Also covers mrr == total_score - 0.04, which previously matched
        # neither "worse" branch and was reported as neutral.
        color, verdict = red, ':('
    else:
        color, verdict = yellow, ':|'

    print(f'{bold} Score: {color} {mrr}{bold} {verdict}{reset}')
    return mrr
def get_scores(self):
    """Fill ``self.sub_scores`` with the per-column scores of ``self.sub_name``.

    If ``scores_<sub_name>.pkl`` exists in ``self.data_directory`` the list is
    loaded from it.  Otherwise ``self.generate_column_subs()`` is called, the
    24 files ``item_<sub_name>_<n>.csv`` (n = 1..24) are scored against
    ``self.gt_csv``, the resulting list is stored in ``self.sub_scores`` and
    pickled, and the temporary CSV files are deleted.

    Returns:
        None. The result is stored in ``self.sub_scores``.
    """
    print(f'Getting scores for {self.sub_name}...')
    cache_path = self.data_directory.joinpath(f'scores_{self.sub_name}.pkl')

    if os.path.isfile(cache_path):
        # Scores were previously computed: simply load them.
        # NOTE: pickle is only safe on files this code wrote itself.
        with open(cache_path, 'rb') as file:
            self.sub_scores = pickle.load(file)
        return

    # No cached scores:
    #   first: generate a sub for each impression "column"
    #   second: score each sub
    #   third: collect each score into the scores' list
    column_csvs = [
        self.data_directory.joinpath(f'item_{self.sub_name}_{n}.csv')
        for n in range(1, 25)
    ]
    try:
        self.generate_column_subs()
        scores = [
            f.score_submissions(subm_csv, self.gt_csv, f.get_reciprocal_ranks)
            for subm_csv in tqdm(column_csvs)
        ]
        self.sub_scores = scores

        print('Saving list...')
        with open(cache_path, 'wb') as file:
            pickle.dump(scores, file)
    finally:
        # Remove the temporary per-column files even when generation,
        # scoring or saving failed, so a crash does not leave them behind.
        for subm_csv in tqdm(column_csvs):
            try:
                os.remove(subm_csv)
            except FileNotFoundError:
                # The file was never generated; nothing to clean up.
                pass