def _load_pairings_file(self):
    """
    Build the pairings file for the two models.

    If a pairings file already exists, ask the user whether they would like to
    reuse or overwrite it (skipped when existing self-chat files are allowed
    via ``use_existing_self_chat_files``). Stores the final path on
    ``self.pairings_filepath``.
    """
    pairings_filepath = get_hashed_combo_path(
        root_dir=self.fast_acute_args.root_dir,
        subdir='pairings_files',
        task=self.task,
        combos=self.combos,
    )
    if not os.path.exists(pairings_filepath):
        self._build_pairings_file(pairings_filepath)
    else:
        modify_time = os.path.getmtime(pairings_filepath)
        self._print_progress(
            f'Pairings already exist {pairings_filepath}. Last modified {time.ctime(modify_time)}'
        )
        if not self.fast_acute_args.use_existing_self_chat_files:
            # Normalize the user's response once per prompt rather than
            # recomputing .lower().strip() in each comparison.
            answer = ''
            while answer not in ('y', 'o'):
                answer = input('Enter y to use, o to overwrite:').lower().strip()
            if answer == 'o':
                self._build_pairings_file(pairings_filepath)

    self._print_progress(f'loading pairings file from {pairings_filepath}')
    self.pairings_filepath = pairings_filepath
def analyze_results(self, args: Optional[str] = None):
    """
    Analyze results of ACUTE Eval run, using the optional input args.

    Save results to appropriate filepath.
    """
    self._print_progress(f'Analyzing Results for run id {self.run_id}')
    parser = analysis_setup_args()
    # Tokenize the optional argument string; an absent value means no extra args.
    arg_tokens = args.split() if args is not None else []
    opt = parser.parse_args(arg_tokens)

    # Results land in a date-stamped, hash-keyed directory under the root dir.
    today = datetime.date.today().isoformat()
    self.results_path = get_hashed_combo_path(
        root_dir=self.fast_acute_args.root_dir,
        subdir=f'acute_results/{today}/',
        task=self.task,
        combos=self.combos,
    )

    # Overlay the run-specific settings on top of the parsed analysis options.
    overrides = {
        'model_strings': ','.join(self.models),
        'run_ids': self.run_id,
        'root_dir': self.fast_acute_args.root_dir,
        'outdir': self.results_path,
        'task': self.task,
    }
    opt.update(overrides)

    analyzer = self.ANALYZER(opt)
    self.results = analyzer.get_matchup_totals_with_significance()
    analyzer.save_results()

    self._print_progress(f'ACUTE Results: {self.results}')
    self._print_progress(f'ACUTE results saved to {self.results_path}')
def _load_pairing_files(self): df = self.dataframe if not os.path.exists(self.pairings_filepath): print('No valid pairings filepath was passed in: will extract likely path.') self.pairings_filepath = get_hashed_combo_path( root_dir=self.root_dir, subdir='pairings_files', task=self.task, combos=self.combos, ) if not os.path.exists(self.pairings_filepath): print( f'WARNING: Pairings filepath {self.pairings_filepath} could not be found.' ) self.pairings_filepath = os.path.join( self.root_dir, 'pairings_files', hashlib.sha1( '___vs___'.join( [f"{m}.{'q'.replace(':', '_')}" for m in self.models] ).encode('utf-8') ).hexdigest()[:10], ) if not os.path.exists(self.pairings_filepath): # For backward compatibility print( f'WARNING: Pairings filepath {self.pairings_filepath} could not be found.' ) self.pairings_filepath = os.path.join( self.root_dir, 'pairings_files', '___vs___'.join( [f"{m}.{self.task.replace(':', '_')}" for m in self.models] ), ) if not os.path.exists(self.pairings_filepath): print( f'NOTE: Pairings filepath {self.pairings_filepath} could not be found!' ) return self.pairings = [] with open(self.pairings_filepath, 'r') as f: for line in f: pair = json.loads(line) model1, model2 = pair['speakers_to_eval'] pair[model1] = pair['dialogue_dicts'][0] pair[model2] = pair['dialogue_dicts'][1] del pair['dialogue_dicts'] self.pairings.append(pair) self.pairs_to_eval = [self.pairings[i] for i in df.pairing_id.values.tolist()] # Build dialogue_ids => dialogue mappings winner_dialogues = [] loser_dialogues = [] for i, (_, row) in enumerate(df.iterrows()): winner = row['winner'] loser = row['loser'] winner_dialogues.append(self.pairs_to_eval[i][winner]) loser_dialogues.append(self.pairs_to_eval[i][loser]) df['pairs_to_eval'] = pd.Series(self.pairs_to_eval, index=df.index) df['winner_dialogue'] = pd.Series(winner_dialogues, index=df.index) df['loser_dialogue'] = pd.Series(loser_dialogues, index=df.index) self.dataframe = df