def collect(self):
    """
    Collects and cleans data on the running Tune experiment from the
    Tune logs so that users can see this information in the
    front-end client
    """
    sub_dirs = os.listdir(self._logdir)
    job_names = filter(
        lambda d: os.path.isdir(os.path.join(self._logdir, d)), sub_dirs)
    self._trial_records = {}

    # search through all the sub_directories in log directory
    for job_name in job_names:
        analysis = Analysis(str(os.path.join(self._logdir, job_name)))
        df = analysis.dataframe()
        if len(df) == 0:
            continue

        self._available = True

        # make sure that data will convert to JSON without error
        df["trial_id"] = df["trial_id"].astype(str)
        df = df.fillna(0)

        # convert df to python dict
        df = df.set_index("trial_id")
        trial_data = df.to_dict(orient="index")

        # clean data and update class attribute
        if len(trial_data) > 0:
            trial_data = self.clean_trials(trial_data, job_name)
            self._trial_records.update(trial_data)
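# A minimal, self-contained sketch of the same Analysis -> dict pipeline,
# assuming a hypothetical logdir "~/ray_results/my_exp" (not a path from the
# original code). Useful for checking what collect() produces per trial.
import os
from ray.tune import Analysis

logdir = os.path.expanduser("~/ray_results/my_exp")
analysis = Analysis(logdir)
df = analysis.dataframe()
if len(df) > 0 and "trial_id" in df.columns:
    # mirror the JSON-safety steps above: stringify ids, zero-fill NaNs
    df["trial_id"] = df["trial_id"].astype(str)
    records = df.fillna(0).set_index("trial_id").to_dict(orient="index")
    print(records)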
def analyse_ray_dump(ray_directory, topn, metric="avg_inc_acc"):
    if metric not in ("avg_inc_acc", "last_acc"):
        raise NotImplementedError("Unknown metric {}.".format(metric))

    ea = Analysis(ray_directory)
    trials_dataframe = ea.dataframe()
    trials_dataframe = trials_dataframe.sort_values(by=metric, ascending=False)

    # map each hyperparameter name (sans "config:" prefix) to its column index
    mapping_col_to_index = {}
    result_index = -1
    for index, col in enumerate(trials_dataframe.columns):
        if col.startswith("config:"):
            mapping_col_to_index[col[7:]] = index
        elif col == metric:
            result_index = index

    print("Ray config: {}".format(ray_directory))
    print("Best Config:")
    print("{}: {} with {}.".format(
        metric, trials_dataframe.iloc[0][result_index],
        _get_line_results(trials_dataframe, 0, mapping_col_to_index)))
    print("\nFollowed by:")

    if topn < 0:
        topn = len(trials_dataframe)
    else:
        topn = min(topn - 1, len(trials_dataframe))

    for i in range(1, topn):
        # report the requested metric, not a hardcoded "avg_inc_acc"
        print("{}: {} with {}.".format(
            metric, trials_dataframe.iloc[i][result_index],
            _get_line_results(trials_dataframe, i, mapping_col_to_index)))

    return _get_line_results(trials_dataframe, 0, mapping_col_to_index)
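# Hedged usage sketch for analyse_ray_dump; "ray_logs/experiment" is a
# hypothetical directory, and the topn/metric values are illustrative only.
if __name__ == "__main__":
    best = analyse_ray_dump("ray_logs/experiment", topn=5,
                            metric="avg_inc_acc")
    print("Best hyperparameters: {}".format(best))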
def collect(self):
    """
    Collects and cleans data on the running Tune experiment from the
    Tune logs so that users can see this information in the
    front-end client
    """
    sub_dirs = os.listdir(self._logdir)
    job_names = filter(
        lambda d: os.path.isdir(os.path.join(self._logdir, d)), sub_dirs)
    self._trial_records = {}

    # search through all the sub_directories in log directory
    for job_name in job_names:
        analysis = Analysis(str(os.path.join(self._logdir, job_name)))
        df = analysis.dataframe()
        if len(df) == 0 or "trial_id" not in df.columns:
            continue

        # start TensorBoard server if not started yet
        if not self._tensor_board_started:
            tb = program.TensorBoard()
            tb.configure(argv=[None, "--logdir", self._logdir])
            tb.launch()
            self._tensor_board_started = True

        self._available = True

        # make sure that data will convert to JSON without error
        df["trial_id_key"] = df["trial_id"].astype(str)
        df = df.fillna(0)

        # coerce non-str, non-int trial ids (e.g. numpy numbers) to int
        trial_ids = df["trial_id"]
        for i, value in df["trial_id"].items():
            if not isinstance(value, (str, int)):
                trial_ids[i] = int(value)
        df["trial_id"] = trial_ids

        # convert df to python dict
        df = df.set_index("trial_id_key")
        trial_data = df.to_dict(orient="index")

        # clean data and update class attribute
        if len(trial_data) > 0:
            trial_data = self.clean_trials(trial_data, job_name)
            self._trial_records.update(trial_data)
        self.collect_errors(job_name, df)
def get_best_specific_params_dir(opt, layer, model, att_type):
    analysis = Analysis("../ray_tune/{}".format(opt['folder']))
    df = analysis.dataframe(metric=opt['metric'], mode='max')
    # strip the "config/" prefix so hyperparameter columns can be
    # addressed by their plain names
    df.columns = [c.replace('config/', '') for c in df.columns]
    # debug output: requested filter values vs. what the dataframe contains
    print(layer, df['num_layers'].unique())
    print(model, df['model'].unique())
    print(att_type, df['att_type'].unique())
    newdf = df.loc[(df['num_layers'] == layer) & (df['model'] == model)
                   & (df['att_type'] == att_type)]
    print(newdf)
    best_params_dir = newdf.sort_values(
        'accuracy', ascending=False)['logdir'].iloc[opt['index']]
    return best_params_dir
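# Illustrative call; the folder, metric, index and filter values below are
# assumptions for the sketch, not values taken from the original code.
opt = {'folder': 'attention_sweep', 'metric': 'accuracy', 'index': 0}
best_dir = get_best_specific_params_dir(opt, layer=2, model='GAT',
                                        att_type='scaled_dot')
print(best_dir)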
def collect(self):
    """
    Collects and cleans data on the running Tune experiment from the
    Tune logs so that users can see this information in the
    front-end client
    """
    self._trial_records = {}
    self._errors = {}
    if not self._logdir:
        return

    # search through all the sub_directories in log directory
    analysis = Analysis(str(self._logdir))
    df = analysis.dataframe()
    if len(df) == 0 or "trial_id" not in df.columns:
        return

    self._trials_available = True

    # make sure that data will convert to JSON without error
    df["trial_id_key"] = df["trial_id"].astype(str)
    df = df.fillna(0)

    # coerce non-str, non-int trial ids (e.g. numpy numbers) to int
    trial_ids = df["trial_id"]
    for i, value in df["trial_id"].items():
        if not isinstance(value, (str, int)):
            trial_ids[i] = int(value)
    df["trial_id"] = trial_ids

    # convert df to python dict
    df = df.set_index("trial_id_key")
    trial_data = df.to_dict(orient="index")

    # clean data and update class attribute
    if len(trial_data) > 0:
        trial_data = self.clean_trials(trial_data)
        self._trial_records.update(trial_data)
    self.collect_errors(df)
def testDataframe(self):
    analysis = Analysis(self.test_dir)
    df = analysis.dataframe()
    self.assertIsInstance(df, pd.DataFrame)
    self.assertEqual(df.shape[0], self.num_samples * 2)
def get_best_params_dir(opt):
    analysis = Analysis("../ray_tune/{}".format(opt['folder']))
    df = analysis.dataframe(metric=opt['metric'], mode='max')
    best_params_dir = df.sort_values(
        'accuracy', ascending=False)['logdir'].iloc[opt['index']]
    return best_params_dir
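# Illustrative call, assuming opt carries the experiment folder under
# ../ray_tune, the tuning metric, and the rank of the trial to fetch.
# All values below are made up for the sketch.
opt = {'folder': 'cora_experiment', 'metric': 'accuracy', 'index': 0}
print(get_best_params_dir(opt))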
from ray.tune import Analysis

if __name__ == "__main__":
    # hardcoded path to a local Tune experiment directory
    analysis = Analysis(
        "/Users/shaobohu/Documents/我的坚果云/project/circles_experiment/TRY_ALL/Train")
    print(sorted(analysis.dataframe()['acc'].tolist()))
    print(analysis.get_best_config('acc', 'max'))
def get_best_info(exp_dir,
                  metrics=['vl_score', 'vl_loss'],
                  ascending=[False, True],
                  mode='auto'):
    if mode == 'auto':
        analysis = Analysis(exp_dir, 'vl_score', 'max')
        df = analysis.dataframe()
        df = df.sort_values(metrics, ascending=ascending)
        # the last column of the Analysis dataframe holds the trial logdir
        trial_dir = df.iloc[0][-1]

        # pick the earliest checkpoint saved in the trial directory
        min_ = 10000
        for f in os.listdir(trial_dir):
            if 'checkpoint' in f:
                idx = int(f.split('_')[1])
                min_ = min(min_, idx)

        chk_file = os.path.join(trial_dir, f'checkpoint_{min_}', 'model.pth')

        with open(os.path.join(trial_dir, 'params.json')) as f:
            config = json.load(f)
        with open(os.path.join(trial_dir, 'result.json')) as f:
            res = [json.loads(i) for i in f]
        best_res = res[min_ - 1]

        return {
            'trial_dir': trial_dir,
            'chk_file': chk_file,
            'config': config,
            'tr_loss': best_res['tr_loss'],
            'tr_score': best_res['tr_score'],
            'vl_loss': best_res['vl_loss'],
            'vl_score': best_res['vl_score']
        }
    elif mode == 'manual':
        best_dict = {
            'trial_dir': None,
            'chk_file': None,
            'config': None,
            'tr_loss': float('inf'),
            'tr_score': 0,
            'vl_loss': float('inf'),
            'vl_score': 0
        }
        dirs = [
            part_dir for part_dir in os.listdir(exp_dir)
            if os.path.isdir(os.path.join(exp_dir, part_dir))
        ]
        for part_dir in dirs:
            trial_dir = os.path.join(exp_dir, part_dir)

            # pick the earliest checkpoint saved in the trial directory
            min_ = 400
            for f in os.listdir(trial_dir):
                if 'checkpoint' in f:
                    idx = int(f.split('_')[1])
                    min_ = min(min_, idx)

            with open(os.path.join(trial_dir, 'result.json')) as f:
                for i, d in enumerate(f):
                    if i + 1 == min_:
                        curr = json.loads(d)
                        if best_dict['vl_score'] < curr['vl_score']:
                            # or (best_dict['vl_score'] == curr['vl_score']
                            #     and best_dict['vl_loss'] > curr['vl_loss']):
                            # open params.json under a new name so the
                            # result.json handle being iterated isn't shadowed
                            with open(os.path.join(trial_dir,
                                                   'params.json')) as pf:
                                config = json.load(pf)
                            best_dict = {
                                'trial_dir': trial_dir,
                                'chk_file': os.path.join(
                                    trial_dir,
                                    f'checkpoint_{min_}/model.pth'),
                                'config': config,
                                'tr_loss': curr['tr_loss'],
                                'tr_score': curr['tr_score'],
                                'vl_loss': curr['vl_loss'],
                                'vl_score': curr['vl_score']
                            }
        return best_dict
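# Hedged usage sketch: 'ray_results/gnn_run' is a hypothetical experiment
# directory. In 'auto' mode the best trial is picked from the Analysis
# dataframe; in 'manual' mode the trial directories are scanned directly.
info = get_best_info('ray_results/gnn_run', mode='auto')
print(info['trial_dir'], info['vl_score'])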