Exemplo n.º 1
0
    def collect(self):
        """
        Collect and clean the trial data of the running Tune experiment.

        Every sub-directory of ``self._logdir`` is treated as one job. Its
        Tune logs are loaded through ``Analysis``, turned into a
        ``{trial_id: row}`` dict, passed through ``self.clean_trials`` and
        merged into ``self._trial_records`` so that the front-end client
        can display them. ``self._trial_records`` is reset on every call and
        ``self._available`` is set to True as soon as one job yields data.

        Jobs whose dataframe is empty or has no ``trial_id`` column are
        skipped instead of raising ``KeyError``.
        """
        sub_dirs = os.listdir(self._logdir)

        self._trial_records = {}

        # search through all the sub-directories in the log directory
        for job_name in sub_dirs:
            job_dir = os.path.join(self._logdir, job_name)
            if not os.path.isdir(job_dir):
                continue

            df = Analysis(str(job_dir)).dataframe()
            # nothing usable was logged for this job
            if len(df) == 0 or "trial_id" not in df.columns:
                continue

            self._available = True

            # make sure that data will convert to JSON without error
            df["trial_id"] = df["trial_id"].astype(str)
            df = df.fillna(0)

            # convert df to a python dict keyed by trial id
            trial_data = df.set_index("trial_id").to_dict(orient="index")

            # clean data and update class attribute
            if trial_data:
                trial_data = self.clean_trials(trial_data, job_name)
                self._trial_records.update(trial_data)
def analyse_ray_dump(ray_directory, topn, metric="avg_inc_acc"):
    """Print the best trials of a Ray Tune dump, ranked by *metric*.

    The trials found in *ray_directory* are loaded through ``Analysis``,
    sorted by *metric* in descending order and printed: first the best
    trial, then the followers.

    Args:
        ray_directory: Directory handed to ``Analysis``.
        topn: Bound on how many ranked trials are listed; a negative value
            lists every trial.
        metric: Column to rank by, either ``"avg_inc_acc"`` or
            ``"last_acc"``.

    Returns:
        Whatever ``_get_line_results`` returns for the best trial (row 0 of
        the sorted dataframe).

    Raises:
        NotImplementedError: If *metric* is not one of the supported names.
    """
    if metric not in ("avg_inc_acc", "last_acc"):
        raise NotImplementedError("Unknown metric {}.".format(metric))

    ea = Analysis(ray_directory)
    trials_dataframe = ea.dataframe().sort_values(by=metric, ascending=False)

    # Map each "config:<name>" column to its position and remember where the
    # metric column lives, so rows can be read positionally below.
    config_prefix = "config:"
    mapping_col_to_index = {}
    result_index = -1
    for index, col in enumerate(trials_dataframe.columns):
        if col.startswith(config_prefix):
            mapping_col_to_index[col[len(config_prefix):]] = index
        elif col == metric:
            result_index = index

    best_results = _get_line_results(trials_dataframe, 0, mapping_col_to_index)

    print("Ray config: {}".format(ray_directory))
    print("Best Config:")
    print("{}: {} with {}.".format(
        metric, trials_dataframe.iloc[0, result_index], best_results))
    print("\nFollowed by:")
    if topn < 0:
        topn = len(trials_dataframe)
    else:
        # NOTE(review): topn is decremented here and the loop below starts at
        # 1, so a positive topn lists topn - 2 followers -- confirm intended.
        topn = min(topn - 1, len(trials_dataframe))

    for i in range(1, topn):
        # Label the row with the metric actually used for the ranking.
        print("{}: {} with {}.".format(
            metric, trials_dataframe.iloc[i, result_index],
            _get_line_results(trials_dataframe, i, mapping_col_to_index)))

    return best_results
Exemplo n.º 3
0
    def collect(self):
        """
        Collect and clean the trial data of the running Tune experiment.

        Every sub-directory of ``self._logdir`` is treated as one job. Its
        Tune logs are loaded through ``Analysis``, turned into a dict keyed
        by the stringified trial id, passed through ``self.clean_trials``
        and merged into ``self._trial_records`` so that the front-end client
        can display them. ``self.collect_errors`` is then called for the
        job. Jobs with an empty dataframe or without a ``trial_id`` column
        are skipped.

        A TensorBoard server on ``self._logdir`` is launched the first time
        a job with data is found.
        """

        sub_dirs = os.listdir(self._logdir)

        self._trial_records = {}

        # search through all the sub-directories in the log directory
        for job_name in sub_dirs:
            job_dir = os.path.join(self._logdir, job_name)
            if not os.path.isdir(job_dir):
                continue

            df = Analysis(str(job_dir)).dataframe()

            if len(df) == 0 or "trial_id" not in df.columns:
                continue

            # start TensorBoard server if not started yet
            if not self._tensor_board_started:
                tb = program.TensorBoard()
                tb.configure(argv=[None, "--logdir", self._logdir])
                tb.launch()
                self._tensor_board_started = True

            self._available = True

            # make sure that data will convert to JSON without error
            df["trial_id_key"] = df["trial_id"].astype(str)
            df = df.fillna(0)

            # Coerce ids that are neither plain str nor plain int to int.
            # ``Series.items`` replaces ``iteritems``, which pandas 2.0
            # removed. The exact type check (not isinstance) is preserved
            # so the same values are coerced as before.
            trial_ids = df["trial_id"]
            for i, value in df["trial_id"].items():
                if type(value) not in (str, int):
                    trial_ids[i] = int(value)

            df["trial_id"] = trial_ids

            # convert df to a python dict keyed by the string trial id
            df = df.set_index("trial_id_key")
            trial_data = df.to_dict(orient="index")

            # clean data and update class attribute
            if trial_data:
                trial_data = self.clean_trials(trial_data, job_name)
                self._trial_records.update(trial_data)

            self.collect_errors(job_name, df)
Exemplo n.º 4
0
def get_best_specific_params_dir(opt, layer, model, att_type):
    """Return the log directory of a trial matching a specific configuration.

    The experiment stored under ``../ray_tune/<opt['folder']>`` is loaded,
    the ``config/`` prefix is stripped from the column names, and the rows
    whose ``num_layers``, ``model`` and ``att_type`` equal the given values
    are ranked by ``accuracy`` (descending). The ``logdir`` of the row at
    rank ``opt['index']`` is returned. Intermediate dataframes and the
    requested/available values are printed along the way.
    """
    experiment = Analysis("../ray_tune/{}".format(opt['folder']))
    df = experiment.dataframe(metric=opt['metric'], mode='max')
    print(df)
    df.columns = [name.replace('config/', '') for name in df.columns]
    print(df)

    # show each requested value next to the values present in its column
    requested = ((layer, 'num_layers'), (model, 'model'),
                 (att_type, 'att_type'))
    for wanted, column in requested:
        print(wanted)
        print(df[column].unique())

    layer_match = df['num_layers'] == layer
    model_match = df['model'] == model
    att_match = df['att_type'] == att_type
    newdf = df.loc[layer_match & model_match & att_match]
    print(newdf)

    ranked = newdf.sort_values('accuracy', ascending=False)
    return ranked['logdir'].iloc[opt['index']]
Exemplo n.º 5
0
    def collect(self):
        """
        Collect and clean the trial data of the running Tune experiment.

        ``self._trial_records`` and ``self._errors`` are reset on every
        call. When ``self._logdir`` is set, its Tune logs are loaded through
        ``Analysis``, turned into a dict keyed by the stringified trial id,
        passed through ``self.clean_trials`` and stored in
        ``self._trial_records`` so that the front-end client can display
        them; ``self.collect_errors`` is then called with the dataframe.
        Nothing else happens when the dataframe is empty or has no
        ``trial_id`` column.
        """
        self._trial_records = {}
        self._errors = {}
        if not self._logdir:
            return

        # load every trial found below the log directory
        analysis = Analysis(str(self._logdir))
        df = analysis.dataframe()

        if len(df) == 0 or "trial_id" not in df.columns:
            return

        self._trials_available = True

        # make sure that data will convert to JSON without error
        df["trial_id_key"] = df["trial_id"].astype(str)
        df = df.fillna(0)

        # Coerce ids that are neither plain str nor plain int to int.
        # ``Series.items`` replaces ``iteritems``, which pandas 2.0 removed.
        # The exact type check (not isinstance) is preserved so the same
        # values are coerced as before.
        trial_ids = df["trial_id"]
        for i, value in df["trial_id"].items():
            if type(value) not in (str, int):
                trial_ids[i] = int(value)

        df["trial_id"] = trial_ids

        # convert df to a python dict keyed by the string trial id
        df = df.set_index("trial_id_key")
        trial_data = df.to_dict(orient="index")

        # clean data and update class attribute
        if trial_data:
            trial_data = self.clean_trials(trial_data)
            self._trial_records.update(trial_data)

        self.collect_errors(df)
Exemplo n.º 6
0
 def testDataframe(self):
     """Check that ``Analysis.dataframe()`` yields a pandas DataFrame
     with ``self.num_samples * 2`` rows for ``self.test_dir``."""
     analysis = Analysis(self.test_dir)
     df = analysis.dataframe()
     self.assertIsInstance(df, pd.DataFrame)
     # assertEquals is a deprecated alias that Python 3.12 removed
     self.assertEqual(df.shape[0], self.num_samples * 2)
Exemplo n.º 7
0
def get_best_params_dir(opt):
    """Return the log directory of a top-ranked trial of an experiment.

    The experiment stored under ``../ray_tune/<opt['folder']>`` is loaded
    with ``opt['metric']`` (mode ``'max'``), its trials are ranked by the
    ``accuracy`` column in descending order, and the ``logdir`` of the row
    at rank ``opt['index']`` is returned.
    """
    experiment = Analysis("../ray_tune/{}".format(opt['folder']))
    trials = experiment.dataframe(metric=opt['metric'], mode='max')
    ranked = trials.sort_values('accuracy', ascending=False)
    logdirs = ranked['logdir']
    return logdirs.iloc[opt['index']]
Exemplo n.º 8
0
from ray.tune import Analysis
import pandas as pd
import os
import numpy as np

if __name__ == "__main__":
    # Load the experiment results, print every recorded 'acc' value in
    # ascending order, then print the configuration with the highest 'acc'.
    experiment_dir = "/Users/shaobohu/Documents/我的坚果云/project/circles_experiment/TRY_ALL/Train"
    analysis = Analysis(experiment_dir)

    accuracies = analysis.dataframe()['acc'].tolist()
    accuracies.sort()
    print(accuracies)

    best_config = analysis.get_best_config('acc', 'max')
    print(best_config)
Exemplo n.º 9
0
def _lowest_checkpoint_index(trial_dir, default):
    """Return the smallest N among the ``checkpoint_N`` entries of
    *trial_dir*, or *default* when none is smaller than it."""
    lowest = default
    for entry in os.listdir(trial_dir):
        if 'checkpoint' in entry:
            lowest = min(lowest, int(entry.split('_')[1]))
    return lowest


def get_best_info(exp_dir,
                  metrics=None,
                  ascending=None,
                  mode='auto'):
    """Return the directory, checkpoint, config and scores of the best trial.

    Args:
        exp_dir: Experiment directory containing one sub-directory per trial.
        metrics: Columns used to rank trials in ``'auto'`` mode; defaults to
            ``['vl_score', 'vl_loss']``.
        ascending: Sort direction for each entry of *metrics*; defaults to
            ``[False, True]``.
        mode: ``'auto'`` ranks trials through the ``Analysis`` dataframe;
            ``'manual'`` walks the trial directories and keeps the trial
            with the highest ``vl_score``.

    Returns:
        A dict with the keys ``trial_dir``, ``chk_file``, ``config``,
        ``tr_loss``, ``tr_score``, ``vl_loss`` and ``vl_score``. In
        ``'manual'`` mode the placeholder dict (``None`` paths, infinite
        losses, zero scores) is returned when no trial scores above 0.
        ``None`` is returned for any other *mode*.

    In both modes the scores are read from the line of ``result.json`` whose
    1-based number equals the lowest checkpoint index of the trial.
    """
    # Build the defaults per call instead of sharing mutable default lists.
    if metrics is None:
        metrics = ['vl_score', 'vl_loss']
    if ascending is None:
        ascending = [False, True]

    if mode == 'auto':
        analysis = Analysis(exp_dir, 'vl_score', 'max')
        df = analysis.dataframe()
        df = df.sort_values(metrics, ascending=ascending)
        # The trial directory is read from the last column of the best row
        # (presumably the 'logdir' column -- verify against Analysis).
        trial_dir = df.iloc[0, -1]

        min_ = _lowest_checkpoint_index(trial_dir, 10000)
        chk_file = os.path.join(trial_dir, f'checkpoint_{min_}', 'model.pth')
        with open(os.path.join(trial_dir, 'params.json')) as params_file:
            config = json.load(params_file)
        with open(os.path.join(trial_dir, 'result.json')) as results_file:
            res = [json.loads(line) for line in results_file]
        best_res = res[min_ - 1]

        return {
            'trial_dir': trial_dir,
            'chk_file': chk_file,
            'config': config,
            'tr_loss': best_res['tr_loss'],
            'tr_score': best_res['tr_score'],
            'vl_loss': best_res['vl_loss'],
            'vl_score': best_res['vl_score']
        }

    if mode == 'manual':
        best_dict = {
            'trial_dir': None,
            'chk_file': None,
            'config': None,
            'tr_loss': float('inf'),
            'tr_score': 0,
            'vl_loss': float('inf'),
            'vl_score': 0
        }
        dirs = [
            part_dir for part_dir in os.listdir(exp_dir)
            if os.path.isdir(os.path.join(exp_dir, part_dir))
        ]
        for part_dir in dirs:
            trial_dir = os.path.join(exp_dir, part_dir)
            min_ = _lowest_checkpoint_index(trial_dir, 400)

            # Only the result line matching the checkpoint index matters, so
            # stop reading as soon as it has been found.
            curr = None
            with open(os.path.join(trial_dir, 'result.json')) as results_file:
                for line_no, line in enumerate(results_file, start=1):
                    if line_no == min_:
                        curr = json.loads(line)
                        break

            # Strict comparison: on a vl_score tie the earlier trial is kept.
            if curr is not None and best_dict['vl_score'] < curr['vl_score']:
                with open(os.path.join(trial_dir,
                                       'params.json')) as params_file:
                    config = json.load(params_file)
                best_dict = {
                    'trial_dir': trial_dir,
                    'chk_file': os.path.join(
                        trial_dir, f'checkpoint_{min_}/model.pth'),
                    'config': config,
                    'tr_loss': curr['tr_loss'],
                    'tr_score': curr['tr_score'],
                    'vl_loss': curr['vl_loss'],
                    'vl_score': curr['vl_score']
                }
        return best_dict

    # Unknown modes keep yielding None, as before.
    return None