示例#1
0
def find_best(logdir, metric, mode):
    """Echo the last checkpoint path of the best trial found under ``logdir``.

    Args:
        logdir: Experiment directory handed to ``ray.tune``'s ``Analysis``.
        metric: Name of the metric used to rank the trials.
        mode: Ranking direction, forwarded as ``mode`` to
            ``Analysis.get_best_logdir``.
    """
    import logging

    from ray.tune.analysis import Analysis

    # Raise the "ray.tune" logger threshold to ERROR before the analysis runs.
    tune_logger = logging.getLogger("ray.tune")
    tune_logger.setLevel("ERROR")

    winner_dir = Analysis(logdir).get_best_logdir(metric, mode=mode)
    click.echo(get_last_checkpoint_path(winner_dir))
示例#2
0
def main():
    """Launch the ``SuboptimalityGap`` sweep seeded with earlier search results.

    The best configuration (highest ``true_value``) is read from
    ``results/HparamSearch-Dim10``; its ``B`` and ``learning_rate`` are reused
    while seeds 0-9, environment dimensions 2-10 and the ``dpg``/``maac``
    estimators are grid-searched. Ray is initialised at the start and shut
    down once ``tune.run`` returns.
    """
    ray.init(logging_level=logging.WARNING)

    previous_search = Analysis(
        "results/HparamSearch-Dim10",
        default_metric="true_value",
        default_mode="max",
    )
    best = previous_search.get_best_config()

    # Logging metadata first, then the grid-searched axes, then the fixed
    # settings -- inserted in this order so the key order stays the same.
    config = {
        "wandb_dir": os.getcwd(),
        "wandb_tags": "unstable controllable".split(),
    }
    config.update(
        seed=tune.grid_search(list(range(10))),
        env_dim=tune.grid_search(list(range(2, 11))),
        estimator=tune.grid_search("dpg maac".split()),
    )
    config.update(
        K=8,
        B=best["B"],
        optimizer="SGD",
        learning_rate=best["learning_rate"],
        clip_grad_norm=100,
    )

    stopping_criteria = {"time_total_s": 300}
    tune.run(
        SuboptimalityGap,
        config=config,
        num_samples=1,
        stop=stopping_criteria,
        local_dir="./results",
    )
    ray.shutdown()
示例#3
0
文件: commands.py 项目: rlan/ray
def list_trials(experiment_path,
                sort=None,
                output=None,
                filter_op=None,
                info_keys=None,
                limit=None,
                desc=False):
    """Lists trials in the directory subtree starting at the given path.

    The last result of every trial is loaded into a dataframe, which is
    narrowed to the requested columns, optionally filtered, sorted and
    truncated, then printed and optionally written to ``output``.

    Args:
        experiment_path (str): Directory where trials are located.
            Like Experiment.local_dir/Experiment.name/experiment*.json.
        sort (list): Keys to sort by.
        output (str): Name of file where output is saved. The extension
            selects the format: ``.p``/``.pkl``/``.pickle`` or ``.csv``.
        filter_op (str): Filter operation in the format
            "<column> <operator> <value>". The value may contain spaces.
        info_keys (list): Keys that are displayed.
        limit (int): Number of rows to display.
        desc (bool): If True sort descending, otherwise ascending.

    Raises:
        click.ClickException: If no trial data is found, a requested key,
            sort key, filter column or filter operator is unknown, the
            filter is malformed or targets an unsupported dtype, no column
            is left to display, or ``output`` has an unsupported extension.
    """
    _check_tabulate()

    try:
        checkpoints_df = Analysis(experiment_path).dataframe()  # last result
    except TuneError as e:
        raise click.ClickException("No trial data found!") from e

    def key_filter(k):
        return k in DEFAULT_CLI_KEYS or k.startswith(CONFIG_PREFIX)

    col_keys = [k for k in checkpoints_df.columns if key_filter(k)]

    if info_keys:
        for k in info_keys:
            if k not in checkpoints_df.columns:
                raise click.ClickException("Provided key invalid: {}. "
                                           "Available keys: {}.".format(
                                               k, checkpoints_df.columns))
        col_keys = [k for k in checkpoints_df.columns if k in info_keys]

    if not col_keys:
        raise click.ClickException("No columns to output.")

    # Take an independent copy so the column assignments below do not write
    # into a slice of the original frame (chained assignment).
    checkpoints_df = checkpoints_df[col_keys].copy()
    if "last_update_time" in checkpoints_df:
        # Drop missing and infinite timestamps explicitly instead of relying
        # on the "mode.use_inf_as_null" option, which newer pandas releases
        # no longer provide.
        infinities = [float("inf"), float("-inf")]
        raw_times = checkpoints_df["last_update_time"]
        datetime_series = raw_times[~raw_times.isin(infinities)].dropna()

        datetime_series = datetime_series.apply(
            lambda t: datetime.fromtimestamp(t).strftime(TIMESTAMP_FORMAT))
        # Rows dropped above end up as NaN through index alignment.
        checkpoints_df["last_update_time"] = datetime_series

    if "logdir" in checkpoints_df:
        # logdir often too long to view in table, so drop experiment_path.
        # The path is literal text, not a pattern, hence regex=False.
        checkpoints_df["logdir"] = checkpoints_df["logdir"].str.replace(
            experiment_path, "", regex=False)

    if filter_op:
        # Split at most twice so that the value may itself contain spaces.
        parts = filter_op.split(" ", 2)
        if len(parts) != 3:
            raise click.ClickException(
                "Invalid filter: {!r}. Expected format: "
                "'<column> <operator> <value>'.".format(filter_op))
        col, op, val = parts
        if col not in checkpoints_df:
            raise click.ClickException("{} not in: {}".format(
                col, list(checkpoints_df)))
        if op not in OPERATORS:
            raise click.ClickException(
                "Unsupported operator: {}".format(op))
        col_type = checkpoints_df[col].dtype
        if is_numeric_dtype(col_type):
            try:
                val = float(val)
            except ValueError as e:
                raise click.ClickException(
                    "Filter value for numeric column {} is not a "
                    "number: {}".format(col, val)) from e
        elif is_string_dtype(col_type):
            val = str(val)
        # TODO(Andrew): add support for datetime and boolean
        else:
            raise click.ClickException("Unsupported dtype for {}: {}".format(
                col, col_type))
        filtered_index = OPERATORS[op](checkpoints_df[col], val)
        checkpoints_df = checkpoints_df[filtered_index]

    if sort:
        for key in sort:
            if key not in checkpoints_df:
                raise click.ClickException("{} not in: {}".format(
                    key, list(checkpoints_df)))
        ascending = not desc
        checkpoints_df = checkpoints_df.sort_values(
            by=sort, ascending=ascending)

    if limit:
        checkpoints_df = checkpoints_df[:limit]

    print_format_output(checkpoints_df)

    if output:
        file_extension = os.path.splitext(output)[1].lower()
        if file_extension in (".p", ".pkl", ".pickle"):
            checkpoints_df.to_pickle(output)
        elif file_extension == ".csv":
            checkpoints_df.to_csv(output, index=False)
        else:
            raise click.ClickException(
                "Unsupported filetype: {}".format(output))
        click.secho("Output saved at {}".format(output), fg="green")
示例#4
0
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Apache License Version 2.0 for more details.
# ============================================================================

import sys
import pandas as pd
from absl import flags
from ray.tune.analysis import Analysis

FLAGS = flags.FLAGS

# --logdir: experiment directory that is handed to ray.tune's Analysis.
flags.DEFINE_string("logdir", "", "")
FLAGS(sys.argv)

analysis = Analysis(experiment_dir=FLAGS.logdir)
all_configs = analysis.get_all_configs()

# For every trial keep the single row whose summed validation score is
# highest, keyed by the trial path.
rows = {}
for path, df in analysis.trial_dataframes.items():
    # Keep only val/test/iteration columns and add a selection score: the
    # row-wise sum of the validation roc_auc and f1 columns.
    df = df.filter(regex="val|test|iteration").assign(
        metric=lambda x: x.filter(regex="val/(?:roc_auc|f1)").sum(axis=1))
    # idxmax() returns an index *label*, so the row has to be fetched with
    # the label-based .loc rather than the positional .iloc.
    idx = df["metric"].idxmax()
    rows[path] = df.loc[idx].to_dict()
    rows[path]["seed"] = all_configs[path]["seed"]

df = pd.DataFrame.from_dict(rows, orient="index")
df = df.filter(regex="test/(?:roc_auc|f1|recall)").rename(
    mapper=lambda x: x.split("/")[-1], axis=1).rename(columns={
        "roc_auc": "AUC",
        "f1": "F1",
        "recall": "Recall"
示例#5
0
def summarize_run(path: str,
                  cp_dir: str = 'None',
                  infer_cp_dir: bool = True,
                  train_metric: str = DEFAULT_TRAIN_METRIC,
                  eval_metric: str = DEFAULT_VALID_METRIC,
                  overwrite: bool = False,
                  return_analysis: bool = False) -> 'Optional[Analysis]':
    """Summarize an experiment directory into ``<path>/summary``.

    Creates (or, with ``overwrite``, recreates) ``<path>/summary``, copies the
    best run's ``params.json``/``params.pkl`` into it, plots all runs and the
    best run, and optionally copies the summary directory to ``cp_dir``.
    The best run is the one ``Analysis.get_best_logdir`` selects for
    ``eval_metric`` with ``mode='min'``.

    Args:
        path: Experiment directory that is passed to ``Analysis``.
        cp_dir: Directory the summary is copied to. The string ``'None'`` (or
            an empty string) disables the copy. An existing ``cp_dir`` is
            removed first. Ignored when ``infer_cp_dir`` is true.
        infer_cp_dir: If true, ``cp_dir`` is derived from the first three
            path components following ``BASE_PATH`` in ``path`` and placed
            under ``DEFAULT_TARGET_BASE_DIR``.
        train_metric: Not used by this function.
        eval_metric: Metric used to pick the best run and for both plots.
        overwrite: Replace an existing summary directory instead of raising.
        return_analysis: If true, return the ``Analysis`` object right after
            loading it. Note that the summary directory has already been
            (re)created and an existing ``cp_dir`` removed at that point.

    Returns:
        The ``Analysis`` object when ``return_analysis`` is true, else None.

    Raises:
        ValueError: If the summary directory exists and ``overwrite`` is
            false, if ``cp_dir`` cannot be inferred from ``path``, or if the
            experiment contains no runs.
    """
    print(f'\nLoading experiment from:  \n{path}\n')

    if infer_cp_dir:
        # Validate the layout before anything on disk is deleted or created.
        try:
            experiment, name, mode = path.split(BASE_PATH)[1].split('/')[:3]
        except (IndexError, ValueError) as err:
            raise ValueError(
                f'Cannot infer cp_dir: `{path}` does not match '
                f'`{BASE_PATH}<experiment>/<name>/<mode>`.') from err
        cp_dir = os.path.join(DEFAULT_TARGET_BASE_DIR, experiment, name, mode,
                              'summary')

    summary_dir = os.path.join(path, 'summary')

    if os.path.isdir(summary_dir):
        if not overwrite:
            raise ValueError(
                f'Target directory `{summary_dir}` exists, use `--overwrite` to replace.'
            )
        shutil.rmtree(summary_dir)
    os.makedirs(summary_dir)

    if infer_cp_dir:
        print(f'\nInfering cp_dir:\n  {cp_dir}\n')

    # 'None' is the "no copy" sentinel; an empty string is treated the same
    # way instead of failing on the trailing-slash check.
    copy_summary = cp_dir not in ('None', '')
    if copy_summary:
        if cp_dir.endswith('/'):
            cp_dir = cp_dir[:-1]
        if os.path.isdir(cp_dir):
            shutil.rmtree(cp_dir)
        cp_dir_base = os.path.dirname(cp_dir)
        # dirname is '' for a bare relative name; makedirs('') would raise.
        if cp_dir_base:
            os.makedirs(cp_dir_base, exist_ok=True)

    exp = Analysis(path)

    if return_analysis:
        return exp

    # Load every run's progress log and tag its rows with a per-run id.
    configs = exp.dataframe()
    run_frames = []
    for uid, logdir in enumerate(configs['logdir']):
        frame = pd.read_csv(os.path.join(logdir, 'progress.csv'))
        frame['uid'] = uid
        run_frames.append(frame)
    if not run_frames:
        raise ValueError(f'No runs found in experiment:\n{path}')
    runs = pd.concat(run_frames)

    best_run_dir = exp.get_best_logdir(eval_metric, mode='min')
    best_run = pd.read_csv(os.path.join(best_run_dir, 'progress.csv'))

    print(f'Best run ID: {best_run_dir}')

    for ext in ('json', 'pkl'):
        in_file = os.path.join(best_run_dir, f'params.{ext}')
        out_file = os.path.join(summary_dir, f'best_params.{ext}')
        copyfile(in_file, out_file)

    # Plot runs.
    plot_all(runs, eval_metric, os.path.join(summary_dir, 'all_runs.png'))
    plot_single(best_run, eval_metric, os.path.join(summary_dir,
                                                    'best_run.png'))

    if copy_summary:
        shutil.copytree(summary_dir, cp_dir)
    return None