Example #1
def test_train_save_load() -> None:
    """
    Runs training and compares reward curve against saved baseline for an environment
    with a discrete action space, running a single process, when training is resumed
    from a saved checkpoint.
    """

    # Check that desired results name is available.
    save_name = "test_train_save_load"
    check_results_name(save_name)

    # Load default training config and run training for the first time.
    with open(CARTPOLE_CONFIG_PATH, "r") as config_file:
        config = json.load(config_file)

    # Modify default training config.
    config["save_name"] = save_name

    # Run training to get checkpoint.
    train(config)

    # Modify config for second training run.
    config["load_from"] = save_name
    config["save_name"] = None
    config["baseline_metrics_filename"] = "cartpole_save_load"

    # Run resumed training.
    train(config)

    # Clean up.
    os.system("rm -rf %s" % save_dir_from_name(save_name))
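The pattern above (train once with save_name to produce a checkpoint, then train again with load_from pointing at that name) recurs throughout these examples. The helper below is a minimal, hypothetical consolidation of it, not code from the repo; it assumes train(), check_results_name(), and save_dir_from_name() behave as used above, and it swaps the os.system("rm -rf ...") cleanup for shutil.rmtree, which serves the same purpose here.

import json
import shutil


def run_and_resume(config_path: str, save_name: str, baseline: str = None) -> None:
    """Train once to save a checkpoint under `save_name`, then resume from it."""

    # Check that the desired results name is available.
    check_results_name(save_name)

    # Load the default training config.
    with open(config_path, "r") as config_file:
        config = json.load(config_file)

    # First run: save a checkpoint under `save_name`.
    config["save_name"] = save_name
    train(config)

    # Second run: resume from the checkpoint and, if given, compare against a baseline.
    config["load_from"] = save_name
    config["save_name"] = None
    config["baseline_metrics_filename"] = baseline
    train(config)

    # Clean up the saved results directory.
    shutil.rmtree(save_dir_from_name(save_name), ignore_errors=True)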
Example #2
def test_train_cartpole() -> None:
    """
    Runs training and compares reward curve against saved baseline for an environment
    with a discrete action space, running a single process.
    """

    # Load default training config.
    with open(CARTPOLE_CONFIG_PATH, "r") as config_file:
        config = json.load(config_file)

    # Modify default training config.
    config["baseline_metrics_filename"] = "cartpole"

    # Run training.
    train(config)
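None of these tests show the full contents of the default config files they load. The sketch below is a hypothetical illustration of the kind of settings such a file might hold, assembled only from keys that these examples read or override; the real defaults live in the JSON files behind CARTPOLE_CONFIG_PATH, MT10_CONFIG_PATH, and the other path constants, and every value here is made up.

# Illustrative only: keys are taken from the examples in this listing, values are guesses.
example_config = {
    "env_name": "CartPole-v1",           # environment to train on
    "num_updates": 100,                  # training iterations
    "num_processes": 4,                  # parallel rollout processes
    "num_minibatch": 4,                  # minibatches per update
    "cuda": False,                       # train on GPU when True
    "seed": 1,                           # random seed
    "print_freq": 10,                    # iterations between metric printing
    "save_freq": None,                   # iterations between intermediate saves
    "save_name": None,                   # results name to save under
    "load_from": None,                   # results name to resume from
    "save_memory": False,                # memory-saving MT10 variant
    "metrics_filename": None,            # where to dump metrics
    "baseline_metrics_filename": None,   # baseline to compare the reward curve against
    "architecture_config": {
        "recurrent": False,
        "recurrent_hidden_size": None,
        "include_task_index": True,
        "actor_config": {"activation": "tanh"},    # "tanh" is a guess; one test swaps in "relu"
        "critic_config": {"activation": "tanh"},
    },
}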
Example #3
def test_train_MT10() -> None:
    """
    Runs training and compares reward curve against saved baseline for an environment
    with a continuous action space, running a single process.
    """

    # Load default training config.
    with open(MT10_CONFIG_PATH, "r") as config_file:
        config = json.load(config_file)

    # Modify default training config.
    config["baseline_metrics_filename"] = "MT10"

    # Run training.
    train(config)
Example #4
def test_train_MT10_splitting_v2() -> None:
    """
    Runs training and compares reward curve against saved baseline for a multi-task
    environment, running a single process, with splitting v2 network architecture.
    """

    # Load default training config.
    with open(SPLITTING_V2_CONFIG_PATH, "r") as config_file:
        config = json.load(config_file)

    # Modify default training config.
    config["baseline_metrics_filename"] = "MT10_splitting_v2"

    # Run training.
    train(config)
Example #5
def test_train_lunar_lander_gpu() -> None:
    """
    Runs training and compares reward curve against saved baseline for an environment
    with a continuous action space, running a single process, on GPU.
    """

    # Load default training config.
    with open(LUNAR_LANDER_CONFIG_PATH, "r") as config_file:
        config = json.load(config_file)

    # Modify default training config.
    config["cuda"] = True
    config["baseline_metrics_filename"] = "lunar_lander_gpu"

    # Run training.
    train(config)
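The GPU tests above simply set config["cuda"] = True, so they assume a CUDA-capable device is available. The variant below is a sketch of how such a test could be guarded on machines without one; the skip marker is an addition, not part of the original tests.

import json

import pytest
import torch


@pytest.mark.skipif(not torch.cuda.is_available(), reason="requires a CUDA device")
def test_train_lunar_lander_gpu_guarded() -> None:
    """Variant of test_train_lunar_lander_gpu that is skipped when no GPU is present."""

    # Load default training config.
    with open(LUNAR_LANDER_CONFIG_PATH, "r") as config_file:
        config = json.load(config_file)

    # Modify default training config.
    config["cuda"] = True
    config["baseline_metrics_filename"] = "lunar_lander_gpu"

    # Run training.
    train(config)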
Example #6
def test_train_cartpole_recurrent() -> None:
    """
    Runs training and compares reward curve against saved baseline for an environment
    with a discrete action space, running a single process, with a recurrent policy.
    """

    # Load default training config.
    with open(CARTPOLE_CONFIG_PATH, "r") as config_file:
        config = json.load(config_file)

    # Modify default training config.
    config["architecture_config"]["recurrent"] = True
    config["architecture_config"]["recurrent_hidden_size"] = 64
    config["baseline_metrics_filename"] = "cartpole_recurrent"

    # Run training.
    train(config)
Example #7
def test_train_MT10_splitting_v2_exclude_task() -> None:
    """
    Runs training and compares reward curve against saved baseline for a multi-task
    environment, running a single process, with splitting v2 network architecture where
    task index is excluded from input.
    """

    # Load default training config.
    with open(SPLITTING_V2_CONFIG_PATH, "r") as config_file:
        config = json.load(config_file)

    # Modify default training config.
    config["architecture_config"]["include_task_index"] = False
    config["baseline_metrics_filename"] = "MT10_splitting_v2_exclude_task"

    # Run training.
    train(config)
Example #8
def test_train_MT10_trunk_exclude_task() -> None:
    """
    Runs training and compares reward curve against saved baseline for a multi-task
    environment, running a single process, with shared trunk architecture, while
    excluding the task index from the network input.
    """

    # Load default training config.
    with open(TRUNK_CONFIG_PATH, "r") as config_file:
        config = json.load(config_file)

    # Modify default training config.
    config["architecture_config"]["include_task_index"] = False
    config["baseline_metrics_filename"] = "MT10_trunk_exclude_task"

    # Run training.
    train(config)
Example #9
def test_train_MT10_save_memory() -> None:
    """
    Runs training and compares reward curve against saved baseline for an environment
    with a continuous action space, running a single process, while using the memory
    saving version of the MT10 benchmark.
    """

    # Load default training config.
    with open(MT10_CONFIG_PATH, "r") as config_file:
        config = json.load(config_file)

    # Modify default training config.
    config["baseline_metrics_filename"] = "MT10_save_memory"
    config["save_memory"] = True

    # Run training.
    train(config)
Example #10
def test_train_MT10_trunk_recurrent() -> None:
    """
    Runs training and compares reward curve against saved baseline for a multi-task
    environment, running a single process, with recurrent shared trunk architecture.
    """

    # Load default training config.
    with open(TRUNK_CONFIG_PATH, "r") as config_file:
        config = json.load(config_file)

    # Modify default training config.
    config["architecture_config"]["recurrent"] = True
    config["architecture_config"]["recurrent_hidden_size"] = 32
    config["baseline_metrics_filename"] = "MT10_trunk_recurrent"

    # Run training.
    train(config)
Example #11
def test_train_MT10_multi() -> None:
    """
    Runs training and compares reward curve against saved baseline for an environment
    with a continuous action space, running multiple processes.
    """

    # Load default training config.
    with open(MT10_CONFIG_PATH, "r") as config_file:
        config = json.load(config_file)

    # Modify default training config.
    config["num_updates"] = int(config["num_updates"] / MP_FACTOR)
    config["num_processes"] *= MP_FACTOR
    config["baseline_metrics_filename"] = "MT10_multi"

    # Run training.
    train(config)
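The multi-process tests divide num_updates by MP_FACTOR and multiply num_processes by the same factor. Assuming each update collects a fixed-length rollout per process, the two changes cancel out and the total number of environment steps stays roughly constant, which is what keeps the reward curve comparable to the single-process baseline. A minimal sketch of that invariant with made-up values (the "rollout_length" key is hypothetical):

# Illustrative values only; the real MP_FACTOR is defined by the test module.
MP_FACTOR = 2
config = {"num_updates": 100, "num_processes": 4, "rollout_length": 8}

steps_before = config["num_updates"] * config["num_processes"] * config["rollout_length"]

# The same scaling the tests apply.
config["num_updates"] = int(config["num_updates"] / MP_FACTOR)
config["num_processes"] *= MP_FACTOR

steps_after = config["num_updates"] * config["num_processes"] * config["rollout_length"]
assert steps_before == steps_after  # 3200 == 3200 with these values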
Example #12
def test_train_cartpole_multi_gpu() -> None:
    """
    Runs training and compares reward curve against saved baseline for an environment
    with a discrete action space, running multiple processes, on GPU.
    """

    # Load default training config.
    with open(CARTPOLE_CONFIG_PATH, "r") as config_file:
        config = json.load(config_file)

    # Modify default training config.
    config["num_updates"] = int(config["num_updates"] / MP_FACTOR)
    config["num_processes"] *= MP_FACTOR
    config["cuda"] = True
    config["baseline_metrics_filename"] = "cartpole_multi_gpu"

    # Run training.
    train(config)
Example #13
def test_train_cartpole_relu() -> None:
    """
    Runs training and compares reward curve against saved baseline for an environment
    with a discrete action space, running a single process, with a relu activation
    function in the networks.
    """

    # Load default training config.
    with open(CARTPOLE_CONFIG_PATH, "r") as config_file:
        config = json.load(config_file)

    # Modify default training config.
    config["architecture_config"]["actor_config"]["activation"] = "relu"
    config["architecture_config"]["critic_config"]["activation"] = "relu"
    config["baseline_metrics_filename"] = "cartpole_relu"

    # Run training.
    train(config)
Example #14
def test_train_lunar_lander_recurrent() -> None:
    """
    Runs training and compares reward curve against saved baseline for an environment
    with a continuous action space, running a single process, with a recurrent policy.
    """

    # Load default training config.
    with open(LUNAR_LANDER_CONFIG_PATH, "r") as config_file:
        config = json.load(config_file)

    # Modify default training config.
    config["architecture_config"]["recurrent"] = True
    config["architecture_config"]["recurrent_hidden_size"] = 64
    config["num_minibatch"] = 1
    config["baseline_metrics_filename"] = "lunar_lander_recurrent"

    # Run training.
    train(config)
Example #15
def test_train_MT10_multi_gpu_recurrent() -> None:
    """
    Runs training and compares reward curve against saved baseline for an environment
    with a continuous action space, running multiple processes on GPU, with a
    recurrent policy.
    """

    # Load default training config.
    with open(MT10_CONFIG_PATH, "r") as config_file:
        config = json.load(config_file)

    # Modify default training config.
    config["num_updates"] = int(config["num_updates"] / MP_FACTOR)
    config["num_processes"] *= MP_FACTOR
    config["cuda"] = True
    config["architecture_config"]["recurrent"] = True
    config["architecture_config"]["recurrent_hidden_size"] = 64
    config["baseline_metrics_filename"] = "MT10_multi_gpu_recurrent"

    # Run training.
    train(config)
Example #16
def test_save_load_multi() -> None:
    """
    Test saving/loading functionality for training when multiprocessing.
    """

    # Check that desired results name is available.
    save_name = "test_save_load_multi"
    check_results_name(save_name)

    # Load default training config.
    with open(CARTPOLE_CONFIG_PATH, "r") as config_file:
        config = json.load(config_file)

    # Modify default training config and run training to save checkpoint.
    config["save_name"] = save_name
    config["num_updates"] = int(config["num_updates"] / MP_FACTOR)
    config["num_processes"] *= MP_FACTOR
    checkpoint = train(config)
    first_metrics = checkpoint["metrics"].state()

    # Run training for the second time, and load from checkpoint.
    config["load_from"] = save_name
    config["save_name"] = None
    config["num_updates"] *= 2
    checkpoint = train(config)
    second_metrics = checkpoint["metrics"].state()

    # Compare metrics.
    assert list(first_metrics.keys()) == list(second_metrics.keys())
    for metric_name in first_metrics.keys():
        first_metric = first_metrics[metric_name]
        second_metric = second_metrics[metric_name]

        assert first_metric["maximum"] <= second_metric["maximum"]
        for key in ["history", "mean", "stdev"]:
            n = len(first_metric[key])
            assert first_metric[key][:n] == second_metric[key][:n]

    # Clean up.
    os.system("rm -rf %s" % save_dir_from_name(save_name))
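The assertions at the end of test_save_load_multi check that the resumed run reproduces the first run's metric values as a prefix of its own. The helper below factors that check out; it is hypothetical, but it assumes only the metrics state layout used above, where each metric name maps to a dict holding "history", "mean", and "stdev" lists plus a "maximum" value.

from typing import Any, Dict


def assert_metrics_resumed(
    first_metrics: Dict[str, Dict[str, Any]],
    second_metrics: Dict[str, Dict[str, Any]],
) -> None:
    """Check that `second_metrics` extends `first_metrics` without rewriting history."""

    assert list(first_metrics.keys()) == list(second_metrics.keys())
    for metric_name, first_metric in first_metrics.items():
        second_metric = second_metrics[metric_name]

        # The longer run should never report a smaller maximum.
        assert first_metric["maximum"] <= second_metric["maximum"]

        # The resumed run's per-update values must start with the first run's values.
        for key in ["history", "mean", "stdev"]:
            n = len(first_metric[key])
            assert first_metric[key][:n] == second_metric[key][:n]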
Example #17
File: main.py  Project: mtcrawshaw/meta
if __name__ == "__main__":

    # Parse config filename from command line arguments.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "command",
        type=str,
        help="Command to run. Either 'train' or 'tune'.",
    )
    parser.add_argument(
        "config_filename",
        type=str,
        help="Name of config file to load from.",
    )
    args = parser.parse_args()

    # Load config file.
    with open(args.config_filename, "r") as config_file:
        config = json.load(config_file)

    # Run specified command.
    if args.command == "train":
        train(config)
    elif args.command == "tune":
        tune(config)
    elif args.command == "meta_train":
        meta_train(config)
    else:
        raise ValueError("Unsupported command: '%s'" % args.command)
Example #18
def meta_train(config: Dict[str, Any]) -> Dict[str, Dict[str, Any]]:
    """
    Main function for meta_train.py. Runs meta-training and meta-testing using the
    train() function from meta/train/train.py. The expected entries of `config` are
    documented below. Returns a dictionary holding values of performance metrics from
    training and evaluation.

    Parameters
    ----------
    meta_train_config : Dict[str, Any]
        Config to pass to train() for meta-training, without common settings listed
        below such as `cuda` and `seed`.
    meta_test_config : Dict[str, Any]
        Config to pass to train() for meta-testing, without common settings listed below
        such as `cuda` and `seed`. Note that if any architecture configuration is
        present within `meta_test_config`, it will be ignored and instead the
        architecture specified in `meta_train_config` will be used.
    cuda : bool
        Whether or not to train on GPU.
    seed : int
        Random seed.
    load_from : str
        Path of checkpoint file (as saved by this function) to load from in order to
        resume training. NOTE: This should be included in the config file but isn't yet
        supported for meta-training.
    print_freq : int
        Number of training iterations between metric printing.
    save_freq : int
        Number of training iterations between saving of intermediate progress. If None,
        no saving of intermediate progress will occur. Note that if `save_name` is None,
        this value is ignored.
    save_name : str
        Name to save experiments under. Each experiment (meta-train and meta-test) will
        be given its own value of `save_name` derived from this one.
    """

    # Check for unsupported options.
    unsupported_options = ["load_from"]
    for unsupported in unsupported_options:
        if config[unsupported] is not None:
            raise NotImplementedError
    if config["meta_train_config"]["architecture_config"]["include_task_index"]:
        raise NotImplementedError

    # Add common settings to meta-train config and meta-test config.
    meta_train_config = config["meta_train_config"]
    meta_test_config = config["meta_test_config"]
    common_settings = list(config.keys())
    common_settings.remove("meta_train_config")
    common_settings.remove("meta_test_config")
    common_settings.remove("save_name")
    for setting in common_settings:
        meta_train_config[setting] = config[setting]
        meta_test_config[setting] = config[setting]

    # Construct save names for meta-training and meta-testing.
    if config["save_name"] is None:
        meta_train_config["save_name"] = None
        meta_test_config["save_name"] = None
    else:
        meta_train_config["save_name"] = "%s_meta_train" % config["save_name"]
        meta_test_config["save_name"] = "%s_meta_test" % config["save_name"]

    # Perform meta-training.
    print("Meta-Training:")
    checkpoint = train(meta_train_config)

    # Convert policy for meta-test time.
    num_test_tasks = get_num_tasks(meta_test_config["env_name"])
    policy = checkpoint["policy"]
    policy.meta_conversion(num_test_tasks)

    # Perform meta-testing.
    print("\nMeta-Testing:")
    checkpoint = train(meta_test_config, policy)

    return checkpoint
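Putting the documented parameters together, a meta-training config might look roughly like the sketch below. This is an illustration only: the values are invented, and the inner configs are ordinary train() configs minus the common settings (cuda, seed, load_from, print_freq, save_freq), which meta_train() copies into both of them.

# Hypothetical meta-training config, assembled only from the keys documented above.
meta_config = {
    "meta_train_config": {
        "env_name": "MT10",                        # hypothetical value
        "architecture_config": {"include_task_index": False},  # must be False for now
        # ... remaining train() settings ...
    },
    "meta_test_config": {
        "env_name": "MT10",                        # hypothetical value
        # Architecture settings here would be ignored in favor of meta_train_config's.
        # ... remaining train() settings ...
    },
    "cuda": False,
    "seed": 1,
    "load_from": None,                             # must be None; resuming isn't supported yet
    "print_freq": 10,
    "save_freq": None,
    "save_name": "mt10_meta",                      # yields "mt10_meta_meta_train" / "mt10_meta_meta_test"
}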
Example #19
File: tune.py  Project: mtcrawshaw/meta
def train_single_config(
    train_config: Dict[str, Any],
    trials_per_config: int,
    fitness_fn: Callable,
    seed: int,
    checkpoint: Dict[str, Any],
    save_dir: str,
    config_save_name: str = None,
    metrics_filename: str = None,
    baseline_metrics_filename: str = None,
    early_stop_trials: int = None,
) -> Tuple[float, Dict[str, Any], Dict[str, Any]]:
    """
    Run training with a fixed config for ``trials_per_config`` trials, and return
    fitness and a dictionary holding results.
    """

    # Load in checkpoint, if necessary.
    fitness = 0.0
    trial = 0
    config_results: Dict[str, Any] = {}
    config_results["trials"] = []
    config_results["config"] = dict(train_config)
    if checkpoint is not None and checkpoint["config_checkpoint"] is not None:
        config_results = checkpoint["config_checkpoint"]["config_results"]
        fitness = checkpoint["config_checkpoint"]["fitness"]
        trial = checkpoint["config_checkpoint"]["trial"]

    # Perform training and compute resulting fitness for multiple trials.
    while trial < trials_per_config:

        # Check for early stop.
        if early_stop_trials is not None and trial == early_stop_trials:
            break

        trial_results: Dict[str, Any] = {}

        # Set trial name, seed, and metrics filenames for saving/comparison, if
        # necessary.
        get_save_name = (
            lambda name: "%s_%d" % (name, trial) if name is not None else None
        )
        train_config["save_name"] = get_save_name(config_save_name)
        train_config["metrics_filename"] = get_save_name(metrics_filename)
        train_config["baseline_metrics_filename"] = get_save_name(
            baseline_metrics_filename
        )
        train_config["seed"] = seed + trial

        # Run training and get fitness.
        checkpoint = train(train_config)
        metrics = checkpoint["metrics"].state()
        trial_fitness = fitness_fn(metrics)
        fitness += trial_fitness

        # Fill in trial results.
        trial_results["trial"] = trial
        trial_results["metrics"] = dict(metrics)
        trial_results["fitness"] = trial_fitness
        config_results["trials"].append(dict(trial_results))

        # Save checkpoint, if necessary. We increment the trial index here so that when
        # training resumes, it will start with the next trial after the last completed
        # one.
        if save_dir is not None:
            config_checkpoint: Dict[str, Any] = {}
            config_checkpoint["config_results"] = dict(config_results)
            config_checkpoint["fitness"] = fitness
            config_checkpoint["trial"] = trial + 1
            checkpoint["config_checkpoint"] = dict(config_checkpoint)

            checkpoint_filename = os.path.join(save_dir, "checkpoint.pkl")
            with open(checkpoint_filename, "wb") as checkpoint_file:
                pickle.dump(checkpoint, checkpoint_file)

        # Update trial index.
        trial += 1

    fitness /= trials_per_config
    config_results["fitness"] = fitness

    return fitness, config_results, checkpoint
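train_single_config() averages fitness_fn(metrics) over the completed trials, so a fitness function only needs to reduce a metrics state to a single score for one trial. The example below is hypothetical: the metric name "train_reward" and the "mean" list layout are assumptions based on the metrics state seen in the tests, not the repo's actual metric names.

from typing import Any, Dict


def mean_final_reward(metrics: Dict[str, Dict[str, Any]], window: int = 10) -> float:
    """Average of the last `window` smoothed reward values for one trial."""

    means = metrics["train_reward"]["mean"]
    tail = means[-window:] if len(means) >= window else means
    return float(sum(tail) / len(tail))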