Example #1
def test_hyperparam_optim_cmaes():
    # Define train env
    train_env = (GridWorld, {})

    # Run AgentManager
    stats_agent = AgentManager(
        DummyAgent,
        train_env,
        init_kwargs={},
        fit_budget=1,
        eval_kwargs={"eval_horizon": 5},
        n_fit=4,
    )

    # test hyperparameter optimization with CMA-ES sampler
    stats_agent.optimize_hyperparams(sampler_method="cmaes", n_trials=5)
    stats_agent.clear_output_dir()
Example #2
def test_hyperparam_optim_tpe():
    # Define train env
    train_env = (GridWorld, {})

    # Run AgentManager
    stats_agent = AgentManager(
        DummyAgent,
        train_env,
        fit_budget=1,
        init_kwargs={},
        eval_kwargs={"eval_horizon": 5},
        n_fit=4,
    )

    # test hyperparameter optimization with TPE sampler
    # using hyperopt default values
    sampler_kwargs = TPESampler.hyperopt_parameters()
    stats_agent.optimize_hyperparams(sampler_kwargs=sampler_kwargs, n_trials=5)
    stats_agent.clear_output_dir()
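
For reference, TPESampler.hyperopt_parameters() is Optuna's helper that returns TPE settings matching hyperopt's defaults; the snippet above presumably relies on an import along these lines (not shown here):

# assumed import for the snippet above
from optuna.samplers import TPESampler
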
Example #3
def test_discount_optimization():
    class ValueIterationAgentToOptimize(ValueIterationAgent):
        @classmethod
        def sample_parameters(cls, trial):
            """
            Sample hyperparameters for hyperparam optimization using Optuna (https://optuna.org/)
            """
            gamma = trial.suggest_categorical("gamma", [0.1, 0.99])
            return {"gamma": gamma}

    env = (
        GridWorld,
        dict(
            nrows=3,
            ncols=10,
            reward_at={
                (1, 1): 0.1,
                (2, 9): 1.0
            },
            walls=((1, 4), (2, 4), (1, 5)),
            success_probability=0.9,
        ),
    )

    vi_params = {"gamma": 0.1, "epsilon": 1e-3}

    vi_stats = AgentManager(
        ValueIterationAgentToOptimize,
        env,
        fit_budget=0,
        eval_kwargs=dict(eval_horizon=20),
        init_kwargs=vi_params,
        n_fit=4,
        seed=123,
    )

    vi_stats.optimize_hyperparams(n_trials=5,
                                  n_fit=1,
                                  sampler_method="random",
                                  pruner_method="none")

    assert vi_stats.optuna_study
    vi_stats.clear_output_dir()
Example #4
def test_hyperparam_optim_random(parallelization):
    # Define train env
    train_env = (GridWorld, {})

    # Run AgentManager
    stats_agent = AgentManager(
        DummyAgent,
        train_env,
        init_kwargs={},
        fit_budget=1,
        eval_kwargs={"eval_horizon": 5},
        n_fit=4,
        parallelization=parallelization,
    )

    # test hyperparameter optimization with random sampler
    stats_agent.optimize_hyperparams(sampler_method="random",
                                     n_trials=5,
                                     optuna_parallelization=parallelization)
    stats_agent.clear_output_dir()
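
The parallelization argument of this test is supplied externally; a typical way to provide it (an assumption, not shown in the snippet) is pytest parametrization over both backends that AgentManager accepts:

import pytest

# hypothetical parametrization; runs the test once per backend
@pytest.mark.parametrize("parallelization", ["thread", "process"])
def test_hyperparam_optim_random(parallelization):
    ...  # body as in the snippet above
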
Example #5
def test_hyperparam_optim_grid():
    # Define train env
    train_env = (GridWorld, {})

    # Run AgentManager
    stats_agent = AgentManager(
        DummyAgent,
        train_env,
        init_kwargs={},
        fit_budget=1,
        eval_kwargs={"eval_horizon": 5},
        n_fit=4,
    )

    # test hyperparameter optimization with grid sampler
    search_space = {
        "hyperparameter1": [1, 2, 3],
        "hyperparameter2": [-5, 0, 5]
    }
    sampler_kwargs = {"search_space": search_space}
    stats_agent.optimize_hyperparams(n_trials=3 * 3,
                                     sampler_method="grid",
                                     sampler_kwargs=sampler_kwargs)
    stats_agent.clear_output_dir()
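
The grid sampler only explores values proposed by the agent's sample_parameters hook, so the search space above has to match what DummyAgent suggests. A minimal sketch of such a hook (hypothetical; the real test agent may differ) could look like this:

from rlberry.agents import Agent  # assumed base class

class DummyAgent(Agent):  # hypothetical sketch of the test agent's hyperparameter hook
    @classmethod
    def sample_parameters(cls, trial):
        # names and candidate values mirror the grid search space above
        hyperparameter1 = trial.suggest_categorical("hyperparameter1", [1, 2, 3])
        hyperparameter2 = trial.suggest_categorical("hyperparameter2", [-5, 0, 5])
        return dict(hyperparameter1=hyperparameter1, hyperparameter2=hyperparameter2)
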
Example #6
    env = VecFrameStack(env, n_stack=4)
    env = ScalarizeEnvWrapper(env)
    return env


#
# Testing single agent
#

if __name__ == "__main__":
    #
    # Training several agents and comparing different hyperparams
    #

    stats = AgentManager(
        A2CAgent,
        train_env=(env_constructor, None),
        eval_env=(eval_env_constructor, None),
        eval_kwargs=dict(eval_horizon=200),
        agent_name="A2C baseline",
        fit_budget=5000,
        init_kwargs=dict(policy="CnnPolicy", verbose=10),
        n_fit=4,
        parallelization="process",
        output_dir="dev/stable_baselines_atari",
        seed=123,
    )

    stats.fit()
    stats.optimize_hyperparams(timeout=60, n_fit=2)
        agent_name="A2C optimized",
        init_kwargs=dict(policy="MlpPolicy", verbose=1),
        fit_kwargs=dict(log_interval=1000),
        fit_budget=2500,
        eval_kwargs=dict(eval_horizon=400),
        n_fit=4,
        parallelization="process",
        output_dir="dev/stable_baselines",
        seed=456,
    )

    # Optimize hyperparams (600 seconds)
    stats_alternative.optimize_hyperparams(
        timeout=600,
        n_optuna_workers=2,
        n_fit=2,
        optuna_parallelization="process",
        fit_fraction=1.0,
    )

    # Fit everything in parallel
    multimanagers = MultipleManagers()
    multimanagers.append(stats)
    multimanagers.append(stats_alternative)

    multimanagers.run()

    # Plot policy evaluation
    out = evaluate_agents(multimanagers.managers)
    print(out)
Example #8
def execute_message(message: interface.Message,
                    resources: interface.Resources) -> interface.Message:
    response = interface.Message.create(command=interface.Command.ECHO)
    # LIST_RESOURCES
    if message.command == interface.Command.LIST_RESOURCES:
        info = {}
        for rr in resources:
            info[rr] = resources[rr]["description"]
        response = interface.Message.create(info=info)
    # AGENT_MANAGER_CREATE_INSTANCE
    elif message.command == interface.Command.AGENT_MANAGER_CREATE_INSTANCE:
        params = message.params
        base_dir = pathlib.Path(metadata_utils.RLBERRY_DEFAULT_DATA_DIR)
        if "output_dir" in params:
            params[
                "output_dir"] = base_dir / "server_data" / params["output_dir"]
        else:
            params["output_dir"] = base_dir / "server_data/"
        agent_manager = AgentManager(**params)
        filename = str(agent_manager.save())
        response = interface.Message.create(info=dict(
            filename=filename,
            agent_name=agent_manager.agent_name,
            output_dir=str(agent_manager.output_dir).replace(
                "server_data/", "client_data/"),
        ))
        del agent_manager
    # AGENT_MANAGER_FIT
    elif message.command == interface.Command.AGENT_MANAGER_FIT:
        filename = message.params["filename"]
        budget = message.params["budget"]
        extra_params = message.params["extra_params"]
        agent_manager = AgentManager.load(filename)
        agent_manager.fit(budget, **extra_params)
        agent_manager.save()
        response = interface.Message.create(command=interface.Command.ECHO)
        del agent_manager
    # AGENT_MANAGER_EVAL
    elif message.command == interface.Command.AGENT_MANAGER_EVAL:
        filename = message.params["filename"]
        agent_manager = AgentManager.load(filename)
        eval_output = agent_manager.eval_agents(
            message.params["n_simulations"])
        response = interface.Message.create(data=dict(output=eval_output))
        del agent_manager
    # AGENT_MANAGER_CLEAR_OUTPUT_DIR
    elif message.command == interface.Command.AGENT_MANAGER_CLEAR_OUTPUT_DIR:
        filename = message.params["filename"]
        agent_manager = AgentManager.load(filename)
        agent_manager.clear_output_dir()
        response = interface.Message.create(
            message=f"Cleared output dir: {agent_manager.output_dir}")
        del agent_manager
    # AGENT_MANAGER_CLEAR_HANDLERS
    elif message.command == interface.Command.AGENT_MANAGER_CLEAR_HANDLERS:
        filename = message.params["filename"]
        agent_manager = AgentManager.load(filename)
        agent_manager.clear_handlers()
        agent_manager.save()
        response = interface.Message.create(
            message=f"Cleared handlers: {filename}")
        del agent_manager
    # AGENT_MANAGER_SET_WRITER
    elif message.command == interface.Command.AGENT_MANAGER_SET_WRITER:
        filename = message.params["filename"]
        agent_manager = AgentManager.load(filename)
        agent_manager.set_writer(**message.params["kwargs"])
        agent_manager.save()
        del agent_manager
    # AGENT_MANAGER_OPTIMIZE_HYPERPARAMS
    elif message.command == interface.Command.AGENT_MANAGER_OPTIMIZE_HYPERPARAMS:
        filename = message.params["filename"]
        agent_manager = AgentManager.load(filename)
        best_params_dict = agent_manager.optimize_hyperparams(
            **message.params["kwargs"])
        agent_manager.save()
        del agent_manager
        response = interface.Message.create(data=best_params_dict)
    # AGENT_MANAGER_GET_WRITER_DATA
    elif message.command == interface.Command.AGENT_MANAGER_GET_WRITER_DATA:
        # writer scalar data
        filename = message.params["filename"]
        agent_manager = AgentManager.load(filename)
        writer_data = agent_manager.get_writer_data()
        writer_data = writer_data or dict()
        for idx in writer_data:
            writer_data[idx] = writer_data[idx].to_csv(index=False)
        # tensorboard data
        tensorboard_bin_data = None
        if agent_manager.tensorboard_dir is not None:
            tensorboard_zip_file = rlberry.utils.io.zipdir(
                agent_manager.tensorboard_dir,
                agent_manager.output_dir / "tensorboard_data.zip",
            )
            if tensorboard_zip_file is not None:
                # read the zipped tensorboard logs and encode them for transport
                with open(tensorboard_zip_file, "rb") as zip_file:
                    tensorboard_bin_data = zip_file.read()
                tensorboard_bin_data = base64.b64encode(
                    tensorboard_bin_data).decode("ascii")
        response = interface.Message.create(
            data=dict(writer_data=writer_data,
                      tensorboard_bin_data=tensorboard_bin_data))
        del agent_manager
    # end
    return response
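
execute_message dispatches on message.command, so a client drives a remote AgentManager by sending one of these commands together with the parameters that branch reads. A hedged sketch of a fit request follows; the field names are inferred from the handler above, Message and Command come from the same interface module, and the filename is a placeholder:

# hypothetical client-side request mirroring the AGENT_MANAGER_FIT branch
fit_request = interface.Message.create(
    command=interface.Command.AGENT_MANAGER_FIT,
    params=dict(
        filename="path/to/saved_manager",  # placeholder; use the filename returned by AGENT_MANAGER_CREATE_INSTANCE
        budget=100,
        extra_params={},
    ),
)
response = execute_message(fit_request, resources)  # resources: the server-side mapping passed to the handler
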
Example #9
    # Run AgentManager and save results
    # --------------------------------
    manager = AgentManager(
        REINFORCEAgent,
        train_env,
        fit_budget=N_EPISODES,
        init_kwargs=params,
        eval_kwargs=eval_kwargs,
        n_fit=4,
    )

    # hyperparam optim with multiple threads
    manager.optimize_hyperparams(
        n_trials=5,
        timeout=None,
        n_fit=2,
        sampler_method="optuna_default",
        optuna_parallelization="thread",
    )

    initial_n_trials = len(manager.optuna_study.trials)

    # save
    manager_fname = manager.save()
    del manager

    # load
    manager = AgentManager.load(manager_fname)

    # continue previous optimization, now with 120s of timeout and multiprocessing
    manager.optimize_hyperparams(