def test_hyperparam_optim_cmaes():
    """Check that hyperparameter optimization runs with the CMA-ES sampler."""
    # Environment constructor and its kwargs.
    env_tuple = (GridWorld, {})

    manager = AgentManager(
        DummyAgent,
        env_tuple,
        init_kwargs={},
        fit_budget=1,
        eval_kwargs={"eval_horizon": 5},
        n_fit=4,
    )

    # Short optimization run using the CMA-ES sampler.
    manager.optimize_hyperparams(sampler_method="cmaes", n_trials=5)
    manager.clear_output_dir()
def test_hyperparam_optim_tpe():
    """Check hyperparameter optimization with the TPE sampler."""
    # Environment constructor and its kwargs.
    env_tuple = (GridWorld, {})

    manager = AgentManager(
        DummyAgent,
        env_tuple,
        fit_budget=1,
        init_kwargs={},
        eval_kwargs={"eval_horizon": 5},
        n_fit=4,
    )

    # TPE sampler configured with hyperopt's default parameters.
    tpe_kwargs = TPESampler.hyperopt_parameters()
    manager.optimize_hyperparams(sampler_kwargs=tpe_kwargs, n_trials=5)
    manager.clear_output_dir()
def test_discount_optimization():
    """Optimize the discount factor of a ValueIteration agent with Optuna."""

    class OptimizableVIAgent(ValueIterationAgent):
        @classmethod
        def sample_parameters(cls, trial):
            """
            Sample hyperparameters for hyperparam optimization using
            Optuna (https://optuna.org/)
            """
            gamma = trial.suggest_categorical("gamma", [0.1, 0.99])
            return {"gamma": gamma}

    env_spec = (
        GridWorld,
        dict(
            nrows=3,
            ncols=10,
            reward_at={(1, 1): 0.1, (2, 9): 1.0},
            walls=((1, 4), (2, 4), (1, 5)),
            success_probability=0.9,
        ),
    )

    manager = AgentManager(
        OptimizableVIAgent,
        env_spec,
        fit_budget=0,
        eval_kwargs=dict(eval_horizon=20),
        init_kwargs={"gamma": 0.1, "epsilon": 1e-3},
        n_fit=4,
        seed=123,
    )
    manager.optimize_hyperparams(
        n_trials=5, n_fit=1, sampler_method="random", pruner_method="none"
    )
    # The optimization must have produced an Optuna study.
    assert manager.optuna_study
    manager.clear_output_dir()
def test_hyperparam_optim_random(parallelization):
    """Check hyperparameter optimization with the random sampler.

    Parameters
    ----------
    parallelization
        Backend forwarded to both AgentManager and the Optuna run.
    """
    # Environment constructor and its kwargs.
    env_tuple = (GridWorld, {})

    manager = AgentManager(
        DummyAgent,
        env_tuple,
        init_kwargs={},
        fit_budget=1,
        eval_kwargs={"eval_horizon": 5},
        n_fit=4,
        parallelization=parallelization,
    )

    # Short optimization run with the random sampler.
    manager.optimize_hyperparams(
        sampler_method="random",
        n_trials=5,
        optuna_parallelization=parallelization,
    )
    manager.clear_output_dir()
def test_hyperparam_optim_grid():
    """Check hyperparameter optimization with the grid sampler."""
    # Environment constructor and its kwargs.
    env_tuple = (GridWorld, {})

    manager = AgentManager(
        DummyAgent,
        env_tuple,
        init_kwargs={},
        fit_budget=1,
        eval_kwargs={"eval_horizon": 5},
        n_fit=4,
    )

    # Grid sampler requires an explicit search space; 3 * 3 trials covers it fully.
    search_space = {
        "hyperparameter1": [1, 2, 3],
        "hyperparameter2": [-5, 0, 5],
    }
    manager.optimize_hyperparams(
        n_trials=3 * 3,
        sampler_method="grid",
        sampler_kwargs={"search_space": search_space},
    )
    manager.clear_output_dir()
env = VecFrameStack(env, n_stack=4) env = ScalarizeEnvWrapper(env) return env # # Testing single agent # if __name__ == "__main__": # # Training several agents and comparing different hyperparams # stats = AgentManager( A2CAgent, train_env=(env_constructor, None), eval_env=(eval_env_constructor, None), eval_kwargs=dict(eval_horizon=200), agent_name="A2C baseline", fit_budget=5000, init_kwargs=dict(policy="CnnPolicy", verbose=10), n_fit=4, parallelization="process", output_dir="dev/stable_baselines_atari", seed=123, ) stats.fit() stats.optimize_hyperparams(timeout=60, n_fit=2)
agent_name="A2C optimized", init_kwargs=dict(policy="MlpPolicy", verbose=1), fit_kwargs=dict(log_interval=1000), fit_budget=2500, eval_kwargs=dict(eval_horizon=400), n_fit=4, parallelization="process", output_dir="dev/stable_baselines", seed=456, ) # Optimize hyperparams (600 seconds) stats_alternative.optimize_hyperparams( timeout=600, n_optuna_workers=2, n_fit=2, optuna_parallelization="process", fit_fraction=1.0, ) # Fit everything in parallel multimanagers = MultipleManagers() multimanagers.append(stats) multimanagers.append(stats_alternative) multimanagers.run() # Plot policy evaluation out = evaluate_agents(multimanagers.managers) print(out)
def execute_message(message: interface.Message,
                    resources: interface.Resources) -> interface.Message:
    """Dispatch a client message to the matching AgentManager operation.

    Parameters
    ----------
    message : interface.Message
        Incoming request; ``message.command`` selects the operation and
        ``message.params`` carries its arguments.
    resources : interface.Resources
        Mapping of resource name -> resource info; each entry must provide
        a ``"description"`` key.

    Returns
    -------
    interface.Message
        Response for the client; defaults to an ECHO message when the
        command produces no specific payload.
    """
    response = interface.Message.create(command=interface.Command.ECHO)
    # LIST_RESOURCES
    if message.command == interface.Command.LIST_RESOURCES:
        info = {rr: resources[rr]["description"] for rr in resources}
        response = interface.Message.create(info=info)
    # AGENT_MANAGER_CREATE_INSTANCE
    elif message.command == interface.Command.AGENT_MANAGER_CREATE_INSTANCE:
        params = message.params
        base_dir = pathlib.Path(metadata_utils.RLBERRY_DEFAULT_DATA_DIR)
        # Force every output directory under the server's data directory.
        if "output_dir" in params:
            params[
                "output_dir"] = base_dir / "server_data" / params["output_dir"]
        else:
            params["output_dir"] = base_dir / "server_data/"
        agent_manager = AgentManager(**params)
        filename = str(agent_manager.save())
        response = interface.Message.create(info=dict(
            filename=filename,
            agent_name=agent_manager.agent_name,
            # Report the path as seen from the client side.
            output_dir=str(agent_manager.output_dir).replace(
                "server_data/", "client_data/"),
        ))
        del agent_manager
    # AGENT_MANAGER_FIT
    elif message.command == interface.Command.AGENT_MANAGER_FIT:
        filename = message.params["filename"]
        budget = message.params["budget"]
        extra_params = message.params["extra_params"]
        agent_manager = AgentManager.load(filename)
        agent_manager.fit(budget, **extra_params)
        agent_manager.save()
        response = interface.Message.create(command=interface.Command.ECHO)
        del agent_manager
    # AGENT_MANAGER_EVAL
    elif message.command == interface.Command.AGENT_MANAGER_EVAL:
        filename = message.params["filename"]
        agent_manager = AgentManager.load(filename)
        eval_output = agent_manager.eval_agents(
            message.params["n_simulations"])
        response = interface.Message.create(data=dict(output=eval_output))
        del agent_manager
    # AGENT_MANAGER_CLEAR_OUTPUT_DIR
    elif message.command == interface.Command.AGENT_MANAGER_CLEAR_OUTPUT_DIR:
        filename = message.params["filename"]
        agent_manager = AgentManager.load(filename)
        agent_manager.clear_output_dir()
        response = interface.Message.create(
            message=f"Cleared output dir: {agent_manager.output_dir}")
        del agent_manager
    # AGENT_MANAGER_CLEAR_HANDLERS
    elif message.command == interface.Command.AGENT_MANAGER_CLEAR_HANDLERS:
        filename = message.params["filename"]
        agent_manager = AgentManager.load(filename)
        agent_manager.clear_handlers()
        agent_manager.save()
        # Fix: the original f-string had no placeholder; report which
        # manager file had its handlers cleared.
        response = interface.Message.create(
            message=f"Cleared handlers: {filename}")
        del agent_manager
    # AGENT_MANAGER_SET_WRITER
    elif message.command == interface.Command.AGENT_MANAGER_SET_WRITER:
        filename = message.params["filename"]
        agent_manager = AgentManager.load(filename)
        agent_manager.set_writer(**message.params["kwargs"])
        agent_manager.save()
        del agent_manager
    # AGENT_MANAGER_OPTIMIZE_HYPERPARAMS
    elif message.command == interface.Command.AGENT_MANAGER_OPTIMIZE_HYPERPARAMS:
        filename = message.params["filename"]
        agent_manager = AgentManager.load(filename)
        best_params_dict = agent_manager.optimize_hyperparams(
            **message.params["kwargs"])
        agent_manager.save()
        del agent_manager
        response = interface.Message.create(data=best_params_dict)
    # AGENT_MANAGER_GET_WRITER_DATA
    elif message.command == interface.Command.AGENT_MANAGER_GET_WRITER_DATA:
        # writer scalar data
        filename = message.params["filename"]
        agent_manager = AgentManager.load(filename)
        writer_data = agent_manager.get_writer_data()
        writer_data = writer_data or dict()
        # Serialize each DataFrame to CSV text for transport.
        for idx in writer_data:
            writer_data[idx] = writer_data[idx].to_csv(index=False)
        # tensorboard data
        tensorboard_bin_data = None
        if agent_manager.tensorboard_dir is not None:
            tensorboard_zip_file = rlberry.utils.io.zipdir(
                agent_manager.tensorboard_dir,
                agent_manager.output_dir / "tensorboard_data.zip",
            )
            if tensorboard_zip_file is not None:
                # Read and close the zip explicitly (the original leaked
                # the file handle).
                with open(tensorboard_zip_file, "rb") as zip_fp:
                    tensorboard_bin_data = zip_fp.read()
                tensorboard_bin_data = base64.b64encode(
                    tensorboard_bin_data).decode("ascii")
        response = interface.Message.create(
            data=dict(writer_data=writer_data,
                      tensorboard_bin_data=tensorboard_bin_data))
        del agent_manager
    # end
    return response
# Run AgentManager and save results # -------------------------------- manager = AgentManager( REINFORCEAgent, train_env, fit_budget=N_EPISODES, init_kwargs=params, eval_kwargs=eval_kwargs, n_fit=4, ) # hyperparam optim with multiple threads manager.optimize_hyperparams( n_trials=5, timeout=None, n_fit=2, sampler_method="optuna_default", optuna_parallelization="thread", ) initial_n_trials = len(manager.optuna_study.trials) # save manager_fname = manager.save() del manager # load manager = AgentManager.load(manager_fname) # continue previous optimization, now with 120s of timeout and multiprocessing manager.optimize_hyperparams(