Example #1
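The helper below relies on a few module-level names. A minimal sketch of the assumed imports (SEED is a hypothetical value; _make_env remains a helper of the original test module and is not reconstructed here):

# Assumed imports for the snippet below; SEED is a hypothetical value.
import os
import tempfile

from rlberry.manager import AgentManager

SEED = 42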
def check_save_load(agent, env="continuous_state", init_kwargs=None):
    """
    Check that the agent saves a non-empty file and can be loaded.

    Parameters
    ----------
    agent : rlberry Agent class
        Agent class to test.
    env : tuple (env_ctor, env_kwargs) or str in {"continuous_state", "discrete_state"}, default="continuous_state"
        If a tuple, the constructor and keyword arguments of the environment on which to test.
        If a string in {"continuous_state", "discrete_state"}, a default benchmark environment is used.
    init_kwargs : dict
        Arguments required by the agent's constructor.
    """
    if init_kwargs is None:
        init_kwargs = {}

    train_env = _make_env(env)
    env = train_env[0](**train_env[1])  # instantiate once to check the env constructor works
    with tempfile.TemporaryDirectory() as tmpdirname:
        agent = AgentManager(
            agent,
            train_env,
            fit_budget=5,
            n_fit=1,
            seed=SEED,
            init_kwargs=init_kwargs,
            output_dir=tmpdirname,
        )
        agent.fit(3)
        assert (
            os.path.getsize(str(agent.output_dir_) + "/agent_handlers/idx_0.pickle") > 1
        ), "The saved file is empty."
        try:
            agent.load(str(agent.output_dir_) + "/agent_handlers/idx_0.pickle")
        except Exception as err:
            raise RuntimeError("Failed to load the agent file.") from err
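A short usage sketch; the agent class and its kwargs are assumptions (see the DummyAgent sketch after Example #4):

# Hedged usage sketch; DummyAgent and its init_kwargs are assumptions.
check_save_load(DummyAgent, env="discrete_state",
                init_kwargs=dict(hyperparameter1=1, hyperparameter2=2))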
Example #2
    ppo_stats = AgentManager(
        PPOAgent,
        train_env,
        fit_budget=N_EPISODES,
        init_kwargs=params_ppo,
        eval_kwargs=eval_kwargs,
        n_fit=4,
        output_dir="dev/",
        parallelization="process",
    )
    ppo_stats.fit()  # fit the 4 agents
    ppo_stats_fname = ppo_stats.save()
    del ppo_stats

    # -------------------------------
    # Load and plot results
    # -------------------------------
    ppo_stats = AgentManager.load(ppo_stats_fname)

    # learning curves
    plot_writer_data(
        ppo_stats,
        tag="episode_rewards",
        preprocess_func=np.cumsum,
        title="Cumulative Rewards",
        show=False,
    )

    # compare final policies
    output = evaluate_agents([ppo_stats], n_simulations=15)
    print(output)
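The excerpt above begins after its setup was trimmed. A minimal sketch of what that setup might look like, assuming a CartPole environment and illustrative PPO hyperparameters (import paths follow rlberry's torch agents; all values are assumptions):

# Hedged sketch of the setup assumed by the snippet above.
import numpy as np
from rlberry.agents.torch import PPOAgent
from rlberry.envs import gym_make

train_env = (gym_make, dict(id="CartPole-v1"))       # assumed environment
N_EPISODES = 100                                     # assumed fit budget
params_ppo = dict(gamma=0.99, learning_rate=0.0003)  # assumed hyperparameters
eval_kwargs = dict(eval_horizon=500, n_simulations=10)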
Example #3
def execute_message(message: interface.Message,
                    resources: interface.Resources) -> interface.Message:
    response = interface.Message.create(command=interface.Command.ECHO)
    # LIST_RESOURCES
    if message.command == interface.Command.LIST_RESOURCES:
        info = {}
        for rr in resources:
            info[rr] = resources[rr]["description"]
        response = interface.Message.create(info=info)
    # AGENT_MANAGER_CREATE_INSTANCE
    elif message.command == interface.Command.AGENT_MANAGER_CREATE_INSTANCE:
        params = message.params
        base_dir = pathlib.Path(metadata_utils.RLBERRY_DEFAULT_DATA_DIR)
        if "output_dir" in params:
            params["output_dir"] = base_dir / "server_data" / params["output_dir"]
        else:
            params["output_dir"] = base_dir / "server_data/"
        agent_manager = AgentManager(**params)
        filename = str(agent_manager.save())
        response = interface.Message.create(info=dict(
            filename=filename,
            agent_name=agent_manager.agent_name,
            output_dir=str(agent_manager.output_dir).replace(
                "server_data/", "client_data/"),
        ))
        del agent_manager
    # AGENT_MANAGER_FIT
    elif message.command == interface.Command.AGENT_MANAGER_FIT:
        filename = message.params["filename"]
        budget = message.params["budget"]
        extra_params = message.params["extra_params"]
        agent_manager = AgentManager.load(filename)
        agent_manager.fit(budget, **extra_params)
        agent_manager.save()
        response = interface.Message.create(command=interface.Command.ECHO)
        del agent_manager
    # AGENT_MANAGER_EVAL
    elif message.command == interface.Command.AGENT_MANAGER_EVAL:
        filename = message.params["filename"]
        agent_manager = AgentManager.load(filename)
        eval_output = agent_manager.eval_agents(
            message.params["n_simulations"])
        response = interface.Message.create(data=dict(output=eval_output))
        del agent_manager
    # AGENT_MANAGER_CLEAR_OUTPUT_DIR
    elif message.command == interface.Command.AGENT_MANAGER_CLEAR_OUTPUT_DIR:
        filename = message.params["filename"]
        agent_manager = AgentManager.load(filename)
        agent_manager.clear_output_dir()
        response = interface.Message.create(
            message=f"Cleared output dir: {agent_manager.output_dir}")
        del agent_manager
    # AGENT_MANAGER_CLEAR_HANDLERS
    elif message.command == interface.Command.AGENT_MANAGER_CLEAR_HANDLERS:
        filename = message.params["filename"]
        agent_manager = AgentManager.load(filename)
        agent_manager.clear_handlers()
        agent_manager.save()
        response = interface.Message.create(
            message=f"Cleared handlers: {filename}")
        del agent_manager
    # AGENT_MANAGER_SET_WRITER
    elif message.command == interface.Command.AGENT_MANAGER_SET_WRITER:
        filename = message.params["filename"]
        agent_manager = AgentManager.load(filename)
        agent_manager.set_writer(**message.params["kwargs"])
        agent_manager.save()
        del agent_manager
    # AGENT_MANAGER_OPTIMIZE_HYPERPARAMS
    elif message.command == interface.Command.AGENT_MANAGER_OPTIMIZE_HYPERPARAMS:
        filename = message.params["filename"]
        agent_manager = AgentManager.load(filename)
        best_params_dict = agent_manager.optimize_hyperparams(
            **message.params["kwargs"])
        agent_manager.save()
        del agent_manager
        response = interface.Message.create(data=best_params_dict)
    # AGENT_MANAGER_GET_WRITER_DATA
    elif message.command == interface.Command.AGENT_MANAGER_GET_WRITER_DATA:
        # writer scalar data
        filename = message.params["filename"]
        agent_manager = AgentManager.load(filename)
        writer_data = agent_manager.get_writer_data()
        writer_data = writer_data or dict()
        for idx in writer_data:
            writer_data[idx] = writer_data[idx].to_csv(index=False)
        # tensorboard data
        tensorboard_bin_data = None
        if agent_manager.tensorboard_dir is not None:
            tensorboard_zip_file = rlberry.utils.io.zipdir(
                agent_manager.tensorboard_dir,
                agent_manager.output_dir / "tensorboard_data.zip",
            )
            if tensorboard_zip_file is not None:
                # read and base64-encode the zip, closing the file handle
                with open(tensorboard_zip_file, "rb") as zip_file:
                    tensorboard_bin_data = zip_file.read()
                tensorboard_bin_data = base64.b64encode(
                    tensorboard_bin_data).decode("ascii")
        response = interface.Message.create(
            data=dict(writer_data=writer_data,
                      tensorboard_bin_data=tensorboard_bin_data))
        del agent_manager
    # end
    return response
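Each branch above expects specific keys in message.params, which suggests how requests are built on the client side. A hedged sketch for the AGENT_MANAGER_FIT branch, assuming Message.create also accepts a params keyword (the filename is hypothetical):

# Hedged sketch of a request for the AGENT_MANAGER_FIT branch above.
request = interface.Message.create(
    command=interface.Command.AGENT_MANAGER_FIT,
    params=dict(
        filename="server_data/manager_obj.pickle",  # hypothetical path
        budget=100,
        extra_params={},
    ),
)
response = execute_message(request, resources)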
Example #4
def test_agent_manager_1():
    # Define train and evaluation envs
    train_env = (GridWorld, {})

    # Parameters
    params = dict(hyperparameter1=-1, hyperparameter2=100)
    eval_kwargs = dict(eval_horizon=10)

    # Check DummyAgent
    agent = DummyAgent(train_env[0](**train_env[1]), **params)
    agent.fit(10)
    agent.policy(None)

    # Run AgentManager
    params_per_instance = [dict(hyperparameter2=ii) for ii in range(4)]
    stats_agent1 = AgentManager(
        DummyAgent,
        train_env,
        fit_budget=5,
        eval_kwargs=eval_kwargs,
        init_kwargs=params,
        n_fit=4,
        seed=123,
        init_kwargs_per_instance=params_per_instance,
    )
    stats_agent2 = AgentManager(
        DummyAgent,
        train_env,
        fit_budget=5,
        eval_kwargs=eval_kwargs,
        init_kwargs=params,
        n_fit=4,
        seed=123,
    )
    agent_manager_list = [stats_agent1, stats_agent2]
    for st in agent_manager_list:
        st.fit()

    for ii, instance in enumerate(stats_agent1.agent_handlers):
        assert instance.hyperparameter1 == -1
        assert instance.hyperparameter2 == ii

    for instance in stats_agent2.agent_handlers:
        assert instance.hyperparameter1 == -1
        assert instance.hyperparameter2 == 100

    # learning curves
    plot_writer_data(agent_manager_list, tag="episode_rewards", show=False)

    # compare final policies
    evaluate_agents(agent_manager_list, show=False)

    # check if fitted
    for agent_manager in agent_manager_list:
        assert len(agent_manager.agent_handlers) == 4
        for agent in agent_manager.agent_handlers:
            assert agent.fitted

    # test saving/loading
    fname = stats_agent1.save()
    loaded_stats = AgentManager.load(fname)
    assert stats_agent1.unique_id == loaded_stats.unique_id

    # test hyperparameter optimization call
    loaded_stats.optimize_hyperparams(n_trials=5)
    loaded_stats.optimize_hyperparams(n_trials=5, continue_previous=True)

    for st in agent_manager_list:
        st.clear_output_dir()
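The test assumes a DummyAgent exposing two hyperparameters, a fitted flag, and a sample_parameters classmethod for the optimize_hyperparams call. A minimal sketch of such an agent; the exact helper used by the original test suite may differ:

# Hedged sketch of the DummyAgent the tests above assume.
from rlberry.agents import Agent


class DummyAgent(Agent):
    name = "DummyAgent"

    def __init__(self, env, hyperparameter1=0, hyperparameter2=0, **kwargs):
        Agent.__init__(self, env, **kwargs)
        self.hyperparameter1 = hyperparameter1
        self.hyperparameter2 = hyperparameter2
        self.fitted = False

    def fit(self, budget, **kwargs):
        # a real agent would interact with self.env for `budget` steps
        self.fitted = True

    def policy(self, observation):
        return 0

    def eval(self, **kwargs):
        # constant score; enough for evaluate_agents to run
        return 0.0

    @classmethod
    def sample_parameters(cls, trial):
        # used by optimize_hyperparams(); draws values from an Optuna trial
        hyperparameter1 = trial.suggest_categorical("hyperparameter1", [1, 2, 3])
        hyperparameter2 = trial.suggest_categorical("hyperparameter2", [1, 2, 3])
        return dict(hyperparameter1=hyperparameter1,
                    hyperparameter2=hyperparameter2)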
Example #5
def test_agent_manager_2():
    # Define train and evaluation envs
    train_env = (GridWorld, {})
    eval_env = (GridWorld, {})

    # Parameters
    params = {}
    eval_kwargs = dict(eval_horizon=10)

    # Run AgentManager
    stats_agent1 = AgentManager(
        DummyAgent,
        train_env,
        eval_env=eval_env,
        fit_budget=5,
        eval_kwargs=eval_kwargs,
        init_kwargs=params,
        n_fit=4,
        seed=123,
    )
    stats_agent2 = AgentManager(
        DummyAgent,
        train_env,
        eval_env=eval_env,
        fit_budget=5,
        eval_kwargs=eval_kwargs,
        init_kwargs=params,
        n_fit=4,
        seed=123,
    )
    agent_manager_list = [stats_agent1, stats_agent2]
    for st in agent_manager_list:
        st.fit()

    # compare final policies
    evaluate_agents(agent_manager_list, show=False)
    evaluate_agents(agent_manager_list, show=False)

    # learning curves
    plot_writer_data(agent_manager_list, tag="episode_rewards", show=False)

    # check if fitted
    for agent_manager in agent_manager_list:
        assert len(agent_manager.agent_handlers) == 4
        for agent in agent_manager.agent_handlers:
            assert agent.fitted

    # test saving/loading
    fname = stats_agent1.save()
    loaded_stats = AgentManager.load(fname)
    assert stats_agent1.unique_id == loaded_stats.unique_id

    # test hyperparameter optimization
    loaded_stats.optimize_hyperparams(n_trials=5)

    # delete some writers
    stats_agent1.set_writer(1, None)
    stats_agent1.set_writer(2, None)

    stats_agent1.clear_output_dir()
    stats_agent2.clear_output_dir()
Example #6
    manager.optimize_hyperparams(
        n_trials=5,
        timeout=None,
        n_fit=2,
        sampler_method="optuna_default",
        optuna_parallelization="thread",
    )

    initial_n_trials = len(manager.optuna_study.trials)

    # save
    manager_fname = manager.save()
    del manager

    # load
    manager = AgentManager.load(manager_fname)

    # continue previous optimization, now with 120s of timeout and multiprocessing
    manager.optimize_hyperparams(
        n_trials=512,
        timeout=120,
        n_fit=8,
        continue_previous=True,
        optuna_parallelization="process",
        n_optuna_workers=4,
    )

    print("number of initial trials = ", initial_n_trials)
    print("number of trials after continuing= ", len(manager.optuna_study.trials))

    print("----")
Example #7
def load_experiment_results(output_dir, experiment_name):
    """
    Parameters
    ----------
    output_dir : str or Path, or list
        Directory (or list of directories) where experiment results are stored
        (the --output_dir command line argument used when running the experiment).
    experiment_name : str or Path, or list
        Name of the YAML file (or list of files) describing the experiment.

    Returns
    -------
    output_data : dict
        Dictionary such that

        output_data['experiment_dirs'] = list of paths to the experiment directories (output_dir/experiment_name)
        output_data['agent_list'] = list containing the names of the agents in the experiment
        output_data['manager'][agent_name] = fitted AgentManager for agent_name
        output_data['dataframes'][agent_name] = dict of pandas DataFrames from the last run of the experiment
        output_data['data_dir'][agent_name] = directory from which the results were loaded
    """
    output_data = {}
    output_data["agent_list"] = []
    output_data["manager"] = {}
    output_data["dataframes"] = {}
    output_data["data_dir"] = {}

    # preprocess input
    if not isinstance(output_dir, list):
        output_dir = [output_dir]
    if not isinstance(experiment_name, list):
        experiment_name = [experiment_name]
    ndirs = len(output_dir)

    if ndirs > 1:
        assert (
            len(experiment_name) == ndirs
        ), "Number of experiment names must match the number of output_dirs."
    else:
        # reuse the single output_dir for every experiment name
        output_dir = len(experiment_name) * output_dir

    results_dirs = []
    for dd, exper in zip(output_dir, experiment_name):
        results_dirs.append(Path(dd) / Path(exper).stem)
    output_data["experiment_dirs"] = results_dirs

    # Subdirectories with data for each agent
    subdirs = []
    for dd in results_dirs:
        subdirs.extend([f for f in dd.iterdir() if f.is_dir()])

    # Create dictionary dict[agent_name] = most recent result dir
    data_dirs = {}
    for dd in subdirs:
        data_dirs[dd.name] = _get_most_recent_path(
            [f for f in dd.iterdir() if f.is_dir()])
        data_dirs[dd.name] = data_dirs[dd.name] / "manager_data"

    # Load data from each subdir
    for agent_name in data_dirs:
        output_data["agent_list"].append(agent_name)

        # store data_dir
        output_data["data_dir"][agent_name] = data_dirs[agent_name]

        # store AgentManager
        output_data["manager"][agent_name] = None
        fname = data_dirs[agent_name] / "manager_obj.pickle"
        try:
            output_data["manager"][agent_name] = AgentManager.load(fname)
            logger.info("... loaded " + str(fname))
        except Exception:
            logger.warning(
                f"Could not load AgentManager instance for {agent_name}.")

        # store data frames
        dataframes = {}
        csv_files = [
            f for f in data_dirs[agent_name].iterdir() if f.suffix == ".csv"
        ]
        for ff in csv_files:
            dataframes[ff.stem] = pd.read_csv(ff)
            logger.info("... loaded " + str(ff))
        output_data["dataframes"][agent_name] = dataframes

    return output_data
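A short usage sketch with hypothetical paths, showing how the returned dictionary is typically consumed:

# Hedged usage sketch; "results" and "experiment.yaml" are hypothetical.
data = load_experiment_results("results", "experiment.yaml")
for agent_name in data["agent_list"]:
    manager = data["manager"][agent_name]
    if manager is not None:
        print(agent_name, "->", data["data_dir"][agent_name])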