Example #1
def test_vec_env_monitor_kwargs():
    env = make_vec_env("MountainCarContinuous-v0",
                       n_envs=1,
                       seed=0,
                       monitor_kwargs={"allow_early_resets": False})
    assert env.get_attr("allow_early_resets")[0] is False

    env = make_atari_env("BreakoutNoFrameskip-v4",
                         n_envs=1,
                         seed=0,
                         monitor_kwargs={"allow_early_resets": False})
    assert env.get_attr("allow_early_resets")[0] is False

    env = make_vec_env("MountainCarContinuous-v0",
                       n_envs=1,
                       seed=0,
                       monitor_kwargs={"allow_early_resets": True})
    assert env.get_attr("allow_early_resets")[0] is True

    env = make_atari_env(
        "BreakoutNoFrameskip-v4",
        n_envs=1,
        seed=0,
        monitor_kwargs={"allow_early_resets": True},
    )
    assert env.get_attr("allow_early_resets")[0] is True
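For reference, `monitor_kwargs` is forwarded to the `Monitor` wrapper that `make_vec_env` and `make_atari_env` apply to each sub-environment, which is what the `get_attr` assertions above verify. A minimal standalone sketch of the same mechanism (assuming stable_baselines3 is installed; the env id and `n_envs` are arbitrary):

from stable_baselines3.common.env_util import make_vec_env

# monitor_kwargs is passed through to Monitor(env, ...);
# allow_early_resets=True permits calling reset() mid-episode
vec_env = make_vec_env(
    "MountainCarContinuous-v0",
    n_envs=2,
    seed=0,
    monitor_kwargs={"allow_early_resets": True},
)
# get_attr reads the attribute from every sub-environment
print(vec_env.get_attr("allow_early_resets"))  # [True, True]
vec_env.close()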
Example #2
def test_custom_vec_env(tmp_path):
    """
    Standalone test for a special case (passing a custom VecEnv class), to avoid doubling the number of tests.
    """
    monitor_dir = tmp_path / "test_make_vec_env/"
    env = make_vec_env(
        "CartPole-v1",
        n_envs=1,
        monitor_dir=monitor_dir,
        seed=0,
        vec_env_cls=SubprocVecEnv,
        vec_env_kwargs={"start_method": None},
    )

    assert env.num_envs == 1
    assert isinstance(env, SubprocVecEnv)
    assert os.path.isdir(monitor_dir)
    # Kill subprocess
    env.close()
    # Cleanup folder
    shutil.rmtree(monitor_dir)

    # This should fail because DummyVecEnv does not accept any extra keyword arguments
    with pytest.raises(TypeError):
        make_vec_env("CartPole-v1", n_envs=1, vec_env_kwargs={"dummy": False})
Example #3
def test_make_vec_env(env_id, n_envs, vec_env_cls, wrapper_class):
    env = make_vec_env(env_id,
                       n_envs,
                       vec_env_cls=vec_env_cls,
                       wrapper_class=wrapper_class,
                       monitor_dir=None,
                       seed=0)

    assert env.num_envs == n_envs

    if vec_env_cls is None:
        assert isinstance(env, DummyVecEnv)
        if wrapper_class is not None:
            assert isinstance(env.envs[0], wrapper_class)
        else:
            assert isinstance(env.envs[0], Monitor)
    else:
        assert isinstance(env, SubprocVecEnv)
    # Kill subprocesses
    env.close()
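The wrapping order asserted above follows from how `make_vec_env` builds each sub-environment: `Monitor` is applied first and `wrapper_class` on top of it, so the custom wrapper ends up outermost. A minimal sketch with a throwaway wrapper (`TagWrapper` is a hypothetical name, assuming the classic gym API used elsewhere in these tests):

import gym
from stable_baselines3.common.env_util import make_vec_env

class TagWrapper(gym.Wrapper):
    # Trivial wrapper used only to make the layering visible
    def __init__(self, env):
        super().__init__(env)
        self.tagged = True

vec_env = make_vec_env("CartPole-v1", n_envs=2, seed=0, wrapper_class=TagWrapper)
# DummyVecEnv (the default) exposes its sub-environments via .envs
print(isinstance(vec_env.envs[0], TagWrapper))  # True
vec_env.close()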
Example #4
def test_callbacks(tmp_path, model_class, policy_class):
    log_folder = tmp_path / "logs/callbacks/"

    # DQN only supports discrete actions
    env_name = select_env(model_class)
    # Create RL model
    # Small network for fast test
    model = model_class(policy_class,
                        env_name,
                        policy_kwargs=dict(net_arch=[32]))

    checkpoint_callback = CheckpointCallback(save_freq=100,
                                             save_path=log_folder)

    eval_env = gym.make(env_name)
    # Stop training if the performance is good enough
    callback_on_best = StopTrainingOnRewardThreshold(reward_threshold=-1200,
                                                     verbose=1)

    eval_callback = EvalCallback(
        eval_env,
        callback_on_new_best=callback_on_best,
        best_model_save_path=log_folder,
        log_path=log_folder,
        eval_freq=int(N_STEPS_SMALL / 5),
        warn=False,
    )
    # Equivalent to the `checkpoint_callback`
    # but here in an event-driven manner
    checkpoint_on_event = CheckpointCallback(save_freq=1,
                                             save_path=log_folder,
                                             name_prefix="event")

    event_callback = EveryNTimesteps(n_steps=500, callback=checkpoint_on_event)

    # Stop training if max number of episodes is reached
    callback_max_episodes = StopTrainingOnMaxEpisodes(max_episodes=100,
                                                      verbose=1)

    callback = CallbackList([
        checkpoint_callback, eval_callback, event_callback,
        callback_max_episodes
    ])
    model.learn(N_STEPS_SMALL, callback=callback)

    # Check access to local variables
    assert model.env.observation_space.contains(callback.locals["new_obs"][0])
    # Check that the child callback was called
    assert checkpoint_callback.locals["new_obs"] is callback.locals["new_obs"]
    assert event_callback.locals["new_obs"] is callback.locals["new_obs"]
    assert checkpoint_on_event.locals["new_obs"] is callback.locals["new_obs"]
    # Check that the internal callback counters match the model's counters
    assert event_callback.num_timesteps == model.num_timesteps
    assert event_callback.n_calls == model.num_timesteps

    model.learn(N_STEPS_SMALL, callback=None)
    # Transform callback into a callback list automatically
    model.learn(N_STEPS_SMALL, callback=[checkpoint_callback, eval_callback])
    # Automatic wrapping, old way of doing callbacks
    model.learn(N_STEPS_SMALL, callback=lambda _locals, _globals: True)

    # Testing models that support multiple envs
    if model_class in [A2C, PPO]:
        max_episodes = 1
        n_envs = 2
        # Pendulum-v0 has a time limit of 200 timesteps
        max_episode_length = 200
        envs = make_vec_env(env_name, n_envs=n_envs, seed=0)

        model = model_class(policy_class,
                            envs,
                            policy_kwargs=dict(net_arch=[32]))

        callback_max_episodes = StopTrainingOnMaxEpisodes(
            max_episodes=max_episodes, verbose=1)
        callback = CallbackList([callback_max_episodes])
        model.learn(max_episode_length * 2, callback=callback)

        # Check that the actual number of episodes and timesteps per env match the expected values
        episodes_per_env = callback_max_episodes.n_episodes // n_envs
        assert episodes_per_env == max_episodes
        timesteps_per_env = model.num_timesteps // n_envs
        assert timesteps_per_env == max_episode_length

    if os.path.exists(log_folder):
        shutil.rmtree(log_folder)
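Outside a test, the composition pattern exercised here, several callbacks bundled in a `CallbackList` and handed to `learn()`, reduces to a few lines. A minimal sketch (the frequencies, paths, and the budget of 2_000 steps are placeholders):

import gym
from stable_baselines3 import PPO
from stable_baselines3.common.callbacks import (
    CallbackList,
    CheckpointCallback,
    EvalCallback,
)

model = PPO("MlpPolicy", "CartPole-v1")
checkpoint_cb = CheckpointCallback(save_freq=1_000, save_path="./logs/")
eval_cb = EvalCallback(gym.make("CartPole-v1"), eval_freq=500, warn=False)
# learn() also accepts a plain list and wraps it in a CallbackList itself
model.learn(2_000, callback=CallbackList([checkpoint_cb, eval_cb]))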
Example #5
def test_vec_env_kwargs():
    env = make_vec_env("MountainCarContinuous-v0",
                       n_envs=1,
                       seed=0,
                       env_kwargs={"goal_velocity": 0.11})
    assert env.get_attr("goal_velocity")[0] == 0.11
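`env_kwargs` is forwarded verbatim to `gym.make`, so any constructor argument of the underlying environment can be set this way; `goal_velocity` is a real constructor argument of MountainCarContinuous. A minimal sketch:

from stable_baselines3.common.env_util import make_vec_env

# env_kwargs is passed straight through as gym.make(env_id, **env_kwargs)
vec_env = make_vec_env(
    "MountainCarContinuous-v0",
    n_envs=4,
    seed=0,
    env_kwargs={"goal_velocity": 0.11},
)
print(vec_env.get_attr("goal_velocity"))  # [0.11, 0.11, 0.11, 0.11]
vec_env.close()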
Example #6
def test_discrete_obs_space(model_class, policy_class, env):
    env = make_vec_env(env, n_envs=2, seed=0)
    model_class(policy_class, env, n_steps=256).learn(100)
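This last test only checks that training runs when the vectorized environment has a discrete observation space. Spelled out without the fixture parameters (a sketch assuming PPO as the model class, since `n_steps` is an on-policy argument; FrozenLake-v1 stands in for any discrete-observation env):

from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env

# FrozenLake-v1 has a Discrete observation space
vec_env = make_vec_env("FrozenLake-v1", n_envs=2, seed=0)
model = PPO("MlpPolicy", vec_env, n_steps=256)
model.learn(100)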