def test_step_limit_with_single_env_dataset():
    start = 0
    target = 10
    env = DummyEnvironment(start=start, target=target, max_value=10 * 2)
    env = EnvDataset(env)
    env = EnvDataset(env)

    max_steps = 5

    env = ObservationLimit(env, max_steps=max_steps)
    env.seed(123)
    values = []
    for i, obs in zip(range(100), env):
        values.append(obs)
        _ = env.send(1)
    assert values == list(range(start, max_steps))

    assert env.is_closed

    with pytest.raises(gym.error.ClosedEnvironmentError):
        env.reset()

    with pytest.raises(gym.error.ClosedEnvironmentError):
        env.step(env.action_space.sample())

    with pytest.raises(gym.error.ClosedEnvironmentError):
        for i, _ in zip(range(5), env):
            assert False
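This test pins down the contract of Sequoia's ObservationLimit wrapper: after max_steps observations (the one returned by reset() counts), the env reports is_closed, and any further reset(), step(), or iteration raises gym.error.ClosedEnvironmentError. As a rough illustration only, here is a minimal sketch of a wrapper with that behaviour; the class name is hypothetical and this is not Sequoia's actual implementation (in particular, how the in-flight iteration is ended gracefully is omitted):

import gym
from gym import error


class ObservationLimitSketch(gym.Wrapper):
    """Hypothetical sketch: close the env once `max_steps` observations
    have been produced, counting the one returned by reset()."""

    def __init__(self, env: gym.Env, max_steps: int):
        super().__init__(env)
        self.max_steps = max_steps
        self._n_observations = 0
        self.is_closed = False

    def _observation_produced(self):
        self._n_observations += 1
        if self._n_observations >= self.max_steps:
            self.close()

    def reset(self, **kwargs):
        if self.is_closed:
            raise error.ClosedEnvironmentError("Env is closed.")
        obs = self.env.reset(**kwargs)
        self._observation_produced()
        return obs

    def step(self, action):
        if self.is_closed:
            raise error.ClosedEnvironmentError("Env is closed.")
        obs, reward, done, info = self.env.step(action)
        self._observation_produced()
        return obs, reward, done, info

    def close(self):
        self.is_closed = True
        self.env.close()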
Example #2
def test_step_normally_works_fine():
    env = DummyEnvironment()
    env = EnvDataset(env)
    env.seed(123)

    obs = env.reset()
    assert obs == 0

    obs, reward, done, info = env.step(0)
    assert (obs, reward, done, info) == (0, 5, False, {})
    obs, reward, done, info = env.step(1)
    assert (obs, reward, done, info) == (1, 4, False, {})
    obs, reward, done, info = env.step(1)
    assert (obs, reward, done, info) == (2, 3, False, {})
    obs, reward, done, info = env.step(2)
    assert (obs, reward, done, info) == (1, 4, False, {})
    obs, reward, done, info = env.step(1)
    assert (obs, reward, done, info) == (2, 3, False, {})
    obs, reward, done, info = env.step(1)
    assert (obs, reward, done, info) == (3, 2, False, {})
    obs, reward, done, info = env.step(1)
    assert (obs, reward, done, info) == (4, 1, False, {})

    obs, reward, done, info = env.step(1)
    assert (obs, reward, done, info) == (5, 0, True, {})

    env.reset()
    obs, reward, done, info = env.step(0)
    assert (obs, reward, done, info) == (0, 5, False, {})
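These assertions fully determine the toy dynamics of DummyEnvironment: action 0 keeps the counter in place, 1 increments it, 2 decrements it, the reward is the remaining distance to the target, and the episode ends when the counter reaches the target (5 by default). A hypothetical reconstruction, just to make the expected values above easy to verify; this is not Sequoia's actual class:

import gym
from gym import spaces


class DummyEnvironmentSketch(gym.Env):
    """Hypothetical reconstruction of DummyEnvironment, inferred from the
    assertions in Example #2 above."""

    def __init__(self, start: int = 0, target: int = 5, max_value: int = 10):
        self.start = start
        self.target = target
        self.max_value = max_value
        self.observation_space = spaces.Discrete(max_value + 1)
        self.action_space = spaces.Discrete(3)
        self.obs = start

    def seed(self, seed: int = None):
        # The dynamics are deterministic; accept the call for API parity.
        return [seed]

    def reset(self):
        self.obs = self.start
        return self.obs

    def step(self, action):
        # Action 0 stays, 1 increments, 2 decrements (clipped to the valid range).
        self.obs += {0: 0, 1: 1, 2: -1}[int(action)]
        self.obs = max(0, min(self.max_value, self.obs))
        reward = abs(self.target - self.obs)
        done = self.obs == self.target
        return self.obs, reward, done, {}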
Example #3
def test_doesnt_raise_error_when_action_sent():
    env = DummyEnvironment()
    with EnvDataset(env) as env:
        env.reset()
        env.seed(123)

        for i, obs in zip(range(5), env):
            assert obs in env.observation_space
            reward = env.send(env.action_space.sample())
Example #4
def test_raise_error_when_missing_action():
    env = DummyEnvironment()
    with EnvDataset(env) as env:
        env.reset()
        env.seed(123)

        with pytest.raises(RuntimeError):
            for i, observation in zip(range(5), env):
                pass
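Taken together, Examples #3 and #4 pin down the iteration contract of EnvDataset: each observation the iterator yields must be answered with send(action) before advancing, and skipping the send raises a RuntimeError. A minimal generator-based sketch of that contract (the class name is hypothetical and episode boundaries are omitted for brevity):

class ActiveIteratorSketch:
    """Hypothetical sketch of the yield-then-send protocol."""

    def __init__(self, env):
        self.env = env
        self._action = None
        self._obs = None

    def __iter__(self):
        self._obs = self.env.reset()
        while True:
            self._action = None
            yield self._obs
            # Advancing the iterator without having sent an action is an error.
            if self._action is None:
                raise RuntimeError("Send an action with `send()` before iterating further.")

    def send(self, action):
        # Record the action and step the env; the next iteration yields the new obs.
        self._action = action
        self._obs, reward, done, info = self.env.step(action)
        return reward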
Example #5
def test_measure_RL_performance_basics():
    env = DummyEnvironment(start=0, target=5, max_value=10)

    # ContinualRLSetting is only needed if the wrapper below is uncommented:
    # from sequoia.settings.active.continual.continual_rl_setting import ContinualRLSetting
    # env = TypedObjectsWrapper(env, observations_type=ContinualRLSetting.Observations,
    #                           actions_type=ContinualRLSetting.Actions,
    #                           rewards_type=ContinualRLSetting.Rewards)

    env = MeasureRLPerformanceWrapper(env)
    env.seed(123)
    all_episode_rewards = []
    all_episode_steps = []

    for episode in range(5):
        episode_steps = 0
        episode_reward = 0
        obs = env.reset()
        print(f"Episode {episode}, obs: {obs}")
        done = False
        while not done:
            action = env.action_space.sample()
            obs, reward, done, info = env.step(action)
            episode_reward += reward
            episode_steps += 1
            # print(obs, reward, done, info)

        all_episode_steps.append(episode_steps)
        all_episode_rewards.append(episode_reward)
    from itertools import accumulate

    expected_metrics = {}
    for episode_steps, cumul_step, episode_reward in zip(
            all_episode_steps, accumulate(all_episode_steps),
            all_episode_rewards):
        expected_metrics[cumul_step] = EpisodeMetrics(
            n_samples=1,
            mean_episode_reward=episode_reward,
            mean_episode_length=episode_steps,
        )

    assert env.get_online_performance() == expected_metrics
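The keys of the expected dict are the cumulative step counts at which each episode ended, which is what get_online_performance() appears to key its metrics by; itertools.accumulate produces exactly those running totals. For example (episode lengths hypothetical):

from itertools import accumulate

episode_lengths = [3, 5, 2]               # hypothetical per-episode step counts
print(list(accumulate(episode_lengths)))  # [3, 8, 10]: the dict keys above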