def test_reset_combined_setup_observation(make_env, datums):
    """Check that reset() combines two datums into one windowed observation.

    Two datums with three rows each are registered. With ``window_size=2``
    the expected observation contains, for every column, the first two rows
    of each datum.
    """
    row_sets = (
        ([2, 1, 3], [3, 2, 4], [1, 1, 1]),
        ([1e-6, 1e-9, 1], [1, 1e-6, 2], [1, 1, 1]),
    )
    for row_set in row_sets:
        datums.add().rows(*row_set)

    expected = [
        [[2, 3], [1e-6, 1]],
        [[1, 2], [1e-9, 1e-6]],
        [[3, 4], [1, 2]],
    ]
    windowed_env = make_env(window_size=2)
    assert_obs_eq(windowed_env.reset(), expected)
def test_reset_returns_properly_windowed_observation(make_env, datums):
    """Check that reset() with ``window_size=2`` returns the first two rows."""
    datums.add().rows([1], [2], [3])

    windowed_env = make_env(window_size=2)
    first_observation = windowed_env.reset()

    assert_obs_eq(first_observation, [[1, 2]])
def test_resetting_the_environment_resets_observations(
        make_ready_env, datums):
    """Check that an ``idle_step`` after reset() repeats the pre-reset result.

    The first ``idle_step`` on a ready environment is expected to observe
    ``[2]``; after calling reset() the next ``idle_step`` must observe ``[2]``
    again rather than moving on to the following row.
    """
    datums.add().rows([1], [2], [3])
    ready_env = make_ready_env()

    before_reset = unpack_obs(idle_step(ready_env))
    assert_obs_eq(before_reset, [2])

    ready_env.reset()

    after_reset = unpack_obs(idle_step(ready_env))
    assert_obs_eq(after_reset, [2])
def test_reset_returns_first_value_from_datums(make_env, datums):
    """Check that reset() on a default environment yields the first row."""
    datums.add().rows([1], [2])

    default_env = make_env()
    first_observation = default_env.reset()

    assert_obs_eq(first_observation, [1])
def test_reset_moves_agent_back_to_center(env, make_observation, center):
    """Check that reset() after a step reports the agent at ``center``."""
    # Take one step with action 0 so the environment has left its
    # starting state.
    env.step(0)

    # reset() is evaluated before the expected observation is built,
    # matching the argument evaluation order of a single combined call.
    observed = env.reset()
    expected = make_observation(agent_pos=center)

    assert_obs_eq(observed, expected)
def test_stepping_the_environment_returns_next_observation(
        make_ready_env, datums):
    """Check that consecutive ``idle_step`` calls walk through the rows.

    With rows ``[1]``, ``[2]``, ``[3]`` registered, the first two idle steps
    on a ready environment are expected to observe ``[2]`` and then ``[3]``.
    """
    datums.add().rows([1], [2], [3])
    ready_env = make_ready_env()

    for expected in ([2], [3]):
        observed = unpack_obs(idle_step(ready_env))
        assert_obs_eq(observed, expected)
def test_navigating_the_agent(env, make_observation, center):
    """Check the agent position reported after a fixed sequence of actions.

    Each pair is ``(action, expected offset from center)``. In this sequence
    every action ``1`` is expected to raise the offset by one and every
    action ``0`` to lower it by one.
    """
    moves = [
        (1, 1),
        (1, 2),
        (0, 1),
        (0, 0),
        (0, -1),
        (1, 0),
    ]
    for action, offset in moves:
        # Step first, then build the expected observation, preserving the
        # evaluation order of the combined call.
        observed = unpack_obs(env.step(action))
        expected = make_observation(agent_pos=center + offset)
        assert_obs_eq(observed, expected)
def test_agent_starts_in_the_center(env, make_observation, center):
    """Check that the observation from reset() places the agent at ``center``."""
    observed = env.reset()
    expected = make_observation(agent_pos=center)

    assert_obs_eq(observed, expected)
def test_setting_the_knobs_to_their_correct_value_solves_the_environment(env):
    """Check that stepping with ``env.knobs`` finishes the episode.

    The step result is unpacked as a 4-tuple; its third element must be
    truthy and its first element (the observation) must be all zeros.
    """
    env.reset()

    observation, _, finished, _ = env.step(env.knobs)

    assert finished
    all_zeros = np.zeros_like(observation)
    assert_obs_eq(observation, all_zeros)
def test_observation_from_step_indicates_direction_of_solution(env, idle):
    """Check how step observations relate to the initial observation.

    Stepping with ``idle`` is expected to reproduce the observation returned
    by reset(). Stepping with that initial observation itself as the action
    is expected to return its negation (``initial * -1``).
    """
    initial = env.reset()

    after_idle = unpack_obs(env.step(idle))
    assert_obs_eq(after_idle, initial)

    after_following_hint = unpack_obs(env.step(initial))
    flipped = initial * -1
    assert_obs_eq(after_following_hint, flipped)
def test_initial_observation_is_direction_from_zero(env):
    """Check the observations of two consecutive resets after ``seed(42)``.

    Each expected observation is built with ``make_obs`` from a fixed list
    of ``+1``/``-1`` entries. The values are tied to the seed used here.
    """
    env.seed(42)

    expected_directions = (
        [-1, -1, 1, -1, 1, 1, -1],
        [-1, -1, 1, 1, 1, -1, 1],
    )
    for directions in expected_directions:
        # Reset before building the expected value, preserving the
        # evaluation order of the combined call.
        observed = env.reset()
        expected = make_obs(directions)
        assert_obs_eq(observed, expected)