def test_does_not_modify_action(self):
    """Check that ``PointEnv.step`` leaves the caller's action untouched.

    A sampled action is copied before being handed to ``step``; afterwards
    the original must still equal the copy element by element.
    """
    env = PointEnv()
    a = env.action_space.sample()
    a_copy = a.copy()
    env.reset()
    env.step(a)
    # Compare element-wise. Comparing ``a.all()`` with ``a_copy.all()``
    # would only compare two booleans ("is every entry truthy?") and would
    # pass for almost any in-place modification of ``a``.
    assert (a == a_copy).all()
    env.close()
def test_done(self):
    """Check that stepping with ``env._goal`` eventually reports done.

    Calls ``step`` up to 1000 times, passing ``env._goal`` as the action
    each time, and fails if none of those steps returns a truthy done flag.
    """
    env = PointEnv()
    finished = False
    remaining = 1000
    # Stop as soon as a step reports done, or once the step budget is spent.
    while remaining > 0 and not finished:
        _, _, finished, _ = env.step(env._goal)
        remaining -= 1
    assert finished, 'Should report done'
def test_reset(self):
    """Check that ``reset`` puts ``env._point`` back at ``[0, 0]``.

    The point is verified to be ``[0, 0]`` on a freshly constructed
    environment, then again after one sampled step followed by ``reset``.
    """
    env = PointEnv()
    origin = np.array([0, 0])

    # A freshly constructed environment is expected to sit at the origin.
    starts_at_origin = env._point == origin
    assert starts_at_origin.all()

    # Take one sampled step, then reset.
    action = env.action_space.sample()
    env.step(action)
    env.reset()

    back_at_origin = env._point == origin
    assert back_at_origin.all()
def test_observation_space(self):
    """Check that an observation from ``step`` lies in ``observation_space``.

    Takes a single step with a sampled action and asserts that the returned
    observation is accepted by ``env.observation_space.contains``.
    """
    env = PointEnv()
    space = env.observation_space
    action = env.action_space.sample()
    # ``step`` is unpacked as a 4-tuple; only the first element is checked.
    observation, _reward, _done, _info = env.step(action)
    assert space.contains(observation)