def test_segment_runner(env_id):
    env = make_vec_env(SerialVecEnv, make_gym_env, env_id, 3, 0, True)
    env_spec = EnvSpec(env)
    agent = RandomAgent(None, env_spec)
    runner = SegmentRunner(None, agent, env)

    D = runner(4)
    assert len(D) == 3
    assert all([isinstance(d, Segment) for d in D])
    assert all([d.T == 4 for d in D])

    # Each transition's s must equal the previous transition's s_next
    for d in D:
        for s1, s2 in zip(d.transitions[:-1], d.transitions[1:]):
            assert np.allclose(s1.s_next, s2.s)

    # Long horizon
    D = runner(T=1000)
    for d in D:
        assert d.T == 1000

    # A non-rolling VecEnv is rejected
    with pytest.raises(AssertionError):
        env = make_vec_env(SerialVecEnv, make_gym_env, env_id, 3, 0, False)
        SegmentRunner(None, agent, env)

def test_make_vec_env(self):
    venv1 = make_vec_env(SerialVecEnv, make_gym_env, 'CartPole-v1', 5, 1, True)
    venv2 = make_vec_env(ParallelVecEnv, make_gym_env, 'CartPole-v1', 5, 1, True)
    assert isinstance(venv1, VecEnv) and isinstance(venv1, SerialVecEnv)
    assert isinstance(venv2, VecEnv) and isinstance(venv2, ParallelVecEnv)
    assert venv1.num_env == venv2.num_env
    env_spec1 = EnvSpec(venv1)
    assert env_spec1.num_env == venv1.num_env
    env_spec2 = EnvSpec(venv2)
    assert env_spec2.num_env == venv2.num_env
    assert venv1.observation_space == venv2.observation_space
    assert venv1.action_space == venv2.action_space
    assert venv1.reward_range == venv2.reward_range
    assert venv1.T == venv2.T

    o1 = venv1.reset()
    o2 = venv2.reset()
    # Both vec envs receive the same random seeds, so the same actions must yield the same results
    assert np.allclose(o1, o2)
    a = [1] * 5
    o1, r1, d1, _ = venv1.step(a)
    o2, r2, d2, _ = venv2.step(a)
    assert np.allclose(o1, o2)
    assert np.allclose(r1, r2)
    assert np.allclose(d1, d2)

    assert not venv1.closed
    venv1.close()
    assert venv1.closed
    assert not venv2.closed
    venv2.close()
    assert venv2.closed

def test_equivalence_vec_env():
    venv1 = make_vec_env(SerialVecEnv, make_gym_env, 'CartPole-v1', 5, 1)
    venv2 = make_vec_env(ParallelVecEnv, make_gym_env, 'CartPole-v1', 5, 1)
    assert venv1.observation_space == venv2.observation_space
    assert venv1.action_space == venv2.action_space
    assert venv1.num_env == venv2.num_env

    obs1 = venv1.reset()
    obs2 = venv2.reset()
    assert np.allclose(obs1, obs2)
    a = [1] * 5
    obs1, rewards1, dones1, _ = venv1.step(a)
    obs2, rewards2, dones2, _ = venv2.step(a)
    assert np.allclose(obs1, obs2)
    assert np.allclose(rewards1, rewards2)
    assert np.allclose(dones1, dones2)

def test_episode_runner(env_id, num_env, init_seed, T):
    if env_id == 'Sanity':
        make_env = lambda: TimeLimit(SanityEnv())
    else:
        make_env = lambda: gym.make(env_id)
    env = make_vec_env(make_env, num_env, init_seed)
    env = VecStepInfo(env)
    agent = RandomAgent(None, env, None)
    runner = EpisodeRunner()

    if num_env > 1:
        with pytest.raises(AssertionError):
            D = runner(agent, env, T)
    else:
        with pytest.raises(AssertionError):
            runner(agent, env.env, T)  # must be VecStepInfo
        D = runner(agent, env, T)
        for traj in D:
            assert isinstance(traj, Trajectory)
            assert len(traj) <= env.spec.max_episode_steps
            assert traj.numpy_observations.shape == (len(traj) + 1, *env.observation_space.shape)
            if isinstance(env.action_space, gym.spaces.Discrete):
                assert traj.numpy_actions.shape == (len(traj),)
            else:
                assert traj.numpy_actions.shape == (len(traj), *env.action_space.shape)
            assert traj.numpy_rewards.shape == (len(traj),)
            assert traj.numpy_dones.shape == (len(traj),)
            assert traj.numpy_masks.shape == (len(traj),)
            assert len(traj.step_infos) == len(traj)
            if traj.completed:
                assert np.allclose(traj.observations[-1], traj.step_infos[-1]['last_observation'])

def test_final_state_from_segment():
    env = make_vec_env(SerialVecEnv, make_gym_env, 'CartPole-v1', 3, 0)
    env_spec = EnvSpec(env)
    with pytest.raises(AssertionError):
        final_state_from_segment([1, 2, 3])

    D = BatchSegment(env_spec, 4)
    D.obs = np.random.randn(*D.obs.shape)
    D.done.fill(False)
    D.done[0, -1] = True
    D.info[0] = [{}, {}, {}, {'terminal_observation': [0.1, 0.2, 0.3, 0.4]}]
    D.done[1, 2] = True
    D.info[1] = [{}, {}, {'terminal_observation': [1, 2, 3, 4]}, {}]
    D.done[2, -1] = True
    D.info[2] = [{}, {}, {}, {'terminal_observation': [10, 20, 30, 40]}]

    final_states = final_state_from_segment(D)
    assert final_states.shape == (3,) + env_spec.observation_space.shape
    assert np.allclose(final_states[0], [0.1, 0.2, 0.3, 0.4])
    # A done in the middle of a segment does not affect that segment's final state
    assert np.allclose(final_states[1], D.numpy_observations[1, -1, ...])
    assert not np.allclose(final_states[1], [1, 2, 3, 4])
    assert np.allclose(final_states[2], [10, 20, 30, 40])
    with pytest.raises(AssertionError):
        final_state_from_episode(D)

def test_terminal_state_from_episode():
    env = make_vec_env(SerialVecEnv, make_gym_env, 'CartPole-v1', 3, 0)
    env_spec = EnvSpec(env)
    with pytest.raises(AssertionError):
        terminal_state_from_episode([1, 2, 3])

    D = BatchEpisode(env_spec)
    D.obs[0] = [0.1, 0.2, 1.3]
    D.done[0] = [False, False, True]
    D.info[0] = [{}, {}, {'terminal_observation': 0.3}]
    D.obs[1] = [1, 2, 3, 4, 5, 6, 7, 8, 9]
    D.done[1] = [False] * 9
    D.obs[2] = [10, 15]
    D.done[2] = [False, True]
    D.info[2] = [{}, {'terminal_observation': 20}]

    # Only the two terminated episodes contribute a terminal state
    terminal_states = terminal_state_from_episode(D)
    assert terminal_states.shape == (2,) + env_spec.observation_space.shape
    assert np.allclose(terminal_states[0], 0.3)
    assert np.allclose(terminal_states[1], 20)

    D.done[0][-1] = False
    D.done[2][-1] = False
    assert terminal_state_from_episode(D) is None
    with pytest.raises(AssertionError):
        terminal_state_from_segment(D)

def test_returns_from_episode():
    env = make_vec_env(SerialVecEnv, make_gym_env, 'CartPole-v1', 3, 0)
    env_spec = EnvSpec(env)
    D = BatchEpisode(env_spec)
    D.r[0] = [1, 2, 3]
    D.done[0] = [False, False, True]
    D.r[1] = [1, 2, 3, 4, 5]
    D.done[1] = [False, False, False, False, False]
    D.r[2] = [1, 2, 3, 4, 5, 6, 7, 8]
    D.done[2] = [False, False, False, False, False, False, False, True]

    out = returns_from_episode(D, 1.0)
    assert out.shape == (3, D.maxT)
    assert np.allclose(out[0], [6, 5, 3, 0, 0, 0, 0, 0])
    assert np.allclose(out[1], [15, 14, 12, 9, 5, 0, 0, 0])
    assert np.allclose(out[2], [36, 35, 33, 30, 26, 21, 15, 8])
    del out

    out = returns_from_episode(D, 0.1)
    assert out.shape == (3, D.maxT)
    assert np.allclose(out[0], [1.23, 2.3, 3, 0, 0, 0, 0, 0])
    assert np.allclose(out[1], [1.2345, 2.345, 3.45, 4.5, 5, 0, 0, 0])
    assert np.allclose(out[2], [1.2345678, 2.345678, 3.45678, 4.5678, 5.678, 6.78, 7.8, 8])
    with pytest.raises(AssertionError):
        returns_from_segment(D, 0.1)

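# The expected values above encode the discounted-return recursion
# G_t = r_t + gamma * G_{t+1}, zero-padded out to D.maxT. Below is a minimal
# per-episode sketch of that recursion; `discounted_returns` is a hypothetical
# helper for illustration, not the library implementation. E.g. with gamma=0.1,
# [1, 2, 3] -> [1 + 0.1*2.3, 2 + 0.1*3, 3] = [1.23, 2.3, 3].
def discounted_returns(rewards, gamma, maxT):
    out = np.zeros(maxT)
    G = 0.0
    for t in reversed(range(len(rewards))):
        G = rewards[t] + gamma * G  # accumulate from the episode's end backwards
        out[t] = G
    return out

assert np.allclose(discounted_returns([1, 2, 3], 0.1, 8), [1.23, 2.3, 3, 0, 0, 0, 0, 0])
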
def test_returns_from_segment():
    env = make_vec_env(SerialVecEnv, make_gym_env, 'CartPole-v1', 3, 0)
    env_spec = EnvSpec(env)
    D = BatchSegment(env_spec, 5)
    D.r[0] = [1, 2, 3, 4, 5]
    D.done[0] = [False, False, False, False, False]
    D.r[1] = [1, 2, 3, 4, 5]
    D.done[1] = [False, False, True, False, False]
    D.r[2] = [1, 2, 3, 4, 5]
    D.done[2] = [True, False, False, False, True]

    out = returns_from_segment(D, 1.0)
    assert out.shape == (3, 5)
    assert np.allclose(out[0], [15, 14, 12, 9, 5])
    assert np.allclose(out[1], [6, 5, 3, 9, 5])
    assert np.allclose(out[2], [1, 14, 12, 9, 5])
    del out

    out = returns_from_segment(D, 0.1)
    assert out.shape == (3, 5)
    assert np.allclose(out[0], [1.2345, 2.345, 3.45, 4.5, 5])
    assert np.allclose(out[1], [1.23, 2.3, 3, 4.5, 5])
    assert np.allclose(out[2], [1, 2.345, 3.45, 4.5, 5])
    with pytest.raises(AssertionError):
        returns_from_episode(D, 0.1)

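# For segments the same recursion additionally resets at episode boundaries:
# done[t] masks out everything after step t, so the accumulated return restarts
# there. Again a hypothetical sketch under the layout assumed above, not the
# library code:
def segment_returns(rewards, dones, gamma):
    out = np.zeros(len(rewards))
    G = 0.0
    for t in reversed(range(len(rewards))):
        G = rewards[t] + gamma * G * (0.0 if dones[t] else 1.0)
        out[t] = G
    return out

assert np.allclose(segment_returns([1, 2, 3, 4, 5], [False, False, True, False, False], 1.0),
                   [6, 5, 3, 9, 5])
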
def test_bootstrapped_returns_from_episode():
    env = make_vec_env(SerialVecEnv, make_gym_env, 'CartPole-v1', 3, 0)
    env_spec = EnvSpec(env)
    D = BatchEpisode(env_spec)
    D.r[0] = [1, 2, 3]
    D.done[0] = [False, False, True]
    D.completed[0] = True
    D.r[1] = [1, 2, 3, 4, 5]
    D.done[1] = [False, False, False, False, False]
    D.completed[1] = False
    D.r[2] = [1, 2, 3, 4, 5, 6, 7, 8]
    D.done[2] = [False, False, False, False, False, False, False, True]
    D.completed[2] = True

    last_Vs = torch.tensor([10, 20, 30]).unsqueeze(1)
    out = bootstrapped_returns_from_episode(D, last_Vs, 1.0)
    assert out.shape == (3, D.maxT)
    # Only the uncompleted episode (index 1) is bootstrapped with its last_V
    assert np.allclose(out[0], [6, 5, 3, 0, 0, 0, 0, 0])
    assert np.allclose(out[1], [35, 34, 32, 29, 25, 0, 0, 0])
    assert np.allclose(out[2], [36, 35, 33, 30, 26, 21, 15, 8])
    del out

    out = bootstrapped_returns_from_episode(D, last_Vs, 0.1)
    assert out.shape == (3, D.maxT)
    assert np.allclose(out[0], [1.23, 2.3, 3, 0, 0, 0, 0, 0])
    assert np.allclose(out[1], [1.2347, 2.347, 3.47, 4.7, 7, 0, 0, 0])
    assert np.allclose(out[2], [1.2345678, 2.345678, 3.45678, 4.5678, 5.678, 6.78, 7.8, 8])
    with pytest.raises(AssertionError):
        bootstrapped_returns_from_segment(D, last_Vs, 0.1)

def test_bootstrapped_returns_from_segment():
    env = make_vec_env(SerialVecEnv, make_gym_env, 'CartPole-v1', 3, 0)
    env_spec = EnvSpec(env)
    D = BatchSegment(env_spec, 5)
    D.r[0] = [1, 2, 3, 4, 5]
    D.done[0] = [False, False, False, False, False]
    D.r[1] = [1, 2, 3, 4, 5]
    D.done[1] = [False, False, True, False, False]
    D.r[2] = [1, 2, 3, 4, 5]
    D.done[2] = [True, False, False, False, True]

    last_Vs = torch.tensor([10, 20, 30]).unsqueeze(1)
    out = bootstrapped_returns_from_segment(D, last_Vs, 1.0)
    assert out.shape == (3, 5)
    assert np.allclose(out[0], [25, 24, 22, 19, 15])
    assert np.allclose(out[1], [6, 5, 3, 29, 25])
    assert np.allclose(out[2], [1, 14, 12, 9, 5])
    del out

    out = bootstrapped_returns_from_segment(D, last_Vs, 0.1)
    assert out.shape == (3, 5)
    assert np.allclose(out[0], [1.2346, 2.346, 3.46, 4.6, 6])
    assert np.allclose(out[1], [1.23, 2.3, 3, 4.7, 7])
    assert np.allclose(out[2], [1, 2.345, 3.45, 4.5, 5])
    with pytest.raises(AssertionError):
        bootstrapped_returns_from_episode(D, last_Vs, 0.1)

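# The bootstrapped variant is the same masked recursion, except the backward
# pass is seeded with last_V, the value estimate of the state after the final
# step; a done at the last step masks the bootstrap away automatically.
# Hypothetical sketch, not the library implementation:
def bootstrapped_segment_returns(rewards, dones, last_V, gamma):
    out = np.zeros(len(rewards))
    G = last_V
    for t in reversed(range(len(rewards))):
        G = rewards[t] + gamma * G * (0.0 if dones[t] else 1.0)
        out[t] = G
    return out

assert np.allclose(bootstrapped_segment_returns([1, 2, 3, 4, 5], [False] * 5, 10, 1.0),
                   [25, 24, 22, 19, 15])
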
def make_env(config, seed):
    def _make_env():
        env = gym.make(config['env.id'])
        env = env.env  # strip out gym's TimeLimit; TODO: remove this workaround once gym updates it
        env = TimeLimit(env, env.spec.max_episode_steps)
        env = NormalizeAction(env)
        return env
    env = make_vec_env(_make_env, 1, seed)  # single environment
    return env

def test_make_vec_env(env_id, num_env, init_seed):
    def make_env():
        return gym.make(env_id)
    env = make_vec_env(make_env, num_env, init_seed)
    assert isinstance(env, VecEnv)
    # Each sub-environment is created with a distinct seed drawn from Seeder
    seeds = [x.keywords['seed'] for x in env.list_make_env]
    seeder = Seeder(init_seed)
    assert seeds == seeder(num_env)

def make_env(config, seed):
    def _make_env():
        env = gym.make(config['env.id'])
        env = env.env  # strip out gym's TimeLimit; TODO: remove this workaround once gym updates it
        env = TimeLimit(env, env.spec.max_episode_steps)
        if config['env.clip_action'] and isinstance(env.action_space, Box):
            env = ClipAction(env)
        return env
    env = make_vec_env(_make_env, 1, seed)  # single environment
    return env

def test_random_agent():
    env = make_gym_env('Pendulum-v0', 0)
    env_spec = EnvSpec(env)
    agent = RandomAgent(None, env_spec)
    out = agent.choose_action(env.reset())
    assert isinstance(out, dict)
    assert 'action' in out and out['action'].shape == (1,)

    venv = make_vec_env(SerialVecEnv, make_gym_env, 'CartPole-v1', 3, 0)
    env_spec = EnvSpec(venv)
    agent = RandomAgent(None, env_spec)
    out = agent.choose_action(venv.reset())
    assert isinstance(out, dict)
    assert 'action' in out and len(out['action']) == 3 and isinstance(out['action'][0], int)

def test_episode_runner(vec_env, env_id):
    env = make_vec_env(vec_env, make_gym_env, env_id, 3, 0)
    env_spec = EnvSpec(env)
    if env_id == 'CartPole-v1':
        sticky_action = 1
    elif env_id == 'Pendulum-v0':
        sticky_action = [0.1]
    T = 30

    agent = StickyAgent(None, env_spec, sticky_action)
    runner = EpisodeRunner(None, agent, env)
    D = runner(T)
    assert D.N == 3
    assert D.maxT == max(D.Ts)

    seeder = Seeder(0)
    seed1, seed2, seed3 = seeder(3)
    env1 = make_gym_env(env_id, seed1)
    env2 = make_gym_env(env_id, seed2)
    env3 = make_gym_env(env_id, seed3)
    for n, ev in enumerate([env1, env2, env3]):
        obs = ev.reset()
        assert np.allclose(obs, D.observations[n][0])
        assert np.allclose(obs, D.numpy_observations[n, 0, ...])
        for t in range(T):
            obs, reward, done, info = ev.step(sticky_action)
            assert np.allclose(reward, D.rewards[n][t])
            assert np.allclose(reward, D.numpy_rewards[n, t])
            assert np.allclose(done, D.dones[n][t])
            assert done == D.numpy_dones[n, t]
            assert int(not done) == D.masks[n][t]
            assert int(not done) == D.numpy_masks[n, t]
            if done:
                assert np.allclose(obs, D.infos[n][t]['terminal_observation'])
                assert D.completes[n]
                assert np.allclose(0.0, D.numpy_observations[n, t + 1 + 1:, ...])
                assert np.allclose(0.0, D.numpy_actions[n, t + 1:, ...])
                assert np.allclose(0.0, D.numpy_rewards[n, t + 1:])
                assert np.allclose(True, D.numpy_dones[n, t + 1:])
                assert np.allclose(0.0, D.numpy_masks[n, t + 1:])
                break
            else:
                assert np.allclose(obs, D.observations[n][t + 1])

def test_gae_from_episode():
    env = make_vec_env(SerialVecEnv, make_gym_env, 'CartPole-v1', 3, 0)
    env_spec = EnvSpec(env)
    D = BatchEpisode(env_spec)
    D.r[0] = [1, 2, 3]
    D.done[0] = [False, False, True]
    D.completed[0] = True
    D.r[1] = [1, 2, 3, 4, 5]
    D.done[1] = [False, False, False, False, False]
    D.completed[1] = False
    D.r[2] = [1, 2, 3, 4, 5, 6, 7, 8]
    D.done[2] = [False, False, False, False, False, False, False, True]
    D.completed[2] = True

    all_Vs = [torch.tensor([[0.1], [0.5], [1.0]]),
              torch.tensor([[1.1], [1.5], [2.0]]),
              torch.tensor([[2.1], [2.5], [3.0]]),
              torch.tensor([[3.1], [3.5], [4.0]]),
              torch.tensor([[4.1], [4.5], [5.0]]),
              torch.tensor([[5.1], [5.5], [6.0]]),
              torch.tensor([[6.1], [6.5], [7.0]]),
              torch.tensor([[7.1], [7.5], [8.0]])]
    last_Vs = torch.tensor([10, 20, 30]).unsqueeze(1)
    all_Vs = torch.stack(all_Vs, 1)  # shape (3, 8, 1): [num_env, maxT, 1]

    out = gae_from_episode(D, all_Vs, last_Vs, 1.0, 0.5)
    assert out.shape == (3, D.maxT)
    assert np.allclose(out[0], [3.725, 3.45, 0.9, 0, 0, 0, 0, 0])
    assert np.allclose(out[1], [6.40625, 8.8125, 11.625, 15.25, 20.5, 0, 0, 0])
    assert np.allclose(out[2], [5.84375, 7.6875, 9.375, 10.75, 11.5, 11.0, 8.0, 0.0])
    del out

    out = gae_from_episode(D, all_Vs, last_Vs, 0.1, 0.2)
    assert out.shape == (3, D.maxT)
    assert np.allclose(out[0], [1.03256, 1.128, 0.9, 0, 0, 0, 0, 0])
    assert np.allclose(out[1], [0.665348, 0.7674, 0.87, 1, 2.5, 0, 0, 0])
    assert np.allclose(out[2], [0.206164098, 0.308204915, 0.410245728, 0.5122864, 0.61432, 0.716, 0.8, 0])
    with pytest.raises(AssertionError):
        gae_from_segment(D, all_Vs, last_Vs, 0.1, 0.2)

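# The expected advantages follow the GAE recursion: with TD errors
# delta_t = r_t + gamma * V_{t+1} - V_t (the value after the last step is
# last_V for a truncated episode, 0 for a terminated one),
# A_t = delta_t + gamma * lam * A_{t+1}. A hypothetical per-episode sketch,
# not the batched library implementation:
def gae(rewards, Vs, last_V, gamma, lam):
    Vs = list(Vs) + [last_V]
    out = np.zeros(len(rewards))
    A = 0.0
    for t in reversed(range(len(rewards))):
        delta = rewards[t] + gamma * Vs[t + 1] - Vs[t]
        A = delta + gamma * lam * A
        out[t] = A
    return out

# Episode 0 above terminates, so its bootstrap value is 0 rather than last_Vs[0]:
assert np.allclose(gae([1, 2, 3], [0.1, 1.1, 2.1], 0.0, 1.0, 0.5), [3.725, 3.45, 0.9])
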
def test_trajectory(init_seed, T):
    make_env = lambda: TimeLimit(SanityEnv())
    env = make_vec_env(make_env, 1, init_seed)  # single environment
    env = VecStepInfo(env)

    D = Trajectory()
    assert len(D) == 0
    assert not D.completed

    observation, _ = env.reset()
    D.add_observation(observation)
    for t in range(T):
        action = [env.action_space.sample()]
        next_observation, reward, step_info = env.step(action)
        # unbatch reward and step_info (single environment)
        reward, step_info = map(lambda x: x[0], [reward, step_info])
        if step_info.last:
            D.add_observation([step_info['last_observation']])
        else:
            D.add_observation(next_observation)
        D.add_action(action)
        D.add_reward(reward)
        D.add_step_info(step_info)
        observation = next_observation
        if step_info.last:
            with pytest.raises(AssertionError):
                D.add_observation(observation)
            break

    assert len(D) > 0
    assert len(D) <= T
    assert len(D) + 1 == len(D.observations)
    assert len(D) + 1 == len(D.numpy_observations)
    assert len(D) == len(D.actions)
    assert len(D) == len(D.numpy_actions)
    assert len(D) == len(D.rewards)
    assert len(D) == len(D.numpy_rewards)
    assert len(D) == len(D.numpy_dones)
    assert len(D) == len(D.numpy_masks)
    assert np.allclose(np.logical_not(D.numpy_dones), D.numpy_masks)
    assert len(D) == len(D.step_infos)
    if len(D) < T:
        assert step_info.last
        assert D.completed
        assert D.reach_terminal
        assert not D.reach_time_limit
        assert np.allclose(D.observations[-1], [step_info['last_observation']])
    if not step_info.last:
        assert not D.completed
        assert not D.reach_terminal
        assert not D.reach_time_limit

def test_random_agent(env_id, num_env):
    make_env = lambda: gym.make(env_id)
    env = make_env()
    agent = RandomAgent(None, env, 'cpu')
    out = agent.choose_action(env.reset())
    assert isinstance(out, dict)
    assert out['raw_action'] in env.action_space
    del env, agent, out

    env = make_vec_env(make_env, num_env, 0)
    agent = RandomAgent(None, env, 'cpu')
    out = agent.choose_action(env.reset())
    assert isinstance(out, dict)
    assert len(out['raw_action']) == num_env
    assert all(action in env.action_space for action in out['raw_action'])

def test_random_policy():
    env = make_gym_env('Pendulum-v0', 0)
    env_spec = EnvSpec(env)
    policy = RandomPolicy(None, env_spec)
    out = policy(env.reset())
    assert isinstance(out, dict)
    assert 'action' in out and out['action'].shape == (1,)

    venv = make_vec_env(SerialVecEnv, make_gym_env, 'CartPole-v0', 3, 0, False)
    env_spec = EnvSpec(venv)
    policy = RandomPolicy(None, env_spec)
    out = policy(venv.reset())
    assert isinstance(out, dict)
    assert 'action' in out and len(out['action']) == 3 and isinstance(out['action'][0], int)

def test_vec_clip_action():
    env = make_vec_env(SerialVecEnv, make_gym_env, 'MountainCarContinuous-v0', 2, 0)
    clipped_env = VecClipAction(env)
    action = [[0.5], [1000]]

    env.reset()
    _, rewards, _, _ = env.step(action)
    clipped_env.reset()
    _, rewards_clipped, _, _ = clipped_env.step(action)
    # The in-range action is untouched; the out-of-range action is clipped,
    # which shrinks its action-magnitude reward penalty
    assert rewards[0] == rewards_clipped[0]
    assert abs(rewards[1]) > abs(rewards_clipped[1])

def test_get_wrapper(env_id):
    env = make_vec_env(SerialVecEnv, make_gym_env, env_id, 3, 0)
    env = VecStandardize(env)
    env = VecClipAction(env)

    out = get_wrapper(env, 'VecClipAction')
    assert out.__class__.__name__ == 'VecClipAction'
    del out
    out = get_wrapper(env, 'VecStandardize')
    assert out.__class__.__name__ == 'VecStandardize'
    del out
    out = get_wrapper(env, 'SerialVecEnv')
    assert out.__class__.__name__ == 'SerialVecEnv'

def test_td0_error_from_episode():
    env = make_vec_env(SerialVecEnv, make_gym_env, 'CartPole-v1', 3, 0)
    env_spec = EnvSpec(env)
    D = BatchEpisode(env_spec)
    D.r[0] = [1, 2, 3]
    D.done[0] = [False, False, True]
    D.completed[0] = True
    D.r[1] = [1, 2, 3, 4, 5]
    D.done[1] = [False, False, False, False, False]
    D.completed[1] = False
    D.r[2] = [1, 2, 3, 4, 5, 6, 7, 8]
    D.done[2] = [False, False, False, False, False, False, False, True]
    D.completed[2] = True

    all_Vs = [torch.tensor([[0.1], [0.5], [1.0]]),
              torch.tensor([[1.1], [1.5], [2.0]]),
              torch.tensor([[2.1], [2.5], [3.0]]),
              torch.tensor([[3.1], [3.5], [4.0]]),
              torch.tensor([[4.1], [4.5], [5.0]]),
              torch.tensor([[5.1], [5.5], [6.0]]),
              torch.tensor([[6.1], [6.5], [7.0]]),
              torch.tensor([[7.1], [7.5], [8.0]])]
    last_Vs = torch.tensor([10, 20, 30]).unsqueeze(1)
    all_Vs = torch.stack(all_Vs, 1)

    out = td0_error_from_episode(D, all_Vs, last_Vs, 1.0)
    assert out.shape == (3, D.maxT)
    assert np.allclose(out[0], [2.0, 3, 0.9, 0, 0, 0, 0, 0])
    assert np.allclose(out[1], [2, 3, 4, 5, 20.5, 0, 0, 0])
    assert np.allclose(out[2], [2, 3, 4, 5, 6, 7, 8, 0])
    del out

    out = td0_error_from_episode(D, all_Vs, last_Vs, 0.1)
    assert out.shape == (3, D.maxT)
    assert np.allclose(out[0], [1.01, 1.11, 0.9, 0, 0, 0, 0, 0])
    assert np.allclose(out[1], [0.65, 0.75, 0.85, 0.95, 2.5, 0, 0, 0])
    assert np.allclose(out[2], [0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0])
    with pytest.raises(AssertionError):
        td0_error_from_segment(D, all_Vs, last_Vs, 0.1)

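# Same convention as the GAE sketch above for the TD(0) errors being checked:
# delta_t = r_t + gamma * V_{t+1} - V_t, with the value after the last step
# taken as last_V only when the episode was truncated. Hypothetical sketch:
def td0_errors(rewards, Vs, last_V, gamma):
    Vs = list(Vs) + [last_V]
    return np.array([rewards[t] + gamma * Vs[t + 1] - Vs[t]
                     for t in range(len(rewards))])

# Episode 0 terminates, so its bootstrap value is 0:
assert np.allclose(td0_errors([1, 2, 3], [0.1, 1.1, 2.1], 0.0, 1.0), [2.0, 3.0, 0.9])
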
def init(self, seed, config):
    # Make environment; remember to seed it in each worker function!
    self.env = make_vec_env(vec_env_class=SerialVecEnv,
                            make_env=make_gym_env,
                            env_id=config['env.id'],
                            num_env=1,
                            init_seed=seed)
    self.env_spec = EnvSpec(self.env)

    # Make agent
    self.network = Network(config=config, env_spec=self.env_spec)
    if self.env_spec.control_type == 'Discrete':
        self.policy = CategoricalPolicy(config=config, network=self.network, env_spec=self.env_spec)
    elif self.env_spec.control_type == 'Continuous':
        self.policy = GaussianPolicy(config=config, network=self.network, env_spec=self.env_spec)
    self.agent = Agent(policy=self.policy, config=config)

def _prepare(self, config):
    self.env = make_vec_env(SerialVecEnv, make_gym_env, config['env.id'], config['train.N'], 0)
    self.env = VecClipAction(self.env)
    if config['env.standardize']:
        self.env = VecStandardize(self.env,
                                  use_obs=True,
                                  use_reward=False,
                                  clip_obs=10.0,
                                  clip_reward=10.0,
                                  gamma=0.99,
                                  eps=1e-08)
    self.env_spec = EnvSpec(self.env)
    self.device = torch.device('cpu')
    self.agent = Agent(config, self.env_spec, self.device)

def test_vec_monitor(env_id, vec_env_class, num_env, seed, runner_class):
    env = make_vec_env(vec_env_class, make_gym_env, env_id, num_env, seed)
    env_spec = EnvSpec(env)
    env = VecMonitor(env)
    agent = RandomAgent(None, env_spec)
    runner = runner_class(None, agent, env)
    D = runner(1050)

    for infos in D.infos:
        for info in infos:
            if 'terminal_observation' in info:
                assert 'episode' in info
                assert 'return' in info['episode'] and isinstance(info['episode']['return'], np.float32)
                assert 'horizon' in info['episode'] and isinstance(info['episode']['horizon'], np.int32)
                assert 'time' in info['episode'] and isinstance(info['episode']['time'], float)

def test_sticky_agent():
    sticky_action = 0
    env = make_gym_env('CartPole-v1', 0)
    env_spec = EnvSpec(env)
    agent = StickyAgent(None, env_spec, sticky_action)
    out = agent.choose_action(env.reset())
    assert isinstance(out, dict)
    assert 'action' in out and isinstance(out['action'], int)
    assert out['action'] == sticky_action

    venv = make_vec_env(SerialVecEnv, make_gym_env, 'CartPole-v1', 3, 0)
    env_spec = EnvSpec(venv)
    agent = StickyAgent(None, env_spec, sticky_action)
    out = agent.choose_action(venv.reset())
    assert isinstance(out, dict)
    assert 'action' in out and len(out['action']) == 3 and isinstance(out['action'][0], int)
    assert np.allclose(out['action'], [0, 0, 0])

def test_rolling(self, vec_env_class):
    venv = make_vec_env(vec_env_class, make_gym_env, 'CartPole-v1', 5, 1, rolling=False)
    venv.reset()
    for _ in range(100):
        observations, rewards, dones, infos = venv.step([venv.action_space.sample()] * 5)
        assert all([len(x) == 5 for x in [observations, rewards, dones, infos]])
    # After 100 steps every sub-environment has finished; a non-rolling env returns None
    assert all([x == [None] * 5 for x in [observations, rewards, dones, infos]])

    venv.reset()
    result = venv.step([venv.action_space.sample()] * 5)
    assert all([None not in result[i] for i in [1, 2, 3]])

def test_rolling_segment_runner(vec_env, env_id):
    env = make_vec_env(vec_env, make_gym_env, env_id, 3, 0)
    env_spec = EnvSpec(env)
    if env_id == 'CartPole-v1':
        sticky_action = 1
    elif env_id == 'Pendulum-v0':
        sticky_action = [0.1]
    T = 30

    agent = StickyAgent(None, env_spec, sticky_action)
    runner = RollingSegmentRunner(None, agent, env)
    D = runner(T)
    assert D.N == 3
    assert D.T == T

    seeder = Seeder(0)
    seed1, seed2, seed3 = seeder(3)
    env1 = make_gym_env(env_id, seed1)
    env2 = make_gym_env(env_id, seed2)
    env3 = make_gym_env(env_id, seed3)
    for n, ev in enumerate([env1, env2, env3]):
        obs = ev.reset()
        assert np.allclose(obs, D.numpy_observations[n, 0, ...])
        for t in range(T):
            obs, reward, done, info = ev.step(sticky_action)
            if done:
                info['terminal_observation'] = obs
                obs = ev.reset()
            assert np.allclose(obs, D.numpy_observations[n, t + 1, ...])
            assert np.allclose(sticky_action, D.numpy_actions[n, t, ...])
            assert np.allclose(reward, D.numpy_rewards[n, t])
            assert done == D.numpy_dones[n, t]
            assert int(not done) == D.numpy_masks[n, t]
            if done:
                assert np.allclose(info['terminal_observation'], D.infos[n][t]['terminal_observation'])

def test_vec_env(self, vec_env_class):
    # unpack class
    v_id, vec_env_class = vec_env_class
    venv = make_vec_env(vec_env_class, make_gym_env, 'CartPole-v1', 5, 1, True)
    assert isinstance(venv, VecEnv)
    assert v_id in [0, 1]
    if v_id == 0:
        assert isinstance(venv, SerialVecEnv)
    elif v_id == 1:
        assert isinstance(venv, ParallelVecEnv)
    assert venv.num_env == 5
    assert not venv.closed and venv.viewer is None
    assert venv.unwrapped is venv
    assert isinstance(venv.observation_space, Box)
    assert isinstance(venv.action_space, Discrete)
    assert venv.T == 500
    assert venv.max_episode_reward == 475.0
    assert venv.reward_range == (-float('inf'), float('inf'))

    obs = venv.reset()
    assert len(obs) == 5
    assert np.asarray(obs).shape == (5, 4)
    assert all([not np.allclose(obs[0], obs[i]) for i in [1, 2, 3, 4]])
    a = [1] * 5
    obs, rewards, dones, infos = venv.step(a)
    assert all([len(item) == 5 for item in [obs, rewards, dones, infos]])
    assert all([not np.allclose(obs[0], obs[i]) for i in [1, 2, 3, 4]])

    # EnvSpec
    env_spec = EnvSpec(venv)
    assert isinstance(env_spec.action_space, Discrete)
    assert isinstance(env_spec.observation_space, Box)
    assert env_spec.control_type == 'Discrete'
    assert env_spec.T == 500
    assert env_spec.max_episode_reward == 475.0
    assert env_spec.reward_range == (-float('inf'), float('inf'))
    assert env_spec.is_vec_env

    venv.close()
    assert venv.closed

def make_env(config, seed, mode):
    assert mode in ['train', 'eval']
    def _make_env():
        env = gym.make(config['env.id'])
        if config['env.clip_action'] and isinstance(env.action_space, Box):
            env = ClipAction(env)
        return env
    env = make_vec_env(_make_env, 1, seed)  # single environment
    env = VecMonitor(env)
    if mode == 'train':
        if config['env.standardize_obs']:
            env = VecStandardizeObservation(env, clip=5.)
        if config['env.standardize_reward']:
            env = VecStandardizeReward(env, clip=10., gamma=config['agent.gamma'])
    env = VecStepInfo(env)
    return env

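# Example wiring with an illustrative config dict; the keys mirror exactly
# those read above, but the values here are hypothetical:
config = {'env.id': 'Pendulum-v0',
          'env.clip_action': True,
          'env.standardize_obs': True,
          'env.standardize_reward': True,
          'agent.gamma': 0.99}
train_env = make_env(config, seed=0, mode='train')  # monitored, standardized, step-info
eval_env = make_env(config, seed=0, mode='eval')    # no standardization wrappers in eval mode
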