def __init__(self):
    """Set up a two-agent mock environment with empty per-agent caches."""
    # Spaces advertised by this environment.
    self.action_space = gym.spaces.Discrete(2)
    self.observation_space = gym.spaces.Discrete(10)

    # Two sub-environments built with different arguments
    # (presumably episode lengths -- confirm against MockEnv).
    self.agents = [MockEnv(n) for n in (3, 5)]

    # Bookkeeping: finished agents, the most recent values kept per agent,
    # and a counter starting at zero.
    self.dones = set()
    self.last_obs = {}
    self.last_rew = {}
    self.last_done = {}
    self.last_info = {}
    self.i = 0
def testExternalEnvHorizonNotSupported(self):
    """Sampling with ``episode_horizon`` set must raise ``ValueError``.

    The worker is built around a ``SimpleServing``-wrapped ``MockEnv(25)``
    in ``complete_episodes`` batch mode.
    """
    # NOTE(review): this test passes `policy=` / `batch_steps=` while a
    # sibling test passes `policy_spec=` / `rollout_fragment_length=` --
    # confirm which RolloutWorker signature is current.
    def make_env(_):
        return SimpleServing(MockEnv(25))

    worker = RolloutWorker(
        env_creator=make_env,
        policy=MockPolicy,
        episode_horizon=20,
        batch_steps=10,
        batch_mode="complete_episodes",
    )
    with self.assertRaises(ValueError):
        worker.sample()
def test_external_env_horizon_not_supported(self):
    """Sampling with ``episode_horizon`` set must raise ``ValueError``.

    The worker is built around a ``SimpleServing``-wrapped ``MockEnv(25)``
    in ``complete_episodes`` batch mode.
    """
    def make_env(_):
        return SimpleServing(MockEnv(25))

    worker = RolloutWorker(
        env_creator=make_env,
        policy_spec=MockPolicy,
        episode_horizon=20,
        rollout_fragment_length=10,
        batch_mode="complete_episodes",
    )
    with self.assertRaises(ValueError):
        worker.sample()
def testExternalEnvBadActions(self):
    """Sampling asynchronously with ``BadPolicy`` must raise an exception."""
    # NOTE(review): this test passes `policy=` / `batch_steps=` while a
    # sibling test passes `policy_spec=` / `rollout_fragment_length=` --
    # confirm which RolloutWorker signature is current.
    def make_env(_):
        return SimpleServing(MockEnv(25))

    worker = RolloutWorker(
        env_creator=make_env,
        policy=BadPolicy,
        sample_async=True,
        batch_steps=40,
        batch_mode="truncate_episodes",
    )
    # Any exception type is accepted here; the test only checks that
    # sampling does not succeed.
    with self.assertRaises(Exception):
        worker.sample()
def test_external_env_bad_actions(self):
    """Sampling asynchronously with ``BadPolicy`` must raise an exception."""
    def make_env(_):
        return SimpleServing(MockEnv(25))

    worker = RolloutWorker(
        env_creator=make_env,
        policy_spec=BadPolicy,
        sample_async=True,
        rollout_fragment_length=40,
        batch_mode="truncate_episodes",
    )
    # Any exception type is accepted here; the test only checks that
    # sampling does not succeed.
    with self.assertRaises(Exception):
        worker.sample()
def testExternalEnvTruncateEpisodes(self):
    """In ``truncate_episodes`` mode each sampled batch has 40 steps."""
    # NOTE(review): this test passes `policy=` / `batch_steps=` while a
    # sibling test passes `policy_spec=` / `rollout_fragment_length=` --
    # confirm which RolloutWorker signature is current.
    expected_count = 40
    worker = RolloutWorker(
        env_creator=lambda _: SimpleServing(MockEnv(25)),
        policy=MockPolicy,
        batch_steps=expected_count,
        batch_mode="truncate_episodes",
    )
    # Draw three consecutive batches; each one must match the requested size.
    for _ in range(3):
        self.assertEqual(worker.sample().count, expected_count)
def test_external_env_truncate_episodes(self):
    """In ``truncate_episodes`` mode each sampled batch has 40 steps."""
    expected_count = 40
    worker = RolloutWorker(
        env_creator=lambda _: SimpleServing(MockEnv(25)),
        policy_spec=MockPolicy,
        rollout_fragment_length=expected_count,
        batch_mode="truncate_episodes",
    )
    # Draw three consecutive batches; each one must match the requested size.
    for _ in range(3):
        self.assertEqual(worker.sample().count, expected_count)
def testExternalEnvOffPolicy(self):
    """Off-policy serving: batches hold 50 steps and record action 42.

    The env is ``SimpleOffPolicyServing(MockEnv(25), 42)``; the first and
    last logged actions of every batch must equal that fixed value.
    """
    # NOTE(review): this test passes `policy=` / `batch_steps=` while a
    # sibling test passes `policy_spec=` / `rollout_fragment_length=` --
    # confirm which RolloutWorker signature is current.
    fixed_action = 42

    def make_env(_):
        return SimpleOffPolicyServing(MockEnv(25), fixed_action)

    worker = RolloutWorker(
        env_creator=make_env,
        policy=MockPolicy,
        batch_steps=40,
        batch_mode="complete_episodes",
    )
    for _ in range(3):
        samples = worker.sample()
        # 50 > batch_steps: presumably two whole 25-step episodes are
        # needed to reach 40 steps in complete_episodes mode -- confirm.
        self.assertEqual(samples.count, 50)
        self.assertEqual(samples["actions"][0], fixed_action)
        self.assertEqual(samples["actions"][-1], fixed_action)
def test_external_env_off_policy(self):
    """Off-policy serving: batches hold 50 steps and record action 42.

    The env is ``SimpleOffPolicyServing(MockEnv(25), 42)``; the first and
    last logged actions of every batch must equal that fixed value.
    """
    fixed_action = 42

    def make_env(_):
        return SimpleOffPolicyServing(MockEnv(25), fixed_action)

    worker = RolloutWorker(
        env_creator=make_env,
        policy_spec=MockPolicy,
        rollout_fragment_length=40,
        batch_mode="complete_episodes",
    )
    for _ in range(3):
        samples = worker.sample()
        # 50 > rollout_fragment_length: presumably two whole 25-step
        # episodes are needed to reach 40 steps -- confirm.
        self.assertEqual(samples.count, 50)
        self.assertEqual(samples["actions"][0], fixed_action)
        self.assertEqual(samples["actions"][-1], fixed_action)
def __init__(self, num, increment_obs=False):
    """Create ``num`` mock agents plus empty per-agent bookkeeping.

    Args:
        num: Number of sub-environments to create.
        increment_obs: When true, agents are ``MockEnv2`` instances
            (observations are 0, 1, 2, 3... as time advances); otherwise
            ``MockEnv`` instances (observations are all zeros).
    """
    self.num = num
    self.observation_space = gym.spaces.Discrete(10)
    self.action_space = gym.spaces.Discrete(2)

    # Pick the agent class once, then build all agents the same way.
    agent_cls = MockEnv2 if increment_obs else MockEnv
    self.agents = [agent_cls(5) for _ in range(num)]

    # Finished agents, the most recent values kept per agent, and a
    # counter starting at zero.
    self.dones = set()
    self.last_obs = {}
    self.last_rew = {}
    self.last_done = {}
    self.last_info = {}
    self.i = 0
def __init__(self, num):
    """Create ``num`` ``MockEnv(25)`` agents in a not-yet-reset state.

    Args:
        num: Number of sub-environments to create.
    """
    # Both spaces are binary discrete spaces.
    self.observation_space = gym.spaces.Discrete(2)
    self.action_space = gym.spaces.Discrete(2)

    self.agents = [MockEnv(25) for _ in range(num)]
    self.dones = set()
    # Starts false; nothing in this constructor performs a reset.
    self.resetted = False