class TestContinuousPresets(unittest.TestCase):
    """Smoke tests for the continuous-control preset builders.

    Every test builds a preset on the CPU for a LunarLanderContinuous-v2
    environment and checks that the training agent, the test agent, and a
    test agent created from a saved-and-reloaded preset can each act on the
    environment's current state.
    """

    def setUp(self):
        # Reset immediately so that ``self.env.state`` is populated.
        self.env = GymEnvironment('LunarLanderContinuous-v2')
        self.env.reset()

    def tearDown(self):
        # Remove the file written by the save/load step, if it exists.
        preset_path = 'test_preset.pt'
        if not os.path.exists(preset_path):
            return
        os.remove(preset_path)

    def test_ddpg(self):
        self.validate(ddpg)

    def test_ppo(self):
        self.validate(ppo)

    def test_sac(self):
        self.validate(sac)

    def validate(self, builder):
        """Build a preset from ``builder`` and exercise each of its agents."""
        configured = builder.device('cpu').env(self.env)
        preset = configured.build()
        # training agent
        preset.agent(writer=DummyWriter(), train_steps=100000).act(self.env.state)
        # evaluation agent
        preset.test_agent().act(self.env.state)
        # save the preset, load it back, and act with a rebuilt test agent
        preset_path = 'test_preset.pt'
        preset.save(preset_path)
        restored = torch.load(preset_path)
        restored.test_agent().act(self.env.state)
 def test_step(self):
     """Take one step in CartPole-v0 and check the fields of the returned state."""
     cartpole = GymEnvironment('CartPole-v0')
     cartpole.reset()
     after_step = cartpole.step(1)
     self.assertEqual(after_step.observation.shape, (4,))
     self.assertEqual(after_step.reward, 1.0)
     self.assertFalse(after_step.done)
     self.assertEqual(after_step.mask, 1)
# Example #3
# 0
class TestContinuousPresets(unittest.TestCase):
    """Smoke tests for the continuous-control preset builders.

    Presets are built on the CPU for LunarLanderContinuous-v2. Depending on
    whether the built preset is a ``ParallelPreset``, its agents are driven
    with the single environment's state or with the state array of a
    two-copy ``DuplicateEnvironment``.
    """

    def setUp(self):
        env_name = 'LunarLanderContinuous-v2'
        # single environment
        self.env = GymEnvironment(env_name)
        self.env.reset()
        # two copies of the same environment for the parallel agents
        copies = [GymEnvironment(env_name), GymEnvironment(env_name)]
        self.parallel_env = DuplicateEnvironment(copies)
        self.parallel_env.reset()

    def tearDown(self):
        # Remove the file written by the save/load step, if it exists.
        preset_path = 'test_preset.pt'
        if not os.path.exists(preset_path):
            return
        os.remove(preset_path)

    def test_ddpg(self):
        self.validate(ddpg)

    def test_ppo(self):
        self.validate(ppo)

    def test_sac(self):
        self.validate(sac)

    def validate(self, builder):
        """Build a preset from ``builder`` and run the matching validation."""
        preset = builder.device('cpu').env(self.env).build()
        check = (
            self.validate_parallel_preset
            if isinstance(preset, ParallelPreset)
            else self.validate_standard_preset
        )
        return check(preset)

    def validate_standard_preset(self, preset):
        """Exercise the train agent, test agent, and save/load of ``preset``."""
        # training agent
        preset.agent(writer=DummyWriter(), train_steps=100000).act(self.env.state)
        # evaluation agent
        preset.test_agent().act(self.env.state)
        # save the preset, load it back, and act with a rebuilt test agent
        preset_path = 'test_preset.pt'
        preset.save(preset_path)
        restored = torch.load(preset_path)
        restored.test_agent().act(self.env.state)

    def validate_parallel_preset(self, preset):
        """Exercise a parallel preset's agents and its save/load round trip."""
        # training agent acts on the state array of the duplicated environment
        preset.agent(writer=DummyWriter(), train_steps=100000).act(
            self.parallel_env.state_array
        )
        # evaluation agent acts on the single environment
        preset.test_agent().act(self.env.state)
        # NOTE(review): the "parallel" agent is also obtained from
        # test_agent(); confirm whether a dedicated parallel factory was meant.
        preset.test_agent().act(self.parallel_env.state_array)
        # save the preset, load it back, and act with a rebuilt test agent
        preset_path = 'test_preset.pt'
        preset.save(preset_path)
        restored = torch.load(preset_path)
        restored.test_agent().act(self.env.state)
 def test_step_until_done(self):
     """Repeat action 1 in CartPole-v0 until done, then check the final state."""
     cartpole = GymEnvironment('CartPole-v0')
     cartpole.reset()
     # At most 100 steps; the assertions below expect the episode to end by then.
     for _attempt in range(100):
         last_state = cartpole.step(1)
         if last_state.done:
             break
     self.assertEqual(last_state.observation.shape, (4,))
     self.assertEqual(last_state.reward, 1.0)
     self.assertTrue(last_state.done)
     self.assertEqual(last_state.mask, 0)
# Example #5
# 0
class TestClassicControlPresets(unittest.TestCase):
    """Smoke tests for the classic-control preset builders.

    Every test builds a preset on the CPU for a CartPole-v0 environment and
    checks that the training agent, the test agent, and a test agent created
    from a saved-and-reloaded preset can each act on the current state.
    """

    def setUp(self):
        # Reset immediately so that ``self.env.state`` is populated.
        self.env = GymEnvironment('CartPole-v0')
        self.env.reset()

    def tearDown(self):
        # Remove the file written by the save/load step, if it exists.
        preset_path = 'test_preset.pt'
        if not os.path.exists(preset_path):
            return
        os.remove(preset_path)

    def test_a2c(self):
        self.validate(a2c)

    def test_c51(self):
        self.validate(c51)

    def test_ddqn(self):
        self.validate(ddqn)

    def test_dqn(self):
        self.validate(dqn)

    def test_ppo(self):
        self.validate(ppo)

    def test_rainbow(self):
        self.validate(rainbow)

    def test_vac(self):
        self.validate(vac)

    def test_vpg(self):
        self.validate(vpg)

    def test_vsarsa(self):
        self.validate(vsarsa)

    def test_vqn(self):
        self.validate(vqn)

    def validate(self, builder):
        """Build a preset from ``builder`` and exercise each of its agents."""
        configured = builder.device('cpu').env(self.env)
        preset = configured.build()
        # training agent
        preset.agent(writer=DummyWriter(), train_steps=100000).act(self.env.state)
        # evaluation agent
        preset.test_agent().act(self.env.state)
        # save the preset, load it back, and act with a rebuilt test agent
        preset_path = 'test_preset.pt'
        preset.save(preset_path)
        restored = torch.load(preset_path)
        restored.test_agent().act(self.env.state)
 def test_reset_preconstructed_env(self):
     """Wrap an already-made gym env and check the state returned by reset()."""
     raw_env = gym.make('CartPole-v0')
     wrapped = GymEnvironment(raw_env)
     initial = wrapped.reset()
     self.assertEqual(initial.observation.shape, (4,))
     self.assertEqual(initial.reward, 0)
     self.assertFalse(initial.done)
     self.assertEqual(initial.mask, 1)
class NoFramestackTest(unittest.TestCase):
    """Checks DeepmindAtariBody on Pong when built with ``frame_stack=1``."""

    def setUp(self):
        self.agent = MockAgent()
        self.env = GymEnvironment('PongNoFrameskip-v4')
        wrapped_agent = ToLegacyBody(self.agent)
        self.body = DeepmindAtariBody(
            wrapped_agent, self.env, noop_max=0, frame_stack=1
        )

    def test_several_steps(self):
        """The features seen by the mock agent keep the shape (1, 1, 105, 80)."""
        self.env.reset()
        # Two real environment steps, with rewards 0 and then -5.
        for reward in (0, -5):
            self.env.step(self.body.act(self.env.state, reward))
        expected_shape = (1, 1, 105, 80)
        # The environment is not stepped inside this loop; only the body acts.
        for _ in range(10):
            self.body.act(self.env.state, -5)
            self.assertEqual(self.agent.state.features.shape, expected_shape)
# Example #8
# 0
def evaluate_dqn_all(num_test_episodes):
    """Run a saved greedy agent for ``num_test_episodes`` counted episodes.

    A ``GreedyAgent`` is loaded from the ``'models'`` directory and run in
    the environment named by ``Settings.GYM_ENVIRONMENT``. An episode is
    counted only when the statistics returned by the wrapped environment
    have a non-empty ``"position_history"``. The per-episode rewards are
    logged at the end and the aggregated statistics are printed.

    Args:
        num_test_episodes: number of counted episodes to run before stopping.
    """
    from all.experiments.watch import GreedyAgent
    from all.environments import GymEnvironment

    device = "cuda" if Settings.CUDA else "cpu"

    env = GymEnvironment(Settings.GYM_ENVIRONMENT, device=device)
    agent = GreedyAgent.load('models', env)
    # NOTE: these two counters are accumulated below but never reported.
    num_crashed = 0
    num_arrived = 0
    action = None
    episodes_done = 0

    rlstats = StatsAggregator()
    episode_reward = 0

    def add_reward(state):
        # Reads the enclosing episode_reward at call time, not at definition.
        return {"reward": episode_reward}

    rlstats.add_custom_stat_callback(add_reward)
    rewards = []

    while episodes_done < num_test_episodes:
        if not env.done:
            env.step(action)
        else:
            episode_stats = env.env.get_stats()
            if len(episode_stats["position_history"]) > 0:
                rlstats.add_episode_stats(episode_stats)
                num_crashed += episode_stats["crashed"]
                num_arrived += episode_stats["merged"]
                episodes_done += 1
                print(episodes_done)
                rewards.append(episode_reward)
                episode_reward = 0
            env.reset()
        # Runs after both branches: pick the next action, accumulate reward.
        action = agent.eval(env.state, env.reward)
        episode_reward += env.reward

    logging.info("Rewards: {}".format(rewards))
    rlstats.print_stats()
class DeepmindAtariBodyPongTest(unittest.TestCase):
    """Checks DeepmindAtariBody wrapped around a mock agent on Pong."""

    def setUp(self):
        self.agent = MockAgent()
        self.env = GymEnvironment('PongNoFrameskip-v4')
        wrapped_agent = ToLegacyBody(self.agent)
        self.body = DeepmindAtariBody(wrapped_agent, self.env, noop_max=0)

    def test_initial_state(self):
        self.env.reset()
        first_action = self.body.act(self.env.state, 0)
        # fire on reset 1
        tt.assert_equal(first_action, torch.tensor([1]))

    def test_second_state(self):
        self.env.reset()
        self.env.step(self.body.act(self.env.state, 0))
        second_action = self.body.act(self.env.state, self.env.reward)
        # fire on reset 2
        tt.assert_equal(second_action, torch.tensor([2]))

    def test_several_steps(self):
        stacked_shape = (1, 4, 105, 80)
        self.env.reset()
        for reward in (0, -5):
            self.env.step(self.body.act(self.env.state, reward))
        # Next four calls: the body is expected to return INITIAL_ACTION.
        for _ in range(4):
            action = self.body.act(self.env.state, -5)
            self.assertEqual(self.agent.state.features.shape, stacked_shape)
            tt.assert_equal(action, INITIAL_ACTION)
            self.env.step(action)
        # Following ten calls: the body is expected to return ACT_ACTION.
        for _ in range(10):
            raw_reward = -5  # should be clipped
            self.assertEqual(self.agent.state.features.shape, stacked_shape)
            action = self.body.act(self.env.state, raw_reward)
            tt.assert_equal(action, ACT_ACTION)
            self.env.step(action)
        self.assertEqual(self.agent.reward, -4)

    def test_terminal_state(self):
        self.env.reset()
        self.env.step(self.body.act(self.env.state, 0))
        for _ in range(11):
            raw_reward = -5  # should be clipped
            action = self.body.act(self.env.state, raw_reward)
            self.env.step(action)
        # Overwrite the mask on the current state with 0 before the last act().
        # pylint: disable=protected-access
        self.env.state._mask = torch.tensor([0])
        # NOTE(review): the return value of this call is discarded, so the
        # assertion below checks the action from the last loop iteration;
        # confirm that this is intended.
        self.body.act(self.env.state, -1)
        tt.assert_equal(action, ACT_ACTION)
        self.assertEqual(self.agent.state.features.shape, (1, 4, 105, 80))
        self.assertEqual(self.agent.reward, -4)
# Example #10
# 0
class TestClassicControlPresets(unittest.TestCase):
    """Smoke tests for the classic-control preset builders.

    Presets are built on the CPU for CartPole-v0. Depending on whether the
    built preset is a ``ParallelPreset``, its agents are driven with the
    single environment's state or with the state array of a two-copy
    ``DuplicateEnvironment``.
    """

    def setUp(self):
        env_name = 'CartPole-v0'
        # single environment
        self.env = GymEnvironment(env_name)
        self.env.reset()
        # two copies of the same environment for the parallel agents
        copies = [GymEnvironment(env_name), GymEnvironment(env_name)]
        self.parallel_env = DuplicateEnvironment(copies)
        self.parallel_env.reset()

    def tearDown(self):
        # Remove the file written by the save/load step, if it exists.
        preset_path = 'test_preset.pt'
        if not os.path.exists(preset_path):
            return
        os.remove(preset_path)

    def test_a2c(self):
        self.validate(a2c)

    def test_c51(self):
        self.validate(c51)

    def test_ddqn(self):
        self.validate(ddqn)

    def test_dqn(self):
        self.validate(dqn)

    def test_ppo(self):
        self.validate(ppo)

    def test_rainbow(self):
        self.validate(rainbow)

    def test_vac(self):
        self.validate(vac)

    def test_vpg(self):
        self.validate(vpg)

    def test_vsarsa(self):
        self.validate(vsarsa)

    def test_vqn(self):
        self.validate(vqn)

    def validate(self, builder):
        """Build a preset from ``builder`` and run the matching validation."""
        preset = builder.device('cpu').env(self.env).build()
        check = (
            self.validate_parallel_preset
            if isinstance(preset, ParallelPreset)
            else self.validate_standard_preset
        )
        return check(preset)

    def validate_standard_preset(self, preset):
        """Exercise the train agent, test agent, and save/load of ``preset``."""
        # training agent
        preset.agent(writer=DummyWriter(), train_steps=100000).act(self.env.state)
        # evaluation agent
        preset.test_agent().act(self.env.state)
        # save the preset, load it back, and act with a rebuilt test agent
        preset_path = 'test_preset.pt'
        preset.save(preset_path)
        restored = torch.load(preset_path)
        restored.test_agent().act(self.env.state)

    def validate_parallel_preset(self, preset):
        """Exercise a parallel preset's agents and its save/load round trip."""
        # training agent acts on the state array of the duplicated environment
        preset.agent(writer=DummyWriter(), train_steps=100000).act(
            self.parallel_env.state_array
        )
        # evaluation agent acts on the single environment
        preset.test_agent().act(self.env.state)
        # NOTE(review): the "parallel" agent is also obtained from
        # test_agent(); confirm whether a dedicated parallel factory was meant.
        preset.test_agent().act(self.parallel_env.state_array)
        # save the preset, load it back, and act with a rebuilt test agent
        preset_path = 'test_preset.pt'
        preset.save(preset_path)
        restored = torch.load(preset_path)
        restored.test_agent().act(self.env.state)