import unittest
import torch
import torch_testing as tt  # tensor-equality assertions used below
from all.environments import GymEnvironment
from all.bodies import DeepmindAtariBody
# MockAgent, ToLegacyBody, INITIAL_ACTION and ACT_ACTION come from the
# surrounding test module; they are referenced but not shown here.

class NoFramestackTest(unittest.TestCase):
    def setUp(self):
        self.agent = MockAgent()
        self.env = GymEnvironment('PongNoFrameskip-v4')
        self.body = DeepmindAtariBody(ToLegacyBody(self.agent),
                                      self.env,
                                      noop_max=0,
                                      frame_stack=1)

    def test_several_steps(self):
        self.env.reset()
        self.env.step(self.body.act(self.env.state, 0))
        self.env.step(self.body.act(self.env.state, -5))
        for _ in range(10):
            self.body.act(self.env.state, -5)
            self.assertEqual(self.agent.state.features.shape, (1, 1, 105, 80))
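
# The MockAgent used above is a test double from the original test module.
# A minimal stand-in consistent with the assertions (the (state, reward)
# signature and the returned action are assumptions):
class MockAgent:
    def __init__(self):
        self.state = None
        self.reward = None

    def act(self, state, reward):
        # record what the body forwarded so the tests can inspect it
        self.state = state
        self.reward = reward
        # always choose action 0; the body may substitute its own action
        return torch.tensor([0])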
class GymEnvironmentTest(unittest.TestCase):
    def test_step(self):
        env = GymEnvironment('CartPole-v0')
        env.reset()
        state = env.step(1)
        self.assertEqual(state.observation.shape, (4,))
        self.assertEqual(state.reward, 1.)
        self.assertFalse(state.done)
        self.assertEqual(state.mask, 1)
Example #3
import logging
# Settings and StatsAggregator are project-level helpers assumed to be in scope.

def evaluate_dqn_all(num_test_episodes):
    from all.experiments.watch import GreedyAgent
    from all.environments import GymEnvironment

    if Settings.CUDA:
        device = "cuda"
    else:
        device = "cpu"

    env = GymEnvironment(Settings.GYM_ENVIRONMENT, device=device)
    agent = GreedyAgent.load('models', env)
    num_crashed = 0
    num_arrived = 0
    action = None
    iteration = 0

    rlstats = StatsAggregator()
    episode_reward = 0

    def add_reward(state):
        # StatsAggregator callback; `state` is unused because the reward
        # total is tracked by the enclosing function
        return {"reward": episode_reward}
    rlstats.add_custom_stat_callback(add_reward)
    rewards = []

    while iteration < num_test_episodes:
        if env.done:
            # unwrap the raw gym environment to read its custom episode stats
            actual_env = env.env
            stats = actual_env.get_stats()
            if len(stats["position_history"]) != 0:
                rlstats.add_episode_stats(stats)
                num_crashed += stats["crashed"]
                num_arrived += stats["merged"]
                iteration += 1
                print(iteration)
                rewards.append(episode_reward)
                episode_reward = 0
            env.reset()
        else:
            env.step(action)
        action = agent.eval(env.state, env.reward)
        episode_reward += env.reward

    logging.info("Rewards: {}".format(rewards))
    rlstats.print_stats()
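
# A hedged usage sketch: configure logging so the reward summary is printed,
# then run the evaluation; the episode count is an arbitrary example value.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    evaluate_dqn_all(num_test_episodes=100)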
class GymEnvironmentTest(unittest.TestCase):
    def test_step_until_done(self):
        env = GymEnvironment('CartPole-v0')
        env.reset()
        for _ in range(100):
            state = env.step(1)
            if state.done:
                break
        self.assertEqual(state.observation.shape, (4,))
        self.assertEqual(state.reward, 1.)
        self.assertTrue(state.done)
        self.assertEqual(state.mask, 0)
class DeepmindAtariBodyPongTest(unittest.TestCase):
    def setUp(self):
        self.agent = MockAgent()
        self.env = GymEnvironment('PongNoFrameskip-v4')
        self.body = DeepmindAtariBody(ToLegacyBody(self.agent),
                                      self.env,
                                      noop_max=0)

    def test_initial_state(self):
        self.env.reset()
        action = self.body.act(self.env.state, 0)
        tt.assert_equal(action, torch.tensor([1]))  # first step of the fire-on-reset sequence

    def test_second_state(self):
        self.env.reset()
        self.env.step(self.body.act(self.env.state, 0))
        action = self.body.act(self.env.state, self.env.reward)
        tt.assert_equal(action, torch.tensor([2]))  # second step of the fire-on-reset sequence

    def test_several_steps(self):
        self.env.reset()
        self.env.step(self.body.act(self.env.state, 0))
        self.env.step(self.body.act(self.env.state, -5))
        for _ in range(4):
            action = self.body.act(self.env.state, -5)
            self.assertEqual(self.agent.state.features.shape, (1, 4, 105, 80))
            tt.assert_equal(action, INITIAL_ACTION)
            self.env.step(action)
        for _ in range(10):
            reward = -5  # should be clipped
            self.assertEqual(self.agent.state.features.shape, (1, 4, 105, 80))
            action = self.body.act(self.env.state, reward)
            tt.assert_equal(action, ACT_ACTION)
            self.env.step(action)
        self.assertEqual(self.agent.reward, -4)  # four frames of rewards, each clipped to -1

    def test_terminal_state(self):
        self.env.reset()
        self.env.step(self.body.act(self.env.state, 0))
        for _ in range(11):
            reward = -5  # should be clipped
            action = self.body.act(self.env.state, reward)
            self.env.step(action)
        # pylint: disable=protected-access
        self.env.state._mask = torch.tensor([0])
        self.body.act(self.env.state, -1)
        tt.assert_equal(action, ACT_ACTION)
        self.assertEqual(self.agent.state.features.shape, (1, 4, 105, 80))
        self.assertEqual(self.agent.reward, -4)
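
# Standard unittest entry point so the cases above can be run directly;
# INITIAL_ACTION and ACT_ACTION are constants from the original test module
# and are intentionally left undefined here.
if __name__ == "__main__":
    unittest.main()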