Exemplo n.º 1
0
    def test_seed_not_set(self):
        """Check that two runs differ when only the environment is seeded.

        Each run builds a fresh environment, brain, policy, replay memory
        and agent from the fixture's settings. The environment is seeded
        with the same value both times, but ``agent.seed`` is never called.
        The per-run rewards returned by ``agent.run()`` are then required
        to differ in at least one position.
        """
        seed = 123

        def run_once():
            # Only the environment is seeded here; the agent is left as-is.
            env = TestEnv()
            env.seed(seed)
            brain = TestBrain(self._n_states, self._n_actions)
            policy = EpsGreedyPolicy(self._eps)
            memory = Memory(self._capacity, self._batch_size)
            agent = TdAgent(self._n_episodes, env, brain, policy,
                            memory, self._gamma)
            rewards, _ = agent.run()
            return rewards

        first_rewards = run_once()
        second_rewards = run_once()

        # Equivalent to asserting first_rewards != second_rewards: the sum
        # of absolute element-wise differences is positive iff any entry
        # differs (assumes the rewards support array arithmetic).
        total_abs_diff = np.sum(np.abs(first_rewards - second_rewards))
        self.assertGreater(total_abs_diff, 0)
Exemplo n.º 2
0
    def test_seed_set(self):
        """Check that two identically seeded runs yield identical rewards.

        For each of two runs a fresh environment and agent are built from
        the fixture's settings, and both the environment and the agent are
        seeded with the same value before ``agent.run()`` is called. The
        rewards of both runs, converted to lists, must be equal.
        """
        seed = 123
        reward_lists = []

        for _ in range(2):
            env = TestEnv()
            env.seed(seed)

            # Arguments are evaluated left to right, so the brain, policy
            # and memory are created in that order before the agent.
            agent = TdAgent(
                self._n_episodes,
                env,
                TestBrain(self._n_states, self._n_actions),
                EpsGreedyPolicy(self._eps),
                Memory(self._capacity, self._batch_size),
                self._gamma,
            )
            agent.seed(seed)

            rewards, _ = agent.run()
            reward_lists.append(rewards.tolist())

        first_run, second_run = reward_lists
        self.assertListEqual(first_run, second_run)
Exemplo n.º 3
0
class EpsGreedyPolicyTest(unittest.TestCase):
    """Unit tests for ``EpsGreedyPolicy`` seeding and action selection."""

    def setUp(self):
        """Create the shared policy with an epsilon of 0.1."""
        self._eps = 0.1
        self._acting = EpsGreedyPolicy(self._eps)

    def test_seed(self):
        """Two policies seeded alike must choose the same action every time."""
        seed, eps = 123, 0.7
        q_low, q_high = 1, 10
        n_actions, n_samples = 10, 1000

        first_policy = EpsGreedyPolicy(eps)
        first_policy.seed(seed)

        second_policy = EpsGreedyPolicy(eps)
        second_policy.seed(seed)

        # All Q-value rows are drawn up front, before either policy acts.
        q_samples = [
            np.random.uniform(q_low, q_high, size=(1, n_actions))
            for _ in range(n_samples)
        ]

        for q_values in q_samples:
            self.assertEqual(first_policy.act(q_values),
                             second_policy.act(q_values))

    def test_act(self):
        """The share of non-greedy actions must be within 10% of epsilon.

        Random Q-value rows are fed to the shared policy and every action
        that is not the arg-max is counted. The relative error between the
        observed non-greedy rate and ``self._eps`` must stay below 0.1.

        NOTE(review): neither the Q-value draws nor the shared policy are
        seeded in this test, so the check is statistical -- confirm the
        tolerance is wide enough for the policy's sampling behaviour.
        """
        n_trials = 10000
        q_low, q_high = 1, 10
        n_actions = 100

        non_greedy_count = 0
        for _ in range(n_trials):
            q_values = np.random.uniform(q_low, q_high, size=(1, n_actions))
            greedy_action = np.argmax(q_values)
            chosen_action = self._acting.act(q_values)

            if greedy_action == chosen_action:
                continue
            non_greedy_count += 1

        observed_rate = non_greedy_count / n_trials
        relative_error = abs((self._eps - observed_rate) / self._eps)

        tolerance = 0.1
        self.assertLess(relative_error, tolerance)
Exemplo n.º 4
0
    def test_seed(self):
        """Verify that equal seeds make two policies act identically.

        Two ``EpsGreedyPolicy`` instances with the same epsilon are seeded
        with the same value. A batch of random Q-value rows is generated
        and both policies must return equal actions for each row.
        """
        seed = 123
        eps = 0.7
        q_bounds = (1, 10)
        q_shape = (1, 10)  # one row of Q-values for 10 actions
        n_rows = 1000

        policy_a = EpsGreedyPolicy(eps)
        policy_a.seed(seed)

        policy_b = EpsGreedyPolicy(eps)
        policy_b.seed(seed)

        # Materialise every Q-value row before any policy is queried.
        q_rows = [
            np.random.uniform(q_bounds[0], q_bounds[1], size=q_shape)
            for _ in range(n_rows)
        ]

        for row in q_rows:
            action_a = policy_a.act(row)
            action_b = policy_b.act(row)
            self.assertEqual(action_a, action_b)
Exemplo n.º 5
0
    def setUp(self):
        """Build the agent fixture and keep its settings as attributes.

        Stores the scalar settings first, then creates the environment,
        brain, acting policy and replay memory, and finally wires them into
        a ``TdAgent`` that is given ``train_freq`` as a keyword argument.
        """
        # Scalar settings, also read back by the individual tests.
        self._n_episodes = 10
        self._n_states = 3
        self._n_actions = 7
        self._eps = 0.1
        self._capacity = 1000
        self._batch_size = 8
        self._gamma = 0.99
        self._train_freq = 4

        # Collaborators, created in the same order as before.
        self._env = TestEnv()
        self._brain = TestBrain(self._n_states, self._n_actions)
        self._acting = EpsGreedyPolicy(self._eps)
        self._replay_memory = Memory(self._capacity, self._batch_size)

        self._agent = TdAgent(
            self._n_episodes,
            self._env,
            self._brain,
            self._acting,
            self._replay_memory,
            self._gamma,
            train_freq=self._train_freq,
        )
Exemplo n.º 6
0
 def setUp(self):
     """Store epsilon (0.1) and create the policy under test with it."""
     epsilon = 0.1
     self._eps = epsilon
     self._acting = EpsGreedyPolicy(epsilon)