示例#1
0
    def test_select_action_steps(self):
        """Check the action selected at steps 0 through 3 for a (1, 0, 2) policy.

        The call at step 0 is expected to return 1; the calls at steps
        1, 2 and 3 are each expected to return 2.

        NOTE(review): the constructor arguments are presumably
        (initial epsilon, final epsilon, decay steps) and the second
        argument of ``select_action`` presumably the current step --
        confirm against EpsilonGreedyPolicy.
        """
        policy = EpsilonGreedyPolicy(1, 0, 2)

        # (expected action, step) pairs, evaluated in ascending step order.
        expectations = ((1, 0), (2, 1), (2, 2), (2, 3))
        for expected_action, step in expectations:
            # Build a fresh list per call so no list object is shared
            # between consecutive calls.
            chosen = policy.select_action([1, 2, 3], step)
            self.assertEqual(expected_action, chosen)
示例#2
0
    def test_select_action_for_high_epsilon(self):
        """Check that a (1, 1, 1) policy returns 1 for [1, 2, 3] at step 0.

        NOTE(review): the constructor arguments are presumably epsilon
        bounds followed by a step count -- confirm against
        EpsilonGreedyPolicy.
        """
        policy = EpsilonGreedyPolicy(1, 1, 1)

        chosen = policy.select_action([1, 2, 3], 0)

        self.assertEqual(1, chosen)
示例#3
0
    def test_select_action(self):
        """Check that a (0.1, 0.1, 1) policy returns 2 for [1, 2, 3] at step 0.

        NOTE(review): if the first two arguments are exploration
        probabilities, this expectation may rely on a deterministic
        random source -- confirm against EpsilonGreedyPolicy.
        """
        policy = EpsilonGreedyPolicy(0.1, 0.1, 1)

        chosen = policy.select_action([1, 2, 3], 0)

        self.assertEqual(2, chosen)
示例#4
0
    def test_select_action_for_low_epsilon(self):
        """Check that a (0, 0, 1) policy returns 2 for [1, 2, 3] at step 0.

        NOTE(review): 2 is the position of the largest entry in
        [1, 2, 3], so the return value is presumably an action index --
        confirm against EpsilonGreedyPolicy.
        """
        policy = EpsilonGreedyPolicy(0, 0, 1)

        chosen = policy.select_action([1, 2, 3], 0)

        self.assertEqual(2, chosen)