Пример #1
0
 def test_single_column_no_wall(self):
     """Check the policy found on a wall-free grid with length=3, width=1.

     Walk and run both succeed with probability 1 and carry zero reward;
     the only exit sits at (0, 0) with reward 1.
     """
     settings = dict(
         length=3,
         width=1,
         p_walk=1,
         p_run=1,
         reward_run=0,
         reward_walk=0,
         wall_list=[],
         exit_list=[((0, 0), 1)],
         discount=0.7,
         e=1e-80,
     )
     mdp = MDP(**settings)
     mdp.value_iteration()

     expected_policy = np.array([[0], [0], [4]])
     test.assert_array_almost_equal(expected_policy, mdp.policy)
Пример #2
0
 def test_small_value_iteration_no_wall(self):
     """Check the policy on a 3x3 grid built without a wall_list argument.

     Two exits are configured: (0, 2) with reward 1 and (1, 2) with
     reward -1. Moves are free (zero reward) and the discount is 0.1.
     """
     settings = dict(
         width=3,
         length=3,
         p_walk=0.7,
         p_run=0.7,
         reward_walk=0,
         reward_run=0,
         discount=0.1,
         e=1e-80,
         exit_list=[((0, 2), 1), ((1, 2), -1)],
     )
     mdp = MDP(**settings)
     mdp.value_iteration()

     expected_policy = np.array([[7, 3, 7], [0, 2, 0], [4, 4, 4]])
     test.assert_array_almost_equal(expected_policy, mdp.policy)
Пример #3
0
 def test_single_column_middle_wall(self):
     """Check the rendered policy for a single column with a wall at (1, 0).

     The grid has length=3, width=1 and one exit at (0, 0); the text
     returned by out_put() must list one cell per line.
     """
     mdp = MDP(length=3,
               width=1,
               p_walk=0.7,
               p_run=0.7,
               reward_run=-0.1,
               reward_walk=-0.3,
               wall_list=[(1, 0)],
               exit_list=[((0, 0), 1)],
               discount=0.7,
               e=1e-80)
     mdp.value_iteration()
     # Named `rendered` rather than `str` so the builtin is not shadowed.
     rendered = mdp.out_put()
     self.assertEqual(rendered, "Exit\nNone\nRun Up\n")
Пример #4
0
 def test_single_row_middle_wall(self):
     """Check the rendered policy for a single row with a wall at (0, 1).

     The grid has length=1, width=3 and one exit at (0, 2); out_put()
     must return the three cells comma-separated on one line.
     """
     settings = dict(
         length=1,
         width=3,
         p_walk=0.7,
         p_run=0.7,
         reward_run=-0.1,
         reward_walk=-0.3,
         wall_list=[(0, 1)],
         exit_list=[((0, 2), 1)],
         discount=0.7,
         e=1e-80,
     )
     mdp = MDP(**settings)
     mdp.value_iteration()

     rendered = mdp.out_put()
     self.assertEqual("Run Up,None,Exit\n", rendered)
Пример #5
0
    def test_initial(self):
        """Check the state of a freshly constructed 3x3 MDP.

        Verifies the initial value grid (exit rewards at their cells, zero
        elsewhere) and the per-cell probabilities stored in ``mdp.p`` for
        each (direction, action) pair.
        """
        settings = dict(
            width=3,
            length=3,
            p_walk=0.7,
            p_run=0.6,
            reward_walk=1,
            reward_run=1,
            discount=0,
            exit_list=[((0, 2), 1), ((1, 2), -1)],
        )
        mdp = MDP(**settings)

        initial_value = np.array([[0, 0, 1], [0, 0, -1], [0, 0, 0]])
        test.assert_array_equal(initial_value, mdp.value)

        def prob_grid(direction, action):
            # Per-cell probability of ending up moving `direction` when
            # `action` is chosen.
            return np.squeeze(mdp.p[:, :, direction, action])

        # Walking up: the intended direction gets p_walk, the opposite
        # direction gets nothing, and each side direction gets 0.15.
        going_up = prob_grid(mdp.up, mdp.walk_up)
        test.assert_array_equal(np.ones((3, 3)) * 0.7, going_up)

        going_down = prob_grid(mdp.down, mdp.walk_up)
        test.assert_array_equal(np.zeros((3, 3)), going_down)

        going_left = prob_grid(mdp.left, mdp.walk_up)
        test.assert_array_almost_equal(np.ones((3, 3)) * 0.15, going_left)

        going_right = prob_grid(mdp.right, mdp.walk_up)
        test.assert_array_almost_equal(np.ones((3, 3)) * 0.15, going_right)

        for direction in mdp.direction_enum:
            for action in mdp.action_enum:
                observed = prob_grid(direction, action)
                if direction == action:
                    # Walk action whose index equals the direction index.
                    intended = 0.7
                elif direction == action - 4:
                    # Run action: presumably run indices are offset by 4
                    # from the matching direction -- confirm against MDP.
                    intended = 0.6
                else:
                    continue
                test.assert_array_almost_equal(observed,
                                               np.ones((3, 3)) * intended)
Пример #6
0
 def test_case1(self):
     """Check the full policy on a grid with width=6, length=5.

     The grid has walls at (1, 1) and (4, 3) and exits at (0, 2) with
     reward 10 and (2, 4) with reward 5.
     """
     settings = dict(
         width=6,
         length=5,
         p_walk=0.8,
         p_run=0.6,
         reward_walk=-0.3,
         reward_run=-0.2,
         wall_list=[(1, 1), (4, 3)],
         discount=0.7,
         exit_list=[((0, 2), 10), ((2, 4), 5)],
         e=1e-80,
     )
     mdp = MDP(**settings)
     mdp.value_iteration()

     expected_policy = np.array([
         [7., 3., 0., 2., 6., 6.],
         [0., 0., 0., 0., 0., 2.],
         [4., 3., 4., 4., 4., 2.],
         [4., 0., 4., 0., 0., 0.],
         [4., 4., 4., 4., 4., 2.],
     ])
     test.assert_equal(expected_policy, mdp.policy)
Пример #7
0
    def test_wall_builder(self):
        """Check the wall-adjacency lists built for walls at (1, 1), (0, 1).

        ``wall_<direction>_1`` is compared against the expected cells for
        a walk and ``wall_<direction>_2`` against the expected cells for a
        run, on a 3x3 grid.
        """
        mdp = MDP(width=3,
                  length=3,
                  p_walk=0.7,
                  p_run=0.6,
                  reward_walk=1,
                  reward_run=1,
                  discount=0,
                  wall_list=[(1, 1), (0, 1)],
                  exit_list=[((0, 2), 1), ((1, 2), -1)])

        walk_up = mdp.wall_up_1
        self.assertListEqual([(2, 1), (1, 1)], walk_up)
        # The original re-asserted walk_up here, leaving wall_left_1
        # unchecked. NOTE(review): the expected value is inferred from the
        # other assertions in this test (each _2 list starts with the
        # matching _1 list) -- confirm against the MDP implementation.
        walk_left = mdp.wall_left_1
        self.assertListEqual([(1, 2), (0, 2)], walk_left)
        walk_down = mdp.wall_down_1
        self.assertListEqual([(0, 1)], walk_down)
        # The original read wall_right_2 here, duplicating the run check
        # below instead of covering the walk list.
        walk_right = mdp.wall_right_1
        self.assertListEqual([(1, 0), (0, 0)], walk_right)

        run_up = mdp.wall_up_2
        self.assertListEqual([(2, 1), (1, 1), (2, 1)], run_up)
        run_left = mdp.wall_left_2
        self.assertListEqual([(1, 2), (0, 2)], run_left)
        run_down = mdp.wall_down_2
        self.assertListEqual([(0, 1)], run_down)
        run_right = mdp.wall_right_2
        self.assertListEqual([(1, 0), (0, 0)], run_right)
Пример #8
0
 def test_case_11_discount_0(self):
     """Solve the MDP described by input11.txt and dump the policy.

     The rendered policy is written to my_output11.txt; nothing is
     asserted, so this only fails if loading, solving or writing raises.
     """
     configuration = Configuration()
     (width, length, p_walk, p_run, r_walk, r_run, discount, wall_list,
      exit_list) = configuration.read_file("input11.txt")
     mdp = MDP(width=width,
               length=length,
               p_walk=p_walk,
               p_run=p_run,
               reward_walk=r_walk,
               reward_run=r_run,
               wall_list=wall_list,
               discount=discount,
               exit_list=exit_list,
               e=1e-80)
     mdp.value_iteration()
     # Named `rendered` rather than `str` so the builtin is not shadowed.
     rendered = mdp.out_put()
     with open("my_output11.txt", 'w') as f:
         f.write(rendered)
Пример #9
0
 def test_big_accurate(self):
     """Solve the MDP from input5.txt and compare with a reference file.

     The rendered policy is written to my_output5.txt and must equal the
     full contents of output_shun.txt.
     """
     configuration = Configuration()
     (width, length, p_walk, p_run, r_walk, r_run, discount, wall_list,
      exit_list) = configuration.read_file("input5.txt")
     mdp = MDP(width=width,
               length=length,
               p_walk=p_walk,
               p_run=p_run,
               reward_walk=r_walk,
               reward_run=r_run,
               wall_list=wall_list,
               discount=discount,
               exit_list=exit_list,
               e=1e-80)
     mdp.value_iteration()
     # Named `rendered` rather than `str` so the builtin is not shadowed.
     rendered = mdp.out_put()
     with open("my_output5.txt", 'w') as f:
         f.write(rendered)
     with open("output_shun.txt", "r") as f:
         reference = f.read()
     self.assertEqual(reference, rendered)
Пример #10
0
    def test_policy_evaluation(self):
        """Check policy_evaluation for walk_up and run_up on a fixed grid.

        The MDP's value grid is overwritten with 1..9 so the expected
        results can be written out by hand; the grid has a wall at (1, 1).
        """
        settings = dict(
            width=3,
            length=3,
            p_walk=0.7,
            p_run=0.6,
            reward_walk=1,
            reward_run=1,
            discount=0,
            wall_list=[(1, 1)],
            exit_list=[((0, 2), 1), ((1, 2), -1)],
        )
        mdp = MDP(**settings)
        mdp.value = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

        walked = mdp.policy_evaluation(mdp.walk_up)
        expected_walk = [[1.15, 2, 2.85], [1.9, 2.9, 3.9], [5.05, 8, 6.75]]
        test.assert_array_almost_equal(expected_walk, walked)

        ran = mdp.policy_evaluation(mdp.run_up)
        # Each entry is left as a sum of three terms, as in the hand
        # calculation, rather than collapsed to a single number.
        expected_run = np.array([
            [0.6 + 0.2 + 0.6, 1.2 + 0.4 + 0.4, 1.8 + 0.2 + 0.6],
            [2.4 + 0.8 + 0.8, 5, 3.6 + 1.2 + 1.2],
            [0.6 + 1.4 + 1.8, 4.8 + 1.6 + 1.6, 1.8 + 1.4 + 1.8],
        ])
        test.assert_array_almost_equal(expected_run, ran)
Пример #11
0
    def test_output(self):
        """Check that out_put() renders a hand-set policy as text.

        The policy array is assigned directly (value_iteration is not
        run); the expected text has one comma-separated line per grid row,
        each terminated by a newline.
        """
        mdp = MDP(width=6,
                  length=5,
                  p_walk=0.8,
                  p_run=0.6,
                  reward_walk=-0.3,
                  reward_run=-0.2,
                  wall_list=[(1, 1), (4, 3)],
                  discount=0.7,
                  exit_list=[((0, 2), 10), ((2, 4), 5)],
                  e=1e-80)
        mdp.policy = np.array([[7., 3., 0., 2., 6., 6.],
                               [0., 0., 0., 0., 0., 2.],
                               [4., 3., 4., 4., 4., 2.],
                               [4., 0., 4., 0., 0., 0.],
                               [4., 4., 4., 4., 4., 2.]])
        # Named `rendered` rather than `str` so the builtin is not shadowed.
        rendered = mdp.out_put()
        # Adjacent literals keep the expected text inside the method's
        # indentation; the concatenated value is unchanged.
        expected = (
            "Run Right,Walk Right,Exit,Walk Left,Run Left,Run Left\n"
            "Walk Up,None,Walk Up,Walk Up,Walk Up,Walk Left\n"
            "Run Up,Walk Right,Run Up,Run Up,Exit,Walk Left\n"
            "Run Up,Walk Up,Run Up,Walk Up,Walk Up,Walk Up\n"
            "Run Up,Run Up,Run Up,None,Run Up,Walk Left\n"
        )
        self.assertEqual(rendered, expected)