Example no. 1
0
    def test_state_transition(self):
        """Two moves reach the subgoal; state, reward and history line up."""

        # Start with b2 stacked on b1; the subgoal asks for b1 on b2.
        mdp = BlocksWorld(state_initial={'on(b1,table)', 'on(b2,b1)'},
                          state_static={'subgoal(b1,b2)'})

        # First move: unstack b2 onto the table (ordinary step, reward -1).
        state_after, reward_after = mdp.transition('move(b2,table)')
        self.assertEqual({'on(b1,table)', 'on(b2,table)'}, state_after)
        self.assertEqual(-1, reward_after)
        self.assertEqual({'on(b1,table)', 'on(b2,table)'}, mdp.state)

        # Second move: stack b1 on b2, satisfying the subgoal (100 - 1 = 99).
        state_after, reward_after = mdp.transition('move(b1,b2)')
        self.assertEqual({'on(b1,b2)', 'on(b2,table)'}, state_after)
        self.assertEqual(99, reward_after)
        self.assertEqual({'on(b1,b2)', 'on(b2,table)'}, mdp.state)

        # The recorded trajectory must read S0, A0, R1, S1, A1, R2, S2.
        self.assertEqual({'on(b1,table)', 'on(b2,b1)'},
                         mdp.state_history[0])  # S0
        self.assertEqual('move(b2,table)', mdp.action_history[0])  # A0
        self.assertEqual(-1, mdp.reward_history[1])  # R1
        self.assertEqual({'on(b1,table)', 'on(b2,table)'},
                         mdp.state_history[1])  # S1
        self.assertEqual('move(b1,b2)', mdp.action_history[1])  # A1
        self.assertEqual(100 - 1, mdp.reward_history[2])  # R2
        self.assertEqual({'on(b1,b2)', 'on(b2,table)'},
                         mdp.state_history[2])  # S2
Example no. 2
0
    def test_returns_1(self):
        """The return at t=0 equals the single terminal reward."""

        # A single move satisfies the subgoal directly.
        world = BlocksWorld(state_initial={'on(b1,table)', 'on(b2,table)'},
                            state_static={'subgoal(b2,b1)'})
        world.transition('move(b2,b1)')

        # G[0] = R[1] = 99 (goal bonus minus the step cost)
        self.assertEqual(world.return_history[0], 99)
Example no. 3
0
    def test_available_actions_4(self):
        """available_actions is refreshed after a state transition."""

        world = BlocksWorld(
            state_initial={'on(b2,table)', 'on(b1,table)', 'on(b3,table)'},
            state_static={'subgoal(b2,b1)'})

        world.transition('move(b1,b2)')

        # With b1 now sitting on b2, exactly these moves remain legal.
        expected = {'move(b1,table)', 'move(b1,b3)', 'move(b3,b1)'}
        self.assertEqual(expected, world.available_actions)
Example no. 4
0
    def test_blocksworld_1(self):
        """A horizon-1 planner suggests the goal move and predicts its return."""

        builder = BlocksWorldBuilder(blocks_world_size=2)
        world = BlocksWorld(state_initial={'on(b0,table)', 'on(b1,table)'},
                            state_static={'subgoal(b1,b0)'})

        planner = PlannerPolicy(planning_horizon=1, mdp_builder=builder)

        action, predicted_return = planner.suggest_action_and_return_for_state(
            world.state)

        # Both planner entry points must agree on the suggested action.
        self.assertEqual(action,
                         planner.suggest_action_for_state(world.state))

        world.transition(action)

        self.assertEqual('move(b1,b0)', action)
        # The planner's predicted return matches the realised episode return.
        self.assertEqual(world.return_history[0], predicted_return)
Example no. 5
0
    def test_returns_2(self):
        """Undiscounted returns accumulate the per-step rewards."""

        world = BlocksWorld(state_initial={'on(b1,table)', 'on(b2,table)'},
                            state_static={'subgoal(b2,b1)'})

        # Take a detour: stack b1 on b2, undo it, then reach the subgoal.
        for action in ('move(b1,b2)', 'move(b1,table)', 'move(b2,b1)'):
            world.transition(action)

        # G[t] = R[t+1] + R[t+2] + ... with no discounting.
        self.assertEqual(world.return_history[0], -1 + -1 + 99)
        self.assertEqual(world.return_history[1], -1 + 99)
        self.assertEqual(world.return_history[2], 99)
        # The return computed from the terminal state itself is zero.
        self.assertEqual(world.return_history[3], 0)