def test_state_transition(self):
    """Step through two arbitrary transitions and verify the recorded trajectory."""
    world = BlocksWorld(state_initial={'on(b1,table)', 'on(b2,b1)'},
                        state_static={'subgoal(b1,b2)'})

    # First move: unstack b2 onto the table — ordinary step, reward -1.
    state_after, reward = world.transition('move(b2,table)')
    self.assertEqual({'on(b1,table)', 'on(b2,table)'}, state_after)
    self.assertEqual(-1, reward)
    self.assertEqual({'on(b1,table)', 'on(b2,table)'}, world.state)

    # Second move: stack b1 on b2 — reaches the subgoal, reward 99.
    state_after, reward = world.transition('move(b1,b2)')
    self.assertEqual({'on(b1,b2)', 'on(b2,table)'}, state_after)
    self.assertEqual(99, reward)
    self.assertEqual({'on(b1,b2)', 'on(b2,table)'}, world.state)

    # Check if trajectory is correct: S0, A0, R1, S1, A1, R2, S2
    self.assertEqual({'on(b1,table)', 'on(b2,b1)'}, world.state_history[0])   # S0
    self.assertEqual('move(b2,table)', world.action_history[0])               # A0
    self.assertEqual(-1, world.reward_history[1])                             # R1
    self.assertEqual({'on(b1,table)', 'on(b2,table)'}, world.state_history[1])  # S1
    self.assertEqual('move(b1,b2)', world.action_history[1])                  # A1
    self.assertEqual(100 - 1, world.reward_history[2])                        # R2
    self.assertEqual({'on(b1,b2)', 'on(b2,table)'}, world.state_history[2])   # S2
def test_returns_1(self):
    """A single optimal move: the return at t=0 equals the goal reward."""
    world = BlocksWorld(state_initial={'on(b1,table)', 'on(b2,table)'},
                        state_static={'subgoal(b2,b1)'})
    world.transition('move(b2,b1)')
    # G[t] = R[t+1] + ...
    self.assertEqual(world.return_history[0], 99)
def test_available_actions_4(self):
    """The set of available actions must be recomputed after a transition."""
    world = BlocksWorld(
        state_initial={'on(b2,table)', 'on(b1,table)', 'on(b3,table)'},
        state_static={'subgoal(b2,b1)'})
    world.transition('move(b1,b2)')
    expected = {'move(b1,table)', 'move(b1,b3)', 'move(b3,b1)'}
    self.assertEqual(expected, world.available_actions)
def test_available_actions_5(self):
    """Starting in the goal state leaves no actions available."""
    world = BlocksWorld(state_initial={'on(b1,table)', 'on(b2,b1)'},
                        state_static={'subgoal(b2,b1)'})
    self.assertEqual(set(), world.available_actions)
def test_available_actions_3(self):
    """Only the clear blocks (b1 and b3) can be moved in this configuration."""
    world = BlocksWorld(
        state_initial={'on(b2,table)', 'on(b1,b2)', 'on(b3,table)'},
        state_static={'subgoal(b2,b1)'})
    expected = {'move(b1,table)', 'move(b1,b3)', 'move(b3,b1)'}
    self.assertEqual(expected, world.available_actions)
def test_blocksworld_1(self):
    """Planner with horizon 1 suggests the single goal-reaching move."""
    builder = BlocksWorldBuilder(blocks_world_size=2)
    world = BlocksWorld(state_initial={'on(b0,table)', 'on(b1,table)'},
                        state_static={'subgoal(b1,b0)'})
    planner = PlannerPolicy(planning_horizon=1, mdp_builder=builder)

    action, expected_return = planner.suggest_action_and_return_for_state(world.state)
    # Both suggestion entry points must agree on the chosen action.
    self.assertEqual(action, planner.suggest_action_for_state(world.state))

    world.transition(action)
    self.assertEqual('move(b1,b0)', action)
    # The realized return must match the planner's prediction.
    self.assertEqual(world.return_history[0], expected_return)
def test_returns_2(self):
    """Undiscounted returns accumulate rewards from each step to the end."""
    world = BlocksWorld(state_initial={'on(b1,table)', 'on(b2,table)'},
                        state_static={'subgoal(b2,b1)'})
    world.transition('move(b1,b2)')
    world.transition('move(b1,table)')
    world.transition('move(b2,b1)')
    # G[t] = R[t+1] + R[t+2] + R[t+3]
    self.assertEqual(world.return_history[0], -1 + -1 + 99)
    self.assertEqual(world.return_history[1], -1 + 99)
    self.assertEqual(world.return_history[2], 99)
    self.assertEqual(world.return_history[3], 0)  # Return is zero in terminal state
def test_blocksworld_optimal_return(self):
    """Planner computes the optimal return for a 5-block tower-building task."""
    builder = BlocksWorldBuilder(blocks_world_size=5)
    world = BlocksWorld(
        state_initial={
            'on(b2,b1)', 'on(b0,b3)', 'on(b4,table)',
            'on(b1,table)', 'on(b3,table)'
        },
        state_static={
            'subgoal(b0,table)', 'subgoal(b1,b0)', 'subgoal(b2,b1)',
            'subgoal(b3,b2)', 'subgoal(b4,b3)'
        })
    # Horizon 2n+1 is enough to fully solve an n-block world.
    planner = PlannerPolicy(planning_horizon=2 * 5 + 1, mdp_builder=builder)
    self.assertEqual(94, planner.compute_optimal_return_for_state(world.state))