def test_human_agent():
    """Test the human agent's console-input action loop and its unsupported CLI initializer."""

    agent = Human()

    foo = Action(0, 'Foo')
    bar = Action(1, 'Bar')
    agent.sense(MdpState(1, [foo, bar], False), 0)

    # script the console input:  first an invalid entry (exercises the retry
    # loop), then the valid action name 'Bar' for every subsequent prompt.
    scripted_responses = ['asdf']

    def scripted_input(prompt: str) -> str:
        return scripted_responses.pop(0) if scripted_responses else 'Bar'

    agent.get_input = scripted_input

    assert agent.act(0) == bar

    # human agents cannot be initialized from command-line arguments
    with pytest.raises(NotImplementedError):
        rng = RandomState(12345)
        Human.init_from_arguments([], rng, None)
def __init__(self, name: str, random_state: RandomState, T: Optional[int], p_h: float):
    """
    Initialize the MDP environment.

    :param name: Name.
    :param random_state: Random state.
    :param T: Maximum number of steps to run, or None for no limit.
    :param p_h: Probability of coin toss coming up heads.
    """

    # probability of heads and its complement (tails)
    self.p_h = p_h
    self.p_t = 1 - p_h

    # the range of possible actions: stake 0 (no play) through 50 (at capital=50). beyond a capital of 50 the
    # agent is only allowed to stake an amount that would take them to 100 on a win.
    AA = [Action(i=stake, name=f'Stake {stake}') for stake in range(0, 51)]

    # two possible rewards: 0.0 and 1.0 (reward 1.0 is only received upon reaching capital 100)
    self.r_not_win = Reward(0, 0.0)
    self.r_win = Reward(1, 1.0)
    RR = [self.r_not_win, self.r_win]

    # range of possible states (capital levels), one state per capital level 0-100
    SS = [
        MdpState(
            i=capital,

            # the range of permissible actions is state dependent:  the agent may not stake more than its
            # current capital, nor more than would take it past 100 on a win.
            AA=[a for a in AA if a.i <= min(capital, 100 - capital)],

            terminal=capital == 0 or capital == 100
        )

        # include terminal capital levels of 0 and 100
        for capital in range(0, 101)
    ]

    super().__init__(
        name=name,
        random_state=random_state,
        T=T,
        SS=SS,
        RR=RR
    )

    # fill in the transition-probability model p(s', r | s, a) for each state-action pair.
    # NOTE(review): self.p_S_prime_R_given_S_A is presumably initialized to zeros by the
    # superclass initializer above — confirm against the base-class implementation.
    for s in self.SS:
        for a in self.p_S_prime_R_given_S_A[s]:

            # next state and reward if heads:  capital increases by the stake
            s_prime_h = self.SS[s.i + a.i]
            r_h = self.r_win if not s.terminal and s_prime_h.i == 100 else self.r_not_win
            self.p_S_prime_R_given_S_A[s][a][s_prime_h][r_h] = self.p_h

            # next state and reward if tails:  capital decreases by the stake
            s_prime_t = self.SS[s.i - a.i]
            r_t = self.r_win if not s.terminal and s_prime_t.i == 100 else self.r_not_win
            self.p_S_prime_R_given_S_A[s][a][s_prime_t][
                r_t] += self.p_t  # add the probability, in case the results of head and tail are the same.

    self.check_marginal_probabilities()
def test_prioritized_planning_environment():
    """Test priority-ordered retrieval of state-action pairs from the prioritized-sweeping environment."""

    rng = RandomState(12345)

    planning_environment = PrioritizedSweepingMdpPlanningEnvironment(
        'test', rng, StochasticEnvironmentModel(), 1, 0.3, 10)

    # queue three state-action pairs with deliberately out-of-order priority values
    for state_i, action_i, priority in [
        (1, 1, 0.2),
        (2, 2, 0.1),
        (3, 3, 0.3)
    ]:
        planning_environment.add_state_action_priority(
            MdpState(state_i, [], False), Action(action_i), priority)

    # the lowest priority value is retrieved first
    s, a = planning_environment.get_state_action_with_highest_priority()
    assert s.i == 2 and a.i == 2

    s, a = planning_environment.get_state_action_with_highest_priority()
    assert s.i == 1 and a.i == 1

    # the entry added with priority 0.3 is never returned, and the queue reports exhaustion
    s, a = planning_environment.get_state_action_with_highest_priority()
    assert s is None and a is None
def __init__(
        self,
        name: str,
        random_state: RandomState,
        T: Optional[int],
        n_rows: int,
        n_columns: int,
        terminal_states: List[Tuple[int, int]],
        RR: List[Reward]
):
    """
    Initialize the gridworld.

    :param name: Name.
    :param random_state: Random state.
    :param T: Maximum number of steps to run, or None for no limit.
    :param n_rows: Number of rows.
    :param n_columns: Number of columns.
    :param terminal_states: List of terminal-state locations.
    :param RR: List of all possible rewards.
    """

    # one action per movement direction, in a fixed order
    AA = [
        Action(i=i, name=direction)
        for i, direction in enumerate(['u', 'd', 'l', 'r'])
    ]
    self.a_up, self.a_down, self.a_left, self.a_right = AA

    # one state per grid cell, numbered in row-major order, with every action
    # available in every state
    SS = [
        MdpState(i=row_i * n_columns + col_j, AA=AA, terminal=False)
        for row_i in range(n_rows)
        for col_j in range(n_columns)
    ]

    # mark the requested cells as terminal
    for row, col in terminal_states:
        SS[row * n_columns + col].terminal = True

    super().__init__(
        name=name,
        random_state=random_state,
        T=T,
        SS=SS,
        RR=RR
    )

    # 2-d view of the state list for convenient (row, column) indexing
    self.grid = np.array(self.SS).reshape(n_rows, n_columns)
def test_check_state_and_action_lists():
    """Test validation of state/action list lengths in the gridworld feature extractor."""

    random = RandomState(12345)
    gw = Gridworld.example_4_1(random, T=None)
    fex = GridworldFeatureExtractor(gw)

    states = [MdpState(i=None, AA=[], terminal=False)]
    actions = [Action(0)]

    # equal-length lists pass validation without raising
    fex.check_state_and_action_lists(states, actions)

    # mismatched lengths must raise
    with pytest.raises(ValueError, match='Expected '):
        actions.clear()
        fex.check_state_and_action_lists(states, actions)
def test_agent_invalid_action():
    """Test that the stochastic MDP agent rejects None and infeasible actions."""

    random = RandomState()
    agent = StochasticMdpAgent('foo', random, TabularPolicy(None, None), 1.0)

    # an action of None must be rejected
    agent.__act__ = lambda t: None
    with pytest.raises(ValueError, match='Agent returned action of None'):
        agent.act(0)

    # an action that is not feasible in the sensed state must be rejected
    infeasible_action = Action(1, 'foo')
    agent.__act__ = lambda t: infeasible_action
    state = MdpState(1, [], False)
    agent.sense(state, 0)
    with pytest.raises(ValueError, match=f'Action {infeasible_action} is not feasible in state {state}'):
        agent.act(0)
def test_agent_invalid_action():
    """Test that the action-value MDP agent rejects None and infeasible actions."""

    random = RandomState()
    agent = ActionValueMdpAgent(
        'foo', random, 1.0,
        TabularStateActionValueEstimator(Gridworld.example_4_1(random, None), None, None))

    # an action of None must be rejected
    agent.__act__ = lambda t: None
    with pytest.raises(ValueError, match='Agent returned action of None'):
        agent.act(0)

    # an action that is not feasible in the sensed state must be rejected
    infeasible_action = Action(1, 'foo')
    agent.__act__ = lambda t: infeasible_action
    state = MdpState(1, [], False)
    agent.sense(state, 0)
    with pytest.raises(
            ValueError,
            match=f'Action {infeasible_action} is not feasible in state {state}'):
        agent.act(0)