def test_human_agent():
    """Test the human agent's console-input action loop and its unsupported CLI initializer."""

    agent = Human()

    foo = Action(0, 'Foo')
    bar = Action(1, 'Bar')
    agent.sense(MdpState(1, [foo, bar], False), 0)

    # script the console input:  first an invalid entry (exercises the retry
    # loop), then the valid action name 'Bar' for every subsequent prompt.
    scripted_responses = ['asdf']

    def scripted_input(prompt: str) -> str:
        return scripted_responses.pop(0) if scripted_responses else 'Bar'

    agent.get_input = scripted_input

    assert agent.act(0) == bar

    # human agents cannot be initialized from command-line arguments
    with pytest.raises(NotImplementedError):
        rng = RandomState(12345)
        Human.init_from_arguments([], rng, None)
def __init__(self, name: str, random_state: RandomState, T: Optional[int], p_h: float):
    """
    Initialize the MDP environment.

    :param name: Name.
    :param random_state: Random state.
    :param T: Maximum number of steps to run, or None for no limit.
    :param p_h: Probability of coin toss coming up heads.
    """

    # probability of heads and its complement (tails)
    self.p_h = p_h
    self.p_t = 1 - p_h

    # the range of possible actions: stake 0 (no play) through 50 (at capital=50). beyond a capital of 50 the
    # agent is only allowed to stake an amount that would take them to 100 on a win.
    AA = [Action(i=stake, name=f'Stake {stake}') for stake in range(0, 51)]

    # two possible rewards: 0.0 and 1.0 (reward 1.0 is only received upon reaching capital 100)
    self.r_not_win = Reward(0, 0.0)
    self.r_win = Reward(1, 1.0)
    RR = [self.r_not_win, self.r_win]

    # range of possible states (capital levels), one state per capital level 0-100
    SS = [
        MdpState(
            i=capital,

            # the range of permissible actions is state dependent:  the agent may not stake more than its
            # current capital, nor more than would take it past 100 on a win.
            AA=[a for a in AA if a.i <= min(capital, 100 - capital)],

            terminal=capital == 0 or capital == 100
        )

        # include terminal capital levels of 0 and 100
        for capital in range(0, 101)
    ]

    super().__init__(
        name=name,
        random_state=random_state,
        T=T,
        SS=SS,
        RR=RR
    )

    # fill in the transition-probability model p(s', r | s, a) for each state-action pair.
    # NOTE(review): self.p_S_prime_R_given_S_A is presumably initialized to zeros by the
    # superclass initializer above — confirm against the base-class implementation.
    for s in self.SS:
        for a in self.p_S_prime_R_given_S_A[s]:

            # next state and reward if heads:  capital increases by the stake
            s_prime_h = self.SS[s.i + a.i]
            r_h = self.r_win if not s.terminal and s_prime_h.i == 100 else self.r_not_win
            self.p_S_prime_R_given_S_A[s][a][s_prime_h][r_h] = self.p_h

            # next state and reward if tails:  capital decreases by the stake
            s_prime_t = self.SS[s.i - a.i]
            r_t = self.r_win if not s.terminal and s_prime_t.i == 100 else self.r_not_win
            self.p_S_prime_R_given_S_A[s][a][s_prime_t][
                r_t] += self.p_t  # add the probability, in case the results of head and tail are the same.

    self.check_marginal_probabilities()
def test_prioritized_planning_environment():
    """Test priority-ordered retrieval of state-action pairs from the prioritized-sweeping environment."""

    rng = RandomState(12345)

    planning_environment = PrioritizedSweepingMdpPlanningEnvironment(
        'test', rng, StochasticEnvironmentModel(), 1, 0.3, 10)

    # queue three state-action pairs with deliberately out-of-order priority values
    for state_i, action_i, priority in [
        (1, 1, 0.2),
        (2, 2, 0.1),
        (3, 3, 0.3)
    ]:
        planning_environment.add_state_action_priority(
            MdpState(state_i, [], False), Action(action_i), priority)

    # the lowest priority value is retrieved first
    s, a = planning_environment.get_state_action_with_highest_priority()
    assert s.i == 2 and a.i == 2

    s, a = planning_environment.get_state_action_with_highest_priority()
    assert s.i == 1 and a.i == 1

    # the entry added with priority 0.3 is never returned, and the queue reports exhaustion
    s, a = planning_environment.get_state_action_with_highest_priority()
    assert s is None and a is None
def __init__(
        self,
        name: str,
        random_state: RandomState,
        T: Optional[int],
        n_rows: int,
        n_columns: int,
        terminal_states: List[Tuple[int, int]],
        RR: List[Reward]
):
    """
    Initialize the gridworld.

    :param name: Name.
    :param random_state: Random state.
    :param T: Maximum number of steps to run, or None for no limit.
    :param n_rows: Number of rows.
    :param n_columns: Number of columns.
    :param terminal_states: List of terminal-state locations.
    :param RR: List of all possible rewards.
    """

    # one action per movement direction, in a fixed order
    AA = [
        Action(i=i, name=direction)
        for i, direction in enumerate(['u', 'd', 'l', 'r'])
    ]
    self.a_up, self.a_down, self.a_left, self.a_right = AA

    # one state per grid cell, numbered in row-major order, with every action
    # available in every state
    SS = [
        MdpState(i=row_i * n_columns + col_j, AA=AA, terminal=False)
        for row_i in range(n_rows)
        for col_j in range(n_columns)
    ]

    # mark the requested cells as terminal
    for row, col in terminal_states:
        SS[row * n_columns + col].terminal = True

    super().__init__(
        name=name,
        random_state=random_state,
        T=T,
        SS=SS,
        RR=RR
    )

    # 2-d view of the state list for convenient (row, column) indexing
    self.grid = np.array(self.SS).reshape(n_rows, n_columns)
def test_check_state_and_action_lists():
    """Test validation of state/action list lengths in the gridworld feature extractor."""

    random = RandomState(12345)
    gw = Gridworld.example_4_1(random, T=None)
    fex = GridworldFeatureExtractor(gw)

    states = [MdpState(i=None, AA=[], terminal=False)]
    actions = [Action(0)]

    # equal-length lists pass validation without raising
    fex.check_state_and_action_lists(states, actions)

    # mismatched lengths must raise
    with pytest.raises(ValueError, match='Expected '):
        actions.clear()
        fex.check_state_and_action_lists(states, actions)
def test_agent_invalid_action():
    """Test that the stochastic MDP agent rejects None and infeasible actions."""

    random = RandomState()
    agent = StochasticMdpAgent('foo', random, TabularPolicy(None, None), 1.0)

    # an action of None must be rejected
    agent.__act__ = lambda t: None
    with pytest.raises(ValueError, match='Agent returned action of None'):
        agent.act(0)

    # an action that is not feasible in the sensed state must be rejected
    infeasible_action = Action(1, 'foo')
    agent.__act__ = lambda t: infeasible_action
    state = MdpState(1, [], False)
    agent.sense(state, 0)
    with pytest.raises(ValueError, match=f'Action {infeasible_action} is not feasible in state {state}'):
        agent.act(0)
def test_agent_invalid_action():
    """Test that the action-value MDP agent rejects None and infeasible actions."""

    random = RandomState()
    agent = ActionValueMdpAgent(
        'foo', random, 1.0,
        TabularStateActionValueEstimator(Gridworld.example_4_1(random, None), None, None))

    # an action of None must be rejected
    agent.__act__ = lambda t: None
    with pytest.raises(ValueError, match='Agent returned action of None'):
        agent.act(0)

    # an action that is not feasible in the sensed state must be rejected
    infeasible_action = Action(1, 'foo')
    agent.__act__ = lambda t: infeasible_action
    state = MdpState(1, [], False)
    agent.sense(state, 0)
    with pytest.raises(
            ValueError,
            match=f'Action {infeasible_action} is not feasible in state {state}'):
        agent.act(0)