def test_simpletmaze():
    """Test the SimpleTMaze environment."""
    env = SimpleTMaze(length=2, hint_pos=1, goal_x=-1)
    env.start_new_episode()
    assert env.get_state() == State(x=0, y=0, symbol=0, goal_x=-1)
    expected_steps = [
        RLTestStep(
            State(x=0, y=0, symbol=0),
            [Action('up')],
            Action('up'),
            -1,
        ),
        RLTestStep(
            State(x=0, y=1, symbol=-1),
            [Action('up')],
            Action('up'),
            -1,
        ),
        RLTestStep(
            State(x=0, y=2, symbol=0),
            [Action('left'), Action('right')],
            Action('left'),
            10,
        ),
        RLTestStep(State(x=-1, y=2, symbol=0), [], None, None),
    ]
    for expected in expected_steps:
        assert env.get_observation() == expected.observation
        assert set(env.get_actions()) == set(expected.actions)
        if expected.action is not None:
            reward = env.react(expected.action)
            assert reward == expected.reward
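
# NOTE: RLTestStep is used throughout these tests but is not defined in this
# excerpt (State, Action, the environments, and the agents are presumably
# imported from the library under test). A minimal stand-in consistent with
# how RLTestStep is used here (an expected observation, the available actions,
# the action to take, and the expected reward) is the namedtuple below; the
# helper actually used by the test module may be defined differently.
from collections import namedtuple

RLTestStep = namedtuple('RLTestStep', ['observation', 'actions', 'action', 'reward'])
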
def test_memory_architecture():
    """Test the memory architecture meta-environment."""
    class TestEnv(Environment):
        """A simple environment with a single string state."""
        def __init__(self, size, index=0):
            """Initialize the TestEnv.

            Arguments:
                size (int): The length of one side of the square grid.
                index (int): The initial index.
            """
            super().__init__()
            self.size = size
            self.init_index = index
            self.index = self.init_index

        def get_state(self):  # noqa: D102
            return State(index=self.index)

        def get_observation(self):  # noqa: D102
            return State(index=self.index)

        def get_actions(self):  # noqa: D102
            if self.index == -1:
                return []
            else:
                return [Action(str(i)) for i in range(-1, self.size * self.size)]

        def reset(self):  # noqa: D102
            self.start_new_episode()

        def start_new_episode(self):  # noqa: D102
            self.index = self.init_index

        def react(self, action):  # noqa: D102
            assert action in self.get_actions()
            if action.name != 'no-op':
                self.index = int(action.name)
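            # index -1 is the terminal state (it has no actions), so reaching
            # it ends the episode with reward 100; every other step costs 1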
            if self.end_of_episode():
                return 100
            else:
                return -1

        def visualize(self):  # noqa: D102
            pass

    size = 5
    env = memory_architecture(TestEnv)(
        # memory architecture
        knowledge_store=NaiveDictKB(),
        # TestEnv
        size=size,
        index=0,
    )
    env.start_new_episode()
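    # seed long-term memory with one (index, row, col) fact per cell of the grid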
    for i in range(size * size):
        env.add_to_ltm(index=i, row=(i // size), col=(i % size))
    # test observation
    assert env.get_observation() == State(perceptual_index=0), env.get_observation()
    # test actions
    assert (set(env.get_actions()) == set([
        *(Action(str(i)) for i in range(-1, size * size)),
        Action('copy',
               src_buf='perceptual',
               src_attr='index',
               dst_buf='query',
               dst_attr='index'),
    ])), set(env.get_actions())
    # test pass-through reaction
    reward = env.react(Action('9'))
    assert env.get_observation() == State(perceptual_index=9), env.get_observation()
    assert reward == -1, reward
    # query test
    env.react(
        Action('copy',
               src_buf='perceptual',
               src_attr='index',
               dst_buf='query',
               dst_attr='index'))
    assert env.get_observation() == State(
        perceptual_index=9,
        query_index=9,
        retrieval_index=9,
        retrieval_row=1,
        retrieval_col=4,
    ), env.get_observation()
    # query with no results
    env.react(
        Action('copy',
               src_buf='retrieval',
               src_attr='row',
               dst_buf='query',
               dst_attr='row'))
    env.react(Action('0'))
    env.react(
        Action('copy',
               src_buf='perceptual',
               src_attr='index',
               dst_buf='query',
               dst_attr='index'))
    assert env.get_observation() == State(
        perceptual_index=0,
        query_index=0,
        query_row=1,
    ), env.get_observation()
    # delete test
    env.react(Action('delete', buf='query', attr='index'))
    assert env.get_observation() == State(
        perceptual_index=0,
        query_row=1,
        retrieval_index=5,
        retrieval_row=1,
        retrieval_col=0,
    ), env.get_observation()
    # next result test
    env.react(Action('next-result'))
    assert env.get_observation() == State(
        perceptual_index=0,
        query_row=1,
        retrieval_index=6,
        retrieval_row=1,
        retrieval_col=1,
    ), env.get_observation()
    # previous result test
    env.react(Action('prev-result'))
    assert env.get_observation() == State(
        perceptual_index=0,
        query_row=1,
        retrieval_index=5,
        retrieval_row=1,
        retrieval_col=0,
    ), env.get_observation()
    # complete the environment
    reward = env.react(Action('-1'))
    assert env.end_of_episode()
    assert reward == 100, reward
def test_simpletmaze_gatingmemory():
    """Test the gating memory meta-environment."""
    env = gating_memory(SimpleTMaze)(
        num_memory_slots=1,
        reward=-0.05,
        length=2,
        hint_pos=1,
    )
    env.start_new_episode()
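    # goal_x is not fixed here, so read it from the full state (it is hidden
    # from observations) and branch on it in the expected steps below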
    goal = env.get_state().goal_x
    assert env.get_state() == State(x=0,
                                    y=0,
                                    symbol=0,
                                    goal_x=goal,
                                    memory_0=None)
    expected_steps = [
        RLTestStep(
            State(x=0, y=0, symbol=0, memory_0=None),
            [
                Action('up'),
                Action('gate', slot=0, attribute='x'),
                Action('gate', slot=0, attribute='y'),
                Action('gate', slot=0, attribute='symbol'),
            ],
            Action('up'),
            -1,
        ),
        RLTestStep(
            State(x=0, y=1, symbol=goal, memory_0=None),
            [
                Action('up'),
                Action('gate', slot=0, attribute='x'),
                Action('gate', slot=0, attribute='y'),
                Action('gate', slot=0, attribute='symbol'),
            ],
            Action('gate', slot=0, attribute='symbol'),
            -0.05,
        ),
        RLTestStep(
            State(x=0, y=1, symbol=goal, memory_0=goal),
            [
                Action('up'),
                Action('gate', slot=0, attribute='x'),
                Action('gate', slot=0, attribute='y'),
                Action('gate', slot=0, attribute='symbol'),
            ],
            Action('up'),
            -1,
        ),
        RLTestStep(
            State(x=0, y=2, symbol=0, memory_0=goal),
            [
                Action('left'),
                Action('right'),
                Action('gate', slot=0, attribute='x'),
                Action('gate', slot=0, attribute='y'),
                Action('gate', slot=0, attribute='symbol'),
            ],
            Action('right' if goal == -1 else 'left'),
            -10,
        ),
        RLTestStep(
            State(x=1 if goal == -1 else -1, y=2, symbol=0, memory_0=goal),
            [],
            None,
            None,
        ),
    ]
    for expected in expected_steps:
        assert env.get_observation() == expected.observation
        assert set(env.get_actions()) == set(expected.actions)
        if expected.action is not None:
            reward = env.react(expected.action)
            assert reward == expected.reward
def test_linear_agent():
    """Test the linear approximation Q-learning agent."""
    class InfiniteGridWorld(Environment, RandomMixin):
        """An infinite gridworld. Goal is (0, 0)."""
        def __init__(self, max_size, *args, **kwargs):
            super().__init__(*args, **kwargs)
            self.max_size = max_size
            self.row = 0
            self.col = 0

        def get_state(self):  # noqa: D102
            return State(row=self.row, col=self.col)

        def get_actions(self):  # noqa: D102
            if self.row == self.col == 0:
                return []
            else:
                return [
                    Action('up'),
                    Action('down'),
                    Action('left'),
                    Action('right'),
                    Action('upleft'),
                    Action('upright'),
                    Action('downleft'),
                    Action('downright'),
                ]

        def reset(self):  # noqa: D102
            self.start_new_episode()

        def start_new_episode(self):  # noqa: D102
            while self.row == self.col == 0:
                self.row = self.rng.randrange(-self.max_size,
                                              self.max_size + 1)
                self.col = self.rng.randrange(-self.max_size,
                                              self.max_size + 1)

        def react(self, action=None):  # noqa: D102
            assert action in self.get_actions()
            if 'up' in action.name:
                self.row -= 1
            if 'down' in action.name:
                self.row += 1
            if 'left' in action.name:
                self.col -= 1
            if 'right' in action.name:
                self.col += 1
            if self.row == self.col == 0:
                return 1
            else:
                return 0

        def visualize(self):  # noqa: D102
            raise NotImplementedError

    # Only the sign of the row/col offsets from the goal at (0, 0) matters for
    # the optimal policy, so sign features are enough for a linear Q-learner.
    def feature_extractor(state, action=None):  # pylint: disable = unused-argument
        return {
            'row': (0 if state['row'] == 0 else copysign(1, state['row'])),
            'col': (0 if state['col'] == 0 else copysign(1, state['col'])),
        }

    size = 1000
    env = InfiniteGridWorld(max_size=size)
    agent = LinearQLearner(
        learning_rate=0.1,
        discount_rate=0.9,
        feature_extractor=feature_extractor,
    )
    # train the agent
    for _ in range(50):
        env.start_new_episode()
        while not env.end_of_episode():
            observation = env.get_observation()
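            # build the optimal action name from the sign of the row/col
            # offsets (e.g. row < 0 means the goal is below, so move 'down')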
            name = ''
            if observation['row'] < 0:
                name += 'down'
            elif observation['row'] > 0:
                name += 'up'
            if observation['col'] < 0:
                name += 'right'
            elif observation['col'] > 0:
                name += 'left'
            action = Action(name)
            action = agent.force_act(observation, action)
            reward = env.react(action)
            agent.observe_reward(env.get_observation(), reward)
    # test that the agent can finish within `2 * size` steps
    for _ in range(50):
        env.start_new_episode()
        step = 2 * size
        while step > 0 and not env.end_of_episode():
            observation = env.get_observation()
            action = agent.act(observation, env.get_actions())
            reward = env.react(action)
            step -= 1
        assert env.end_of_episode()
def test_gridworld():
    """Test the GridWorld environment."""
    env = GridWorld(
        width=2,
        height=3,
        start=[0, 0],
        goal=[2, 0],
    )
    env.start_new_episode()
    expected_steps = [
        RLTestStep(State(row=0, col=0),
                   [Action('down'), Action('right')], Action('right'), -1),
        RLTestStep(State(row=0, col=1),
                   [Action('down'), Action('left')], Action('down'), -1),
        RLTestStep(State(row=1, col=1),
                   [Action('up'), Action('down'),
                    Action('left')], Action('down'), -1),
        RLTestStep(State(row=2, col=1),
                   [Action('up'), Action('left')], Action('up'), -1),
        RLTestStep(State(row=1, col=1),
                   [Action('up'), Action('down'),
                    Action('left')], Action('left'), -1),
        RLTestStep(
            State(row=1, col=0),
            [Action('up'), Action('down'),
             Action('right')], Action('down'), 1),
        RLTestStep(State(row=2, col=0), [], None, None),
    ]
    for expected in expected_steps:
        assert env.get_observation() == expected.observation
        assert set(env.get_actions()) == set(expected.actions)
        if expected.action is not None:
            reward = env.react(expected.action)
            assert reward == expected.reward
def test_simpletmaze_fixedltm():
    """Test the fixed LTM meta-environment."""
    env = fixed_long_term_memory(SimpleTMaze)(
        num_wm_slots=1,
        num_ltm_slots=1,
        reward=-0.05,
        length=2,
        hint_pos=1,
        goal_x=1,
    )
    env.start_new_episode()
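    # goal_x is fixed to 1 here, so the hint symbol and the rewarded final
    # move are deterministic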
    assert env.get_state() == State(x=0,
                                    y=0,
                                    symbol=0,
                                    goal_x=1,
                                    wm_0=None,
                                    ltm_0=None)
    expected_steps = [
        RLTestStep(
            State(x=0, y=0, symbol=0, wm_0=None),
            [
                Action('up'),
                Action('store', slot=0, attribute='x'),
                Action('store', slot=0, attribute='y'),
                Action('store', slot=0, attribute='symbol'),
                Action('retrieve', wm_slot=0, ltm_slot=0),
            ],
            Action('up'),
            -1,
        ),
        RLTestStep(
            State(x=0, y=1, symbol=1, wm_0=None),
            [
                Action('up'),
                Action('store', slot=0, attribute='x'),
                Action('store', slot=0, attribute='y'),
                Action('store', slot=0, attribute='symbol'),
                Action('retrieve', wm_slot=0, ltm_slot=0),
            ],
            Action('store', slot=0, attribute='symbol'),
            -0.05,
        ),
        RLTestStep(
            State(x=0, y=1, symbol=1, wm_0=None),
            [
                Action('up'),
                Action('store', slot=0, attribute='x'),
                Action('store', slot=0, attribute='y'),
                Action('store', slot=0, attribute='symbol'),
                Action('retrieve', wm_slot=0, ltm_slot=0),
            ],
            Action('up'),
            -1,
        ),
        RLTestStep(
            State(x=0, y=2, symbol=0, wm_0=None),
            [
                Action('left'),
                Action('right'),
                Action('store', slot=0, attribute='x'),
                Action('store', slot=0, attribute='y'),
                Action('store', slot=0, attribute='symbol'),
                Action('retrieve', wm_slot=0, ltm_slot=0),
            ],
            Action('retrieve', wm_slot=0, ltm_slot=0),
            -0.05,
        ),
        RLTestStep(
            State(x=0, y=2, symbol=0, wm_0=1),
            [
                Action('left'),
                Action('right'),
                Action('store', slot=0, attribute='x'),
                Action('store', slot=0, attribute='y'),
                Action('store', slot=0, attribute='symbol'),
                Action('retrieve', wm_slot=0, ltm_slot=0),
            ],
            Action('right'),
            10,
        ),
        RLTestStep(State(x=1, y=2, symbol=0, wm_0=1), [], None, None),
    ]
    for expected in expected_steps:
        assert env.get_observation() == expected.observation
        assert set(env.get_actions()) == set(expected.actions)
        if expected.action is not None:
            reward = env.react(expected.action)
            assert reward == expected.reward