Пример #1
0
    def observation(self, board):
        """Convert a board into a one-hot numpy observation plus an action mask.

        A `Board` cannot be used as an observation. RLlib will complain and crash
        because RLlib expects arrays as observations. Therefore, we convert the
        `Board` to a numpy array with one channel per tile value: channel 0 is 1
        where the cell is empty (value 0), channel 1 marks tiles with value 2,
        channel 2 marks tiles with value 4, and in general channel `k` marks
        tiles with value `2**k`.

        The number of channels in the observation will be `1 + log2(max_tile_value)`.
        For example, `max_tile_value == 256` --> we have 9 tile values.

        Note:
            We assume all tiles are a power of 2! A tile with value 1 would be
            mapped to channel 0 and become indistinguishable from an empty cell.

        Args:
            board: The board to convert. Its `values` attribute is read as a 2-D
                array of tile values in which 0 marks an empty cell.

        Returns:
            A dict with the following keys and values:
                - 'valid_action_mask': np.ndarray(4, float)
                    The available actions (1.0 at `action.value - 1` for every
                    action reported by `Board.get_available_actions`).
                - 'board': np.ndarray of floats
                    The board (in one-hot format). Its shape is taken from
                    `self.env.observation_space["board"]`; the channel axis
                    comes first when `K.image_data_format()` is
                    "channels_first" and last otherwise.

        Raises:
            ValueError: If a tile value is not a power of 2.
        """
        # Empty cells hold 0; replace them by 1 so log2 sends them to channel 0.
        channel_indices = np.log2(np.where(board.values == 0, 1, board.values))

        frac_values, _ = np.modf(channel_indices)
        if frac_values.max() != 0:
            raise ValueError(
                "Unexpected input: got a tile that was not a power of 2. Can't "
                "safely convert observation.")
        channel_indices = channel_indices.astype(int)

        # indexing="ij" is required: it makes the raveled (row, col) grids follow
        # the same row-major order as `channel_indices.ravel()`. With the default
        # "xy" indexing each tile would be written to the transposed cell.
        yy, xx = np.meshgrid(
            *[range(dim) for dim in channel_indices.shape], indexing="ij")

        one_hot_board = np.zeros(self.env.observation_space["board"].shape)
        if K.image_data_format() == "channels_first":
            one_hot_board[channel_indices.ravel(),
                          yy.ravel(),
                          xx.ravel()] = 1.0
        else:
            one_hot_board[yy.ravel(),
                          xx.ravel(),
                          channel_indices.ravel()] = 1.0

        valid_action_mask = np.zeros(4)
        for action in Board.get_available_actions(board):
            index = action.value - 1  # enums are 1-indexed, so we subtract by 1.
            valid_action_mask[index] = 1.0

        processed_obs = {
            "valid_action_mask": valid_action_mask,
            "board": one_hot_board
        }
        return processed_obs
Пример #2
0
def test_available_actions_work_as_expected():
    """Check `Board.get_available_actions` against three hand-written boards."""
    # Full board whose only equal neighbours are the two 1s stacked in the
    # first column: only the vertical moves are expected.
    vertical_only = board_from_string(
        """1 2 3
           1 4 5
           6 7 8""")
    assert Board.get_available_actions(vertical_only) == {
        Action.DOWN, Action.UP}

    # Full board where every tile is distinct: no move is expected.
    stuck = board_from_string(
        """ 1  2  3  4
            5  6  7  8
            9 10 11 12
           13 14 15 16""")
    assert Board.get_available_actions(stuck) == set()

    # The 2s touch each other both horizontally and vertically: all four
    # moves are expected.
    mergeable = board_from_string(
        """ 1  2  2  4
            5  2  7  8
            9 10 11 12
           13 14 15 16""")
    assert Board.get_available_actions(mergeable) == {
        Action.LEFT, Action.RIGHT, Action.UP, Action.DOWN}