def observation(self, board):
    """Convert a `Board` into a one-hot numpy observation plus an action mask.

    A `Board` cannot be used as an observation. RLlib will complain and
    crash because RLlib expects arrays as observations. Therefore, we
    convert the `Board` to a numpy array with one channel per tile value:
    channel 0 is 1 where the cell is empty, channel 1 marks tiles with
    value 2, channel 2 marks tiles with value 4, and so on (channel `k`
    marks tiles with value `2 ** k`).

    The number of channels in the observation will be
    `1 + log2(max_tile_value)`. For example, `max_tile_value == 256` -->
    we have 9 channels.

    Note: We assume all tiles are a power of 2!

    Args:
        board: The board to convert. `board.values` is read as a 2-D array
            of tile values in which 0 denotes an empty cell.

    Returns:
        A dict with the following keys and values:
        - 'valid_action_mask': np.ndarray(4, float)
            1.0 at `action.value - 1` for every action returned by
            `Board.get_available_actions(board)`, 0.0 elsewhere.
        - 'board': np.ndarray, float
            The board in one-hot format. Its shape is taken from
            `self.env.observation_space["board"]`; the layout is
            (n_rows, n_cols, n_channels), or (n_channels, n_rows, n_cols)
            when `K.image_data_format()` is "channels_first".

    Raises:
        ValueError: If any tile value is not a power of 2.
    """
    tile_values = board.values
    # Empty cells (0) are mapped to 1 so that log2 sends them to channel 0.
    channel_indices = np.log2(np.where(tile_values == 0, 1, tile_values))
    frac_values, _ = np.modf(channel_indices)
    if frac_values.max() != 0:
        raise ValueError(
            "Unexpected input: got a tile that was not a power of 2. Can't "
            "safely convert observation.")
    channel_indices = channel_indices.astype(int)

    # indexing="ij" makes both grids share the board's (n_rows, n_cols) shape,
    # so their raveled order lines up with `channel_indices.ravel()`. The
    # default "xy" indexing yields (n_cols, n_rows) grids, which would pair
    # every cell with the channel of a different cell (a transposed board).
    yy, xx = np.meshgrid(
        *[range(dim) for dim in channel_indices.shape], indexing="ij")

    one_hot_board = np.zeros(self.env.observation_space["board"].shape)
    if K.image_data_format() == "channels_first":
        one_hot_board[channel_indices.ravel(), yy.ravel(), xx.ravel()] = 1.0
    else:
        one_hot_board[yy.ravel(), xx.ravel(), channel_indices.ravel()] = 1.0

    valid_action_mask = np.zeros(4)
    for action in Board.get_available_actions(board):
        index = action.value - 1  # enums are 1-indexed, so we subtract by 1.
        valid_action_mask[index] = 1.0

    processed_obs = {
        "valid_action_mask": valid_action_mask,
        "board": one_hot_board,
    }
    return processed_obs
def test_available_actions_work_as_expected():
    """Check `Board.get_available_actions` against three hand-written boards.

    Each case pairs a board layout string (parsed with `board_from_string`)
    with the exact set of actions the board is expected to report.
    """
    # NOTE(review): the layout strings appear on a single line here; if
    # `board_from_string` relies on newlines to separate rows, confirm that
    # the literals in the file still contain them.
    cases = [
        # 9 tiles; only the vertical moves are expected.
        ("""1 2 3 1 4 5 6 7 8""", {Action.DOWN, Action.UP}),
        # 16 distinct tiles; no action is expected.
        (""" 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16""", set()),
        # 16 tiles with repeated 2s; all four actions are expected.
        (""" 1 2 2 4 5 2 7 8 9 10 11 12 13 14 15 16""",
         {Action.LEFT, Action.RIGHT, Action.UP, Action.DOWN}),
    ]

    for layout, expected_actions in cases:
        parsed_board = board_from_string(layout)
        assert Board.get_available_actions(parsed_board) == expected_actions