Example #1
def draw_info(batch, window_width, window_height, upper_grid_coord, state):
    turn = GoGame.get_turn(state)
    turn_str = 'B' if turn == BLACK else 'W'
    prev_player_passed = GoGame.get_prev_player_passed(state)
    game_ended = GoGame.get_game_ended(state)
    info_label = "Turn: {}\nPassed: {}\nGame: {}".format(
        turn_str, prev_player_passed, "OVER" if game_ended else "ONGOING")

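    # turn / pass / game-status label, anchored to the top-right corner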
    pyglet.text.Label(info_label,
                      font_name='Helvetica',
                      font_size=11,
                      x=window_width - 20,
                      y=window_height - 20,
                      anchor_x='right',
                      anchor_y='top',
                      color=(0, 0, 0, 192),
                      batch=batch,
                      width=window_width / 2,
                      align='right',
                      multiline=True)

    # Areas
    black_area, white_area = GoGame.get_areas(state)
    pyglet.text.Label("{}B | {}W".format(black_area, white_area),
                      font_name='Helvetica',
                      font_size=16,
                      x=window_width / 2,
                      y=upper_grid_coord + 80,
                      anchor_x='center',
                      color=(0, 0, 0, 192),
                      batch=batch,
                      width=window_width,
                      align='center')
Example #2
 def step(self, action):
     '''
     Assumes the correct player is making a move. Black goes first.
     return observation, reward, done, info
     '''
     if action is None:
         action = self.size**2
     elif isinstance(action, (tuple, list, np.ndarray)):
         assert action[0] >= 0 and action[1] >= 0
         assert action[0] < self.size and action[1] < self.size
         action = action[0] * self.size + action[1]
     if self.children is not None:
         valid_moves = self.get_valid_moves()
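         # children are cached in valid-move order, so the child for `action`
         # sits at the count of valid moves that precede it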
         child_idx = int(np.sum(valid_moves[:action]))
         self.state = self.children[child_idx]
         self.group_map = self.child_groupmaps[child_idx]
     else:
         self.state, self.group_map = GoGame.get_next_state(self.state,
                                                            action,
                                                            self.group_map,
                                                            inplace=True)
     self.clear_cache()
     done = GoGame.get_game_ended(self.state)
     return np.copy(self.state), self.get_reward(), done, self.get_info()
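A minimal usage sketch for this step signature (hedged: it assumes the surrounding class is constructed as in Example #5 below and importable as GoEnv; neither name is fixed by the snippet itself):

env = GoEnv(size=5)
obs = env.reset()
obs, reward, done, info = env.step((2, 2))  # 2D board coordinate, flattened internally
obs, reward, done, info = env.step(None)    # None is treated as a pass (size ** 2)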
Example #3
 def get_info(self):
     """
     :return: Debugging info for the state
     """
     return {
         'prev_player_passed': GoGame.get_prev_player_passed(self.state),
         'turn': 'b' if GoGame.get_turn(self.state) == GoEnv.govars.BLACK else 'w',
         'game_ended': GoGame.get_game_ended(self.state)
     }
Example #4
    def cache_children(self, canonical=False):
        """
        Caches the children of the current state.
        :return: a copy of the children (canonical form if requested) and their group maps
        """
        self.children, self.child_groupmaps = GoGame.get_children(
            self.state, self.group_map)
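        # shallow-copy the lists so the canonicalization below leaves the cache intact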
        children = self.children.copy()
        child_groupmaps = self.child_groupmaps.copy()
        if canonical:
            for i in range(len(children)):
                children[i] = GoGame.get_canonical_form(children[i])

        return children, child_groupmaps
Example #5
 def __init__(self, size, reward_method='real', black_first=True):
     '''
     @param reward_method: either 'heuristic' or 'real'
     heuristic: gives # black pieces - # white pieces.
     real: gives 0 for in-game move, 1 for winning, -1 for losing,
         0 for draw, all from black player's perspective
     '''
     self.size = size
     self.state = GoGame.get_init_board(size, black_first)
     self.reward_method = RewardMethod(reward_method)
     self.observation_space = gym.spaces.Box(0, 6, shape=(6, size, size))
     self.action_space = gym.spaces.Discrete(
         GoGame.get_action_size(self.state))
     self.group_map = np.empty(self.state.shape[1:], dtype=object)
     self.clear_cache()
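Since the step methods encode a pass as size ** 2, the action space presumably holds size ** 2 + 1 entries. A quick sanity-check sketch under that assumption:

env = GoEnv(size=7)
assert env.action_space.n == 7 * 7 + 1           # every intersection plus a pass
assert env.observation_space.shape == (6, 7, 7)  # 6-channel board encoding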
Example #6
 def step(self, action):
     '''
     Assumes the correct player is making a move. Black goes first.
     return observation, reward, done, info
     '''
     if action is None:
         action = self.size**2
     elif isinstance(action, (tuple, list, np.ndarray)):
         assert action[0] >= 0 and action[1] >= 0
         assert action[0] < self.size and action[1] < self.size
         action = action[0] * self.size + action[1]
     self.state = GoGame.get_next_state(self.state, action)
     done = GoGame.get_game_ended(self.state)
     return np.copy(self.state), self.get_reward(), done, self.get_info()
Example #7
 def __init__(self, size, komi=0, reward_method='real'):
     '''
     @param reward_method: either 'heuristic' or 'real'
     heuristic: gives # black pieces - # white pieces.
     real: gives 0 for in-game move, 1 for winning, -1 for losing,
         0 for draw, all from black player's perspective
     '''
     self.size = size
     self.komi = komi
     self.state = GoGame.get_init_board(size)
     self.reward_method = RewardMethod(reward_method)
     self.observation_space = gym.spaces.Box(np.float32(0), np.float32(govars.NUM_CHNLS),
                                             shape=(govars.NUM_CHNLS, size, size))
     self.action_space = gym.spaces.Discrete(GoGame.get_action_size(self.state))
     self.group_map = [set(), set()]
     self.done = False
Example #8
 def reset(self, black_first=True):
     '''
     Reset state, go_board, curr_player, prev_player_passed,
     done, return state
     '''
     self.state = GoGame.get_init_board(self.size, black_first)
     self.group_map = np.empty(self.state.shape[1:], dtype=object)
     self.clear_cache()
     return np.copy(self.state)
Example #9
 def reset(self):
     '''
     Reset state, go_board, curr_player, prev_player_passed,
     done, return state
     '''
     self.state = GoGame.get_init_board(self.size)
     self.group_map = [set(), set()]
     self.done = False
     return np.copy(self.state)
Example #10
    def step(self, action):
        '''
        Assumes the correct player is making a move. Black goes first.
        return observation, reward, done, info
        '''
        assert not self.done
        if isinstance(action, (tuple, list, np.ndarray)):
            assert 0 <= action[0] < self.size
            assert 0 <= action[1] < self.size
            action = self.size * action[0] + action[1]
        elif action is None:
            action = self.size ** 2

        actions = np.array([action])
        states, group_maps = GoGame.get_next_states(self.state, actions, self.group_map)
        self.state, self.group_map = states[0], group_maps[0]
        self.done = GoGame.get_game_ended(self.state)
        return np.copy(self.state), self.get_reward(), self.done, self.get_info()
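The single move is wrapped into a length-one array before calling GoGame.get_next_states, which takes one state and several candidate actions. A hedged sketch of expanding multiple moves from the same position with that call (the return layout is inferred from the wrapper above, not documented):

import numpy as np

actions = np.array([0, 12, env.size ** 2])  # two plays and a pass
states, group_maps = GoGame.get_next_states(env.state, actions, env.group_map)
assert len(states) == len(actions)          # one successor per action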
Example #11
 def reset(self, black_first=True, state=None):
     '''
     Reset state, go_board, curr_player, prev_player_passed,
     done, return state
     '''
     if state is None:
         self.state = GoGame.get_init_board(self.size, black_first)
     else:
         assert state.shape[1] == self.size
         self.state = np.copy(state)
     return np.copy(self.state)
Example #12
 def __init__(self,
              size,
              reward_method='real',
              black_first=True,
              state=None):
     '''
     @param reward_method: either 'heuristic' or 'real'
     heuristic: gives # black pieces - # white pieces.
     real: gives 0 for in-game move, 1 for winning, -1 for losing,
         0 for draw, all from black player's perspective
     '''
     self.size = size
     if state is None:
         self.state = GoGame.get_init_board(size, black_first)
     else:
         assert state.shape[1] == size
         self.state = np.copy(state)
     self.reward_method = RewardMethod(reward_method)
     self.observation_space = gym.spaces.Box(0, 6, shape=(6, size, size))
     self.action_space = gym.spaces.Discrete(
         GoGame.get_action_size(self.state))
Example #13
    def step_batch(self, state, action):
        '''
        Assumes the correct player is making a move. Black goes first.
        Returns observation, reward, done, info.
        Unlike step, this does not mutate the environment's stored state.
        '''
        assert not self.done
        if isinstance(action, (tuple, list, np.ndarray)):
            assert 0 <= action[0] < self.size
            assert 0 <= action[1] < self.size
            action = self.size * action[0] + action[1]
        elif action is None:
            action = self.size**2

        actions = np.array([action])
        next_states, next_group_maps = GoGame.get_batch_next_states(
            state, actions, self.group_map)
        next_state, next_group_map = next_states[0], next_group_maps[0]
        next_done = GoGame.get_game_ended(next_state)
        reward = self.get_reward_batch(next_state, next_done)
        info = self.get_info_batch(next_state)
        return np.copy(next_state), reward, next_done, info
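Because step_batch takes the state as an argument and never writes back to self.state, it can serve as a one-ply lookahead. A hedged sketch, assuming an env instance of this class and that the batch call does not mutate its input state:

import numpy as np

before = np.copy(env.state)
obs, reward, done, info = env.step_batch(env.state, (3, 3))  # peek at one move
assert np.array_equal(env.state, before)                     # stored state untouched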
Example #14
    def get_winning(self):
        """
        :return: who's currently winning (1 for black, 0.5 for a tie, 0 for white), regardless of whether the game is over
        """
        black_area, white_area = GoGame.get_areas(self.state)
        area_difference = black_area - white_area

        if area_difference > 0:
            return 1
        elif area_difference == 0:
            return 0.5
        else:
            assert area_difference < 0
            return 0
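As a concrete trace: black_area=13 versus white_area=12 returns 1 (black ahead), equal areas return 0.5, and any negative difference falls through the assert to return 0 (white ahead).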
Example #15
    def get_reward(self):
        '''
        Return reward based on reward_method.
        heuristic: black total area - white total area
        real: 0 for in-game move, 1 for winning, 0 for losing,
            0.5 for draw, from black player's perspective.
            Winning and losing are based on the area rule,
            also known as Tromp-Taylor scoring
        Area rule definition: https://en.wikipedia.org/wiki/Rules_of_Go#End
        '''
        if self.reward_method == RewardMethod.REAL:
            return self.get_winner()

        elif self.reward_method == RewardMethod.HEURISTIC:
            black_area, white_area = GoGame.get_areas(self.state)
            area_difference = black_area - white_area
            if self.game_ended():
                return (1 if area_difference > 0 else -1) * self.size**2
            return area_difference
        else:
            raise Exception("Unknown Reward Method")
Example #16
 def turn(self):
     return GoGame.get_turn(self.state)
Example #17
class GoEnv(gym.Env):
    metadata = {'render.modes': ['terminal', 'human']}
    gogame = GoGame()
    govars = govars

    def __init__(self, size, reward_method='real'):
        '''
        @param reward_method: either 'heuristic' or 'real'
        heuristic: gives # black pieces - # white pieces.
        real: gives 0 for in-game move, 1 for winning, -1 for losing,
            0 for draw, all from black player's perspective
        '''
        self.size = size
        self.state = GoGame.get_init_board(size)
        self.reward_method = RewardMethod(reward_method)
        self.observation_space = gym.spaces.Box(0,
                                                govars.NUM_CHNLS,
                                                shape=(govars.NUM_CHNLS, size,
                                                       size))
        self.action_space = gym.spaces.Discrete(
            GoGame.get_action_size(self.state))
        self.group_map = [set(), set()]
        self.done = False

    def reset(self):
        '''
        Reset state, go_board, curr_player, prev_player_passed,
        done, return state
        '''
        self.state = GoGame.get_init_board(self.size)
        self.group_map = [set(), set()]
        self.done = False
        return np.copy(self.state)

    def step(self, action):
        '''
        Assumes the correct player is making a move. Black goes first.
        return observation, reward, done, info
        '''
        assert not self.done
        if isinstance(action, (tuple, list, np.ndarray)):
            assert 0 <= action[0] < self.size
            assert 0 <= action[1] < self.size
            action = self.size * action[0] + action[1]
        elif action is None:
            action = self.size**2

        actions = np.array([action])
        states, group_maps = GoGame.get_batch_next_states(
            self.state, actions, self.group_map)
        self.state, self.group_map = states[0], group_maps[0]
        self.done = GoGame.get_game_ended(self.state)
        return np.copy(self.state), self.get_reward(), self.done, self.get_info()

    def game_ended(self):
        return self.done

    def turn(self):
        return GoGame.get_turn(self.state)

    def prev_player_passed(self):
        return GoGame.get_prev_player_passed(self.state)

    def get_valid_moves(self):
        return GoGame.get_valid_moves(self.state)

    def action_2d_to_1d(self, action_2d):
        if action_2d is None:
            action_1d = self.size**2
        else:
            action_1d = action_2d[0] * self.size + action_2d[1]
        return action_1d

    def uniform_random_action(self):
        valid_moves = self.get_valid_moves()
        valid_move_idcs = np.argwhere(valid_moves).flatten()
        return np.random.choice(valid_move_idcs)

    def get_info(self):
        """
        :return: Debugging info for the state
        """
        return {
            'prev_player_passed': GoGame.get_prev_player_passed(self.state),
            'turn': 'b' if GoGame.get_turn(self.state) == GoEnv.govars.BLACK else 'w',
            'game_ended': GoGame.get_game_ended(self.state)
        }

    def get_state(self):
        """
        :return: copy of state
        """
        return np.copy(self.state)

    def get_canonical_state(self):
        """
        :return: canonical shallow copy of state
        """
        return GoGame.get_canonical_form(self.state)

    def get_canonical_group_map(self):
        if self.turn() == govars.BLACK:
            return self.group_map
        else:
            return list(reversed(self.group_map))

    def get_children(self, canonical=False):
        """
        :return: children of the current state, in canonical form if requested
        """
        return GoGame.get_children(self.state, self.group_map, canonical)

    def get_winning(self):
        """
        :return: who's currently winning from BLACK's perspective, regardless of whether the game is over
        """
        return GoGame.get_winning(self.state)

    def get_winner(self):
        """
        Gets the winner from BLACK's perspective
        :return: the winning value if the game is over, otherwise 0
        """

        if self.game_ended():
            return self.get_winning()
        else:
            return 0

    def get_reward(self):
        '''
        Return reward based on reward_method.
        heuristic: black total area - white total area
        real: 0 for in-game move, 1 for winning, 0 for losing,
            0.5 for draw, from black player's perspective.
            Winning and losing are based on the area rule,
            also known as Tromp-Taylor scoring
        Area rule definition: https://en.wikipedia.org/wiki/Rules_of_Go#End
        '''
        if self.reward_method == RewardMethod.REAL:
            return self.get_winner()

        elif self.reward_method == RewardMethod.HEURISTIC:
            black_area, white_area = GoGame.get_areas(self.state)
            area_difference = black_area - white_area
            if self.game_ended():
                return (1 if area_difference > 0 else -1) * self.size**2
            return area_difference
        else:
            raise Exception("Unknown Reward Method")

    def __str__(self):
        return GoGame.str(self.state)

    def close(self):
        if hasattr(self, 'window'):
            assert hasattr(self, 'pyglet')
            self.window.close()
            self.pyglet.app.exit()

    def render(self, mode='terminal'):
        if mode == 'terminal':
            print(self.__str__())
        elif mode == 'human':
            import pyglet
            from pyglet.window import mouse
            from pyglet.window import key

            # pyglet.window.get_platform() is the legacy (pre-1.5) pyglet API;
            # newer versions expose the display via pyglet.canvas.get_display()
            display = pyglet.window.get_platform().get_default_display()
            screen = display.get_default_screen()
            window_width = int(min(screen.width, screen.height) * 2 / 3)
            window_height = int(window_width * 1.2)
            window = pyglet.window.Window(window_width, window_height)

            self.window = window
            self.pyglet = pyglet
            self.user_action = None

            # Set Cursor
            cursor = window.get_system_mouse_cursor(window.CURSOR_CROSSHAIR)
            window.set_mouse_cursor(cursor)

            # Outlines
            lower_grid_coord = window_width * 0.075
            board_size = window_width * 0.85
            upper_grid_coord = board_size + lower_grid_coord
            delta = board_size / (self.size - 1)
            piece_r = delta / 3.3  # radius

            @window.event
            def on_draw():
                pyglet.gl.glClearColor(0.7, 0.5, 0.3, 1)
                window.clear()

                pyglet.gl.glLineWidth(3)
                batch = pyglet.graphics.Batch()

                # draw the grid and labels
                rendering.draw_grid(batch, delta, self.size, lower_grid_coord,
                                    upper_grid_coord)

                # info on top of the board
                rendering.draw_info(batch, window_width, window_height,
                                    upper_grid_coord, self.state)

                # Inform user what they can do
                rendering.draw_command_labels(batch, window_width,
                                              window_height)

                rendering.draw_title(batch, window_width, window_height)

                batch.draw()

                # draw the pieces
                rendering.draw_pieces(batch, lower_grid_coord, delta, piece_r,
                                      self.size, self.state)

            @window.event
            def on_mouse_press(x, y, button, modifiers):
                if button == mouse.LEFT:
                    grid_x = (x - lower_grid_coord)
                    grid_y = (y - lower_grid_coord)
                    x_coord = round(grid_x / delta)
                    y_coord = round(grid_y / delta)
                    try:
                        self.window.close()
                        pyglet.app.exit()
                        self.user_action = (x_coord, y_coord)
                    except Exception:
                        pass

            @window.event
            def on_key_press(symbol, modifiers):
                if symbol == key.P:
                    self.window.close()
                    pyglet.app.exit()
                    self.user_action = None
                elif symbol == key.R:
                    self.reset()
                    self.window.close()
                    pyglet.app.exit()
                elif symbol == key.E:
                    self.window.close()
                    pyglet.app.exit()
                    self.user_action = -1

            pyglet.app.run()

            return self.user_action
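Putting the pieces together, a hedged end-to-end sketch that plays uniformly random legal moves until the game ends (the board size is arbitrary; every method name comes from the class above):

env = GoEnv(size=7)
obs = env.reset()
done = False
while not done:
    action = env.uniform_random_action()  # index of a sampled legal move
    obs, reward, done, info = env.step(action)
env.render(mode='terminal')               # print the final position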
Example #18
 def __str__(self):
     return GoGame.str(self.state)
Example #19
 def get_winning(self):
     """
     :return: who's currently winning from BLACK's perspective, regardless of whether the game is over
     """
     return GoGame.get_winning(self.state)
Example #20
 def get_canonical_state(self):
     """
     :return: canonical shallow copy of state
     """
     return GoGame.get_canonical_form(self.state)
Example #21
 def game_ended(self):
     return GoGame.get_game_ended(self.state)
Example #22
 def prev_player_passed(self):
     return GoGame.get_prev_player_passed(self.state)
Example #23
 def get_children(self, canonical=False):
     """
     :return: children of the current state, in canonical form if requested
     """
     return GoGame.get_children(self.state, self.group_map, canonical)
Example #24
 def get_valid_moves(self):
     return GoGame.get_valid_moves(self.state)
Example #25
 def get_canonical_state(self):
     return GoGame.get_canonical_form(self.state)