Python GoGame示例，gym_go.gogame.GoGame Python示例

示例#1

0

显示文件

文件： rendering.py 项目： mattrym/GymGo

def draw_info(batch, window_width, window_height, upper_grid_coord, state):
    """Attach the status and area-score text labels to ``batch``.

    Two ``pyglet.text.Label`` objects are created:

    * a right-aligned, multi-line block ("Turn", "Passed", "Game") anchored
      20 units in from the top-right corner of the window, and
    * a centred "<black>B | <white>W" line placed 80 units above
      ``upper_grid_coord``.

    :param batch: pyglet batch the labels are added to
    :param window_width: window width; positions both labels and sets
        their widths
    :param window_height: window height; positions the status block
    :param upper_grid_coord: vertical reference coordinate for the area line
    :param state: game state handed to the ``GoGame`` query helpers
    """
    # --- Status block: side to move, pass flag, game-over flag ---
    side_to_move = 'B' if GoGame.get_turn(state) == BLACK else 'W'
    passed = GoGame.get_prev_player_passed(state)
    status = "OVER" if GoGame.get_game_ended(state) else "ONGOING"
    status_text = "Turn: {}\nPassed: {}\nGame: {}".format(
        side_to_move, passed, status)

    pyglet.text.Label(
        status_text,
        font_name='Helvetica',
        font_size=11,
        x=window_width - 20,
        y=window_height - 20,
        anchor_x='right',
        anchor_y='top',
        color=(0, 0, 0, 192),
        width=window_width / 2,
        align='right',
        multiline=True,
        batch=batch,
    )

    # --- Area line: current area count for each colour ---
    areas = GoGame.get_areas(state)
    black_area, white_area = areas
    area_text = "{}B | {}W".format(black_area, white_area)

    pyglet.text.Label(
        area_text,
        font_name='Helvetica',
        font_size=16,
        x=window_width / 2,
        y=upper_grid_coord + 80,
        anchor_x='center',
        color=(0, 0, 0, 192),
        width=window_width,
        align='center',
        batch=batch,
    )

示例#2

0

显示文件

文件： go_env.py 项目： mattrym/GymGo

 def step(self, action):
     '''
     Play ``action`` and return ``(observation, reward, done, info)``.

     The caller is responsible for alternating players correctly.
     ``action`` may be ``None`` (converted to index ``size**2``), a
     tuple/list/ndarray whose first two entries are board coordinates
     (converted to ``action[0] * size + action[1]``), or an index that is
     used as given.

     When ``self.children`` is populated the next state is taken from
     that cache; otherwise it is computed by ``GoGame.get_next_state``.
     The cache is cleared afterwards in both cases.
     '''
     if action is None:
         action = self.size**2
     elif isinstance(action, (tuple, list, np.ndarray)):
         assert action[0] >= 0 and action[1] >= 0
         row, col = action[0], action[1]
         assert row < self.size and col < self.size
         action = row * self.size + col

     if self.children is None:
         next_state, next_group_map = GoGame.get_next_state(
             self.state, action, self.group_map, inplace=True)
     else:
         # The cache is indexed by the number of valid moves that come
         # before ``action`` (presumably one cached child per valid move).
         valid_moves = self.get_valid_moves()
         child_idx = int(np.sum(valid_moves[:action]))
         next_state = self.children[child_idx]
         next_group_map = self.child_groupmaps[child_idx]
     self.state, self.group_map = next_state, next_group_map

     self.clear_cache()
     observation = np.copy(self.state)
     reward = self.get_reward()
     done = GoGame.get_game_ended(self.state)
     return observation, reward, done, self.get_info()

示例#3

0

显示文件

 def get_info(self):
     """
     Collect debugging information about the current state.

     :return: dict with the keys ``'prev_player_passed'``, ``'turn'``
         (``'b'`` when ``GoGame.get_turn`` reports BLACK, otherwise
         ``'w'``) and ``'game_ended'``
     """
     state = self.state
     info = {}
     info['prev_player_passed'] = GoGame.get_prev_player_passed(state)
     if GoGame.get_turn(state) == GoEnv.govars.BLACK:
         info['turn'] = 'b'
     else:
         info['turn'] = 'w'
     info['game_ended'] = GoGame.get_game_ended(state)
     return info

示例#4

0

显示文件

文件： go_env.py 项目： mattrym/GymGo

    def cache_children(self, canonical=False):
        """
        Compute the children of the current state and cache them.

        The result of ``GoGame.get_children`` is stored on the instance as
        ``self.children`` / ``self.child_groupmaps``; copies of both are
        returned to the caller.

        :param canonical: when true, every entry of the returned children
            copy is replaced by its ``GoGame.get_canonical_form``
        :return: ``(children, child_groupmaps)`` copies
        """
        cached = GoGame.get_children(self.state, self.group_map)
        self.children, self.child_groupmaps = cached

        children_out = self.children.copy()
        groupmaps_out = self.child_groupmaps.copy()
        if canonical:
            # Written back by index so the container type of the copy is
            # preserved.
            for idx, child in enumerate(children_out):
                children_out[idx] = GoGame.get_canonical_form(child)

        return children_out, groupmaps_out

示例#5

0

显示文件

文件： go_env.py 项目： mattrym/GymGo

 def __init__(self, size, reward_method='real', black_first=True):
     '''
     Set up a new environment on a ``size`` x ``size`` board.

     @param size: board side length
     @param reward_method: either 'heuristic' or 'real'
     heuristic: gives # black pieces - # white pieces.
     real: gives 0 for in-game move, 1 for winning, -1 for losing,
         0 for draw, all from black player's perspective
     @param black_first: forwarded to ``GoGame.get_init_board``
     '''
     self.size = size
     board = GoGame.get_init_board(size, black_first)
     self.state = board
     self.reward_method = RewardMethod(reward_method)

     # Six channels of size x size, each value bounded by [0, 6].
     obs_shape = (6, size, size)
     self.observation_space = gym.spaces.Box(0, 6, shape=obs_shape)
     num_actions = GoGame.get_action_size(self.state)
     self.action_space = gym.spaces.Discrete(num_actions)

     # One object slot per board position (the state's trailing dims).
     self.group_map = np.empty(self.state.shape[1:], dtype=object)
     self.clear_cache()

示例#6

0

显示文件

文件： go_env.py 项目： Hizoul/GymGo

 def step(self, action):
     '''
     Play ``action`` and return ``(observation, reward, done, info)``.

     The caller is responsible for alternating players correctly.
     ``action`` may be ``None`` (converted to index ``size**2``), a
     tuple/list/ndarray whose first two entries are board coordinates
     (converted to ``action[0] * size + action[1]``), or an index that is
     used as given.
     '''
     if action is None:
         action = self.size**2
     elif isinstance(action, (tuple, list, np.ndarray)):
         assert action[0] >= 0 and action[1] >= 0
         row, col = action[0], action[1]
         assert row < self.size and col < self.size
         action = row * self.size + col

     self.state = GoGame.get_next_state(self.state, action)

     observation = np.copy(self.state)
     reward = self.get_reward()
     done = GoGame.get_game_ended(self.state)
     return observation, reward, done, self.get_info()

示例#7

0

显示文件

 def __init__(self, size, komi=0, reward_method='real'):
     '''
     Set up a new environment on a ``size`` x ``size`` board.

     @param size: board side length
     @param komi: stored on the instance as ``self.komi``
     @param reward_method: either 'heuristic' or 'real'
     heuristic: gives # black pieces - # white pieces.
     real: gives 0 for in-game move, 1 for winning, -1 for losing,
         0 for draw, all from black player's perspective
     '''
     self.size = size
     self.komi = komi
     self.state = GoGame.get_init_board(size)
     self.reward_method = RewardMethod(reward_method)

     # Observation bounds and channel count both come from NUM_CHNLS.
     low = np.float32(0)
     high = np.float32(govars.NUM_CHNLS)
     obs_shape = (govars.NUM_CHNLS, size, size)
     self.observation_space = gym.spaces.Box(low, high, shape=obs_shape)

     num_actions = GoGame.get_action_size(self.state)
     self.action_space = gym.spaces.Discrete(num_actions)

     # Two initially empty sets (presumably one per colour).
     self.group_map = [set(), set()]
     self.done = False

示例#8

0

显示文件

文件： go_env.py 项目： mattrym/GymGo

 def reset(self, black_first=True):
     '''
     Start a new game and return a copy of the fresh state.

     The state is replaced by a new initial board, the group map becomes
     an empty object-dtype array shaped like the state's trailing
     dimensions, and cached data is dropped via ``clear_cache``.

     :param black_first: forwarded to ``GoGame.get_init_board``
     '''
     fresh_board = GoGame.get_init_board(self.size, black_first)
     self.state = fresh_board
     board_dims = fresh_board.shape[1:]
     self.group_map = np.empty(board_dims, dtype=object)
     self.clear_cache()
     return np.copy(self.state)

示例#9

0

显示文件

 def reset(self):
     '''
     Start a new game and return a copy of the fresh state.

     The state is replaced by a new initial board, the group map becomes
     two empty sets, and the ``done`` flag is cleared.
     '''
     self.done = False
     self.group_map = [set(), set()]
     self.state = GoGame.get_init_board(self.size)
     snapshot = np.copy(self.state)
     return snapshot

示例#10

0

显示文件

    def step(self, action):
        '''
        Play ``action`` and return ``(observation, reward, done, info)``.

        Must not be called once the game is done. The caller is
        responsible for alternating players correctly. ``action`` may be
        ``None`` (converted to index ``size**2``), a tuple/list/ndarray of
        board coordinates (converted to ``size * action[0] + action[1]``),
        or an index that is used as given.
        '''
        assert not self.done
        if action is None:
            action = self.size ** 2
        elif isinstance(action, (tuple, list, np.ndarray)):
            row = action[0]
            assert 0 <= row < self.size
            col = action[1]
            assert 0 <= col < self.size
            action = self.size * row + col

        # get_next_states works on an array of actions; submit just one.
        single_action = np.array([action])
        next_states, next_group_maps = GoGame.get_next_states(
            self.state, single_action, self.group_map)
        self.state = next_states[0]
        self.group_map = next_group_maps[0]
        self.done = GoGame.get_game_ended(self.state)

        observation = np.copy(self.state)
        reward = self.get_reward()
        return observation, reward, self.done, self.get_info()

示例#11

0

显示文件

文件： go_env.py 项目： Hizoul/GymGo

 def reset(self, black_first=True, state=None):
     '''
     Restart the environment and return a copy of the resulting state.

     :param black_first: forwarded to ``GoGame.get_init_board`` when no
         ``state`` is supplied
     :param state: optional state to resume from; its second dimension
         must equal the board size, and it is copied before being stored
     '''
     if state is not None:
         assert state.shape[1] == self.size
         self.state = np.copy(state)
     else:
         self.state = GoGame.get_init_board(self.size, black_first)
     snapshot = np.copy(self.state)
     return snapshot

示例#12

0

显示文件

文件： go_env.py 项目： Hizoul/GymGo

 def __init__(self, size, reward_method='real', black_first=True, state=None):
     '''
     Set up a new environment on a ``size`` x ``size`` board.

     @param size: board side length
     @param reward_method: either 'heuristic' or 'real'
     heuristic: gives # black pieces - # white pieces.
     real: gives 0 for in-game move, 1 for winning, -1 for losing,
         0 for draw, all from black player's perspective
     @param black_first: forwarded to ``GoGame.get_init_board`` when no
         ``state`` is supplied
     @param state: optional state to start from; its second dimension
         must equal ``size``, and it is copied before being stored
     '''
     self.size = size
     if state is not None:
         assert state.shape[1] == size
         self.state = np.copy(state)
     else:
         self.state = GoGame.get_init_board(size, black_first)
     self.reward_method = RewardMethod(reward_method)

     # Six channels of size x size, each value bounded by [0, 6].
     obs_shape = (6, size, size)
     self.observation_space = gym.spaces.Box(0, 6, shape=obs_shape)
     num_actions = GoGame.get_action_size(self.state)
     self.action_space = gym.spaces.Discrete(num_actions)

示例#13

0

显示文件

    def step_batch(self, state, action):
        '''
        Compute the result of playing ``action`` on ``state`` without
        updating the environment's own state.

        Must not be called once the environment is done. ``action`` may be
        ``None`` (converted to index ``size**2``), a tuple/list/ndarray of
        board coordinates (converted to ``size * action[0] + action[1]``),
        or an index that is used as given.

        NOTE(review): the group map passed along is ``self.group_map``,
        not one derived from ``state`` - confirm this is intended.

        :return: (copy of next state, reward, done flag, info)
        '''
        assert not self.done
        if action is None:
            action = self.size**2
        elif isinstance(action, (tuple, list, np.ndarray)):
            row = action[0]
            assert 0 <= row < self.size
            col = action[1]
            assert 0 <= col < self.size
            action = self.size * row + col

        # The batch API works on an array of actions; submit just one.
        single_action = np.array([action])
        batch_result = GoGame.get_batch_next_states(
            state, single_action, self.group_map)
        next_states, next_group_maps = batch_result
        # The group map is indexed as well (to mirror the state lookup)
        # but its value is not used afterwards.
        next_state, _unused_group_map = next_states[0], next_group_maps[0]
        next_done = GoGame.get_game_ended(next_state)

        observation = np.copy(next_state)
        reward = self.get_reward_batch(next_state, next_done)
        return observation, reward, next_done, self.get_info_batch(next_state)

示例#14

0

显示文件

文件： go_env.py 项目： Hizoul/GymGo

    def get_winning(self):
        """
        Report who is ahead on area in the current state, whether or not
        the game has finished.

        :return: 1 when black's area is larger, 0.5 when the areas are
            equal, 0 when white's area is larger
        """
        black_area, white_area = GoGame.get_areas(self.state)
        margin = black_area - white_area

        if margin > 0:
            return 1
        if margin == 0:
            return 0.5
        # Only a negative margin can remain at this point.
        assert margin < 0
        return 0

示例#15

0

显示文件

    def get_reward(self):
        '''
        Return the reward for the current state according to
        ``self.reward_method``.

        real: the value of ``self.get_winner()``.
        heuristic: black total area - white total area while the game is
            still running. Once the game has ended the reward is scaled to
            the board: ``+size**2`` when black holds more area,
            ``-size**2`` when white holds more, and ``0`` when the areas
            are equal. (A tie used to be reported as ``-size**2``, i.e. as
            a maximal loss for black.)

        Winning and losing are based on the Area rule, also known as
        Tromp-Taylor scoring.
        Area rule definition: https://en.wikipedia.org/wiki/Rules_of_Go#End

        :raises Exception: if ``self.reward_method`` is neither REAL nor
            HEURISTIC
        '''
        if self.reward_method == RewardMethod.REAL:
            return self.get_winner()

        if self.reward_method == RewardMethod.HEURISTIC:
            black_area, white_area = GoGame.get_areas(self.state)
            area_difference = black_area - white_area
            if not self.game_ended():
                return area_difference

            # Finished game: +1 / -1 / 0 for black win / loss / tie,
            # scaled by the number of board points.
            if area_difference > 0:
                outcome = 1
            elif area_difference < 0:
                outcome = -1
            else:
                outcome = 0
            return outcome * self.size**2

        raise Exception("Unknown Reward Method")

示例#16

0

显示文件

 def turn(self):
     """Return ``GoGame.get_turn`` for the current state."""
     current_state = self.state
     return GoGame.get_turn(current_state)

示例#17

0

显示文件

class GoEnv(gym.Env):
    """
    Gym environment for Go that delegates the game logic to ``GoGame``.

    Instance attributes set by ``__init__`` / ``reset``:
      * ``size`` - board side length
      * ``state`` - current game state as produced by ``GoGame``
      * ``group_map`` - list of two sets, updated on every ``step``
      * ``done`` - whether the game has ended
    """
    metadata = {'render.modes': ['terminal', 'human']}
    gogame = GoGame()
    govars = govars

    def __init__(self, size, reward_method='real'):
        '''
        Set up a new environment on a ``size`` x ``size`` board.

        @param size: board side length
        @param reward_method: either 'heuristic' or 'real' (converted
            with ``RewardMethod``); ``get_reward`` describes what each
            method returns
        '''
        self.size = size
        self.state = GoGame.get_init_board(size)
        self.reward_method = RewardMethod(reward_method)
        self.observation_space = gym.spaces.Box(0,
                                                govars.NUM_CHNLS,
                                                shape=(govars.NUM_CHNLS, size,
                                                       size))
        self.action_space = gym.spaces.Discrete(
            GoGame.get_action_size(self.state))
        self.group_map = [set(), set()]
        self.done = False

    def reset(self):
        '''
        Start a new game: fresh initial board, empty group map, ``done``
        cleared.

        :return: copy of the new state
        '''
        self.state = GoGame.get_init_board(self.size)
        self.group_map = [set(), set()]
        self.done = False
        return np.copy(self.state)

    def step(self, action):
        '''
        Play ``action`` and return ``(observation, reward, done, info)``.

        Must not be called once the game is done. Assumes the correct
        player is making a move. ``action`` may be ``None`` (converted to
        index ``size**2``), a tuple/list/ndarray of board coordinates
        (converted to ``size * action[0] + action[1]``), or an index that
        is used as given.
        '''
        assert not self.done
        if isinstance(action, (tuple, list, np.ndarray)):
            assert 0 <= action[0] < self.size
            assert 0 <= action[1] < self.size
            action = self.size * action[0] + action[1]
        elif action is None:
            action = self.size**2

        # The batch API works on an array of actions; submit just one.
        actions = np.array([action])
        states, group_maps = GoGame.get_batch_next_states(
            self.state, actions, self.group_map)
        self.state, self.group_map = states[0], group_maps[0]
        self.done = GoGame.get_game_ended(self.state)
        return np.copy(
            self.state), self.get_reward(), self.done, self.get_info()

    def game_ended(self):
        """Return the ``done`` flag maintained by ``step`` and ``reset``."""
        return self.done

    def turn(self):
        """Return ``GoGame.get_turn`` for the current state."""
        return GoGame.get_turn(self.state)

    def prev_player_passed(self):
        """Return ``GoGame.get_prev_player_passed`` for the current state."""
        return GoGame.get_prev_player_passed(self.state)

    def get_valid_moves(self):
        """Return ``GoGame.get_valid_moves`` for the current state."""
        return GoGame.get_valid_moves(self.state)

    def action_2d_to_1d(self, action_2d):
        """
        Convert a 2D action into the flat index used by ``step``.

        :param action_2d: ``None`` or an indexable pair of coordinates
        :return: ``size**2`` for ``None``, otherwise
            ``action_2d[0] * size + action_2d[1]``
        """
        if action_2d is None:
            action_1d = self.size**2
        else:
            action_1d = action_2d[0] * self.size + action_2d[1]
        return action_1d

    def uniform_random_action(self):
        """
        Pick one action uniformly at random among the indices for which
        ``get_valid_moves`` is non-zero.
        """
        valid_moves = self.get_valid_moves()
        valid_move_idcs = np.argwhere(valid_moves).flatten()
        return np.random.choice(valid_move_idcs)

    def get_info(self):
        """
        :return: Debugging info for the state: ``'prev_player_passed'``,
            ``'turn'`` (``'b'`` or ``'w'``) and ``'game_ended'``
        """
        return {
            'prev_player_passed': GoGame.get_prev_player_passed(self.state),
            'turn':
            'b' if GoGame.get_turn(self.state) == GoEnv.govars.BLACK else 'w',
            'game_ended': GoGame.get_game_ended(self.state)
        }

    def get_state(self):
        """
        :return: copy of state
        """
        return np.copy(self.state)

    def get_canonical_state(self):
        """
        :return: ``GoGame.get_canonical_form`` of the current state
        """
        return GoGame.get_canonical_form(self.state)

    def get_canonical_group_map(self):
        """
        :return: the group map as stored when it is black's turn,
            otherwise a new list with its entries in reverse order
        """
        if self.turn() == govars.BLACK:
            return self.group_map
        else:
            return list(reversed(self.group_map))

    def get_children(self, canonical=False):
        """
        :param canonical: forwarded to ``GoGame.get_children``
        :return: result of ``GoGame.get_children`` for the current state
            and group map
        """
        return GoGame.get_children(self.state, self.group_map, canonical)

    def get_winning(self):
        """
        :return: Who's currently winning in BLACK's perspective, regardless if the game is over
        """
        return GoGame.get_winning(self.state)

    def get_winner(self):
        """
        Get the winner from BLACK's perspective.

        :return: ``get_winning()`` once the game has ended, otherwise 0
        """
        if self.game_ended():
            return self.get_winning()
        else:
            return 0

    def get_reward(self):
        '''
        Return the reward for the current state according to
        ``self.reward_method``.

        real: the value of ``self.get_winner()``.
        heuristic: black total area - white total area while the game is
            still running. Once the game has ended the reward is scaled to
            the board: ``+size**2`` when black holds more area,
            ``-size**2`` when white holds more, and ``0`` when the areas
            are equal. (A tie used to be reported as ``-size**2``, i.e. as
            a maximal loss for black.)

        Winning and losing are based on the Area rule, also known as
        Tromp-Taylor scoring.
        Area rule definition: https://en.wikipedia.org/wiki/Rules_of_Go#End

        :raises Exception: if ``self.reward_method`` is neither REAL nor
            HEURISTIC
        '''
        if self.reward_method == RewardMethod.REAL:
            return self.get_winner()

        elif self.reward_method == RewardMethod.HEURISTIC:
            black_area, white_area = GoGame.get_areas(self.state)
            area_difference = black_area - white_area
            if self.game_ended():
                # +1 / -1 / 0 for black win / loss / tie, scaled by the
                # number of board points.
                if area_difference > 0:
                    outcome = 1
                elif area_difference < 0:
                    outcome = -1
                else:
                    outcome = 0
                return outcome * self.size**2
            return area_difference
        else:
            raise Exception("Unknown Reward Method")

    def __str__(self):
        """Return ``GoGame.str`` of the current state."""
        return GoGame.str(self.state)

    def close(self):
        """Close the render window and stop the pyglet app, if one was opened."""
        if hasattr(self, 'window'):
            assert hasattr(self, 'pyglet')
            self.window.close()
            self.pyglet.app.exit()

    def render(self, mode='terminal'):
        """
        Show the current state.

        :param mode: ``'terminal'`` prints the board's string form;
            ``'human'`` opens a pyglet window and blocks until the user
            acts. Any other value does nothing.
        :return: ``None`` in terminal mode. In human mode: the clicked
            ``(x_coord, y_coord)`` grid position for a left click,
            ``None`` after the P key or the R key (R also resets the
            environment), and ``-1`` after the E key.
        """
        if mode == 'terminal':
            print(self.__str__())
        elif mode == 'human':
            # Imported lazily so pyglet is only needed for this mode.
            import pyglet
            from pyglet.window import mouse
            from pyglet.window import key

            # NOTE(review): `pyglet.window.get_platform()` is believed to
            # be unavailable in newer pyglet releases - confirm against
            # the pyglet version this project pins.
            screen = pyglet.window.get_platform().get_default_display(
            ).get_default_screen()
            window_width = int(min(screen.width, screen.height) * 2 / 3)
            window_height = int(window_width * 1.2)
            window = pyglet.window.Window(window_width, window_height)

            # Kept on the instance so close() can reach them later.
            self.window = window
            self.pyglet = pyglet
            self.user_action = None

            # Set Cursor
            cursor = window.get_system_mouse_cursor(window.CURSOR_CROSSHAIR)
            window.set_mouse_cursor(cursor)

            # Outlines
            lower_grid_coord = window_width * 0.075
            board_size = window_width * 0.85
            upper_grid_coord = board_size + lower_grid_coord
            delta = board_size / (self.size - 1)  # spacing between grid lines
            piece_r = delta / 3.3  # radius

            @window.event
            def on_draw():
                pyglet.gl.glClearColor(0.7, 0.5, 0.3, 1)
                window.clear()

                pyglet.gl.glLineWidth(3)
                batch = pyglet.graphics.Batch()

                # draw the grid and labels
                rendering.draw_grid(batch, delta, self.size, lower_grid_coord,
                                    upper_grid_coord)

                # info on top of the board
                rendering.draw_info(batch, window_width, window_height,
                                    upper_grid_coord, self.state)

                # Inform user what they can do
                rendering.draw_command_labels(batch, window_width,
                                              window_height)

                rendering.draw_title(batch, window_width, window_height)

                batch.draw()

                # draw the pieces
                rendering.draw_pieces(batch, lower_grid_coord, delta, piece_r,
                                      self.size, self.state)

            @window.event
            def on_mouse_press(x, y, button, modifiers):
                if button == mouse.LEFT:
                    # Snap the click to the nearest grid intersection.
                    grid_x = (x - lower_grid_coord)
                    grid_y = (y - lower_grid_coord)
                    x_coord = round(grid_x / delta)
                    y_coord = round(grid_y / delta)
                    try:
                        self.window.close()
                        pyglet.app.exit()
                        self.user_action = (x_coord, y_coord)
                    except Exception:
                        # Best effort: a failure while closing leaves
                        # user_action untouched. KeyboardInterrupt and
                        # SystemExit are no longer swallowed here.
                        pass

            @window.event
            def on_key_press(symbol, modifiers):
                if symbol == key.P:
                    self.window.close()
                    pyglet.app.exit()
                    self.user_action = None
                elif symbol == key.R:
                    self.reset()
                    self.window.close()
                    pyglet.app.exit()
                elif symbol == key.E:
                    self.window.close()
                    pyglet.app.exit()
                    self.user_action = -1

            pyglet.app.run()

            return self.user_action

示例#18

0

显示文件

 def __str__(self):
     """Return ``GoGame.str`` of the current state."""
     rendered = GoGame.str(self.state)
     return rendered

示例#19

0

显示文件

 def get_winning(self):
     """
     Report who is currently ahead from BLACK's perspective, whether or
     not the game is over.

     :return: the value of ``GoGame.get_winning`` for the current state
     """
     current_state = self.state
     return GoGame.get_winning(current_state)

示例#20

0

显示文件

 def get_canonical_state(self):
     """
     Return the canonical form of the current state.

     :return: the value of ``GoGame.get_canonical_form`` for the
         current state
     """
     canonical = GoGame.get_canonical_form(self.state)
     return canonical

示例#21

0

显示文件

文件： go_env.py 项目： Hizoul/GymGo

 def game_ended(self):
     """Return ``GoGame.get_game_ended`` for the current state."""
     ended = GoGame.get_game_ended(self.state)
     return ended

示例#22

0

显示文件

 def prev_player_passed(self):
     """Return ``GoGame.get_prev_player_passed`` for the current state."""
     passed = GoGame.get_prev_player_passed(self.state)
     return passed

示例#23

0

显示文件

 def get_children(self, canonical=False):
     """
     Fetch the children of the current state from ``GoGame``.

     :param canonical: forwarded to ``GoGame.get_children``
     :return: the value of ``GoGame.get_children`` for the current
         state and group map
     """
     state, group_map = self.state, self.group_map
     return GoGame.get_children(state, group_map, canonical)

示例#24

0

显示文件

 def get_valid_moves(self):
     """Return ``GoGame.get_valid_moves`` for the current state."""
     valid_moves = GoGame.get_valid_moves(self.state)
     return valid_moves

示例#25

0

显示文件

文件： go_env.py 项目： Hizoul/GymGo

 def get_canonical_state(self):
     """Return ``GoGame.get_canonical_form`` of the current state."""
     current_state = self.state
     return GoGame.get_canonical_form(current_state)