Example #1
def draw_info(batch, window_width, window_height, upper_grid_coord, state):
    turn = GoGame.get_turn(state)
    turn_str = 'B' if turn == BLACK else 'W'
    prev_player_passed = GoGame.get_prev_player_passed(state)
    game_ended = GoGame.get_game_ended(state)
    info_label = "Turn: {}\nPassed: {}\nGame: {}".format(
        turn_str, prev_player_passed, "OVER" if game_ended else "ONGOING")

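    # turn / pass / game-status label, anchored to the top-right corner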
    pyglet.text.Label(info_label,
                      font_name='Helvetica',
                      font_size=11,
                      x=window_width - 20,
                      y=window_height - 20,
                      anchor_x='right',
                      anchor_y='top',
                      color=(0, 0, 0, 192),
                      batch=batch,
                      width=window_width / 2,
                      align='right',
                      multiline=True)

    # Areas
    black_area, white_area = GoGame.get_areas(state)
    pyglet.text.Label("{}B | {}W".format(black_area, white_area),
                      font_name='Helvetica',
                      font_size=16,
                      x=window_width / 2,
                      y=upper_grid_coord + 80,
                      anchor_x='center',
                      color=(0, 0, 0, 192),
                      batch=batch,
                      width=window_width,
                      align='center')
Example #2
 def step(self, action):
     '''
     Assumes the correct player is making a move. Black goes first.
     return observation, reward, done, info
     '''
     if action is None:
         action = self.size**2
     elif isinstance(action, (tuple, list, np.ndarray)):
         assert action[0] >= 0 and action[1] >= 0
         assert action[0] < self.size and action[1] < self.size
         action = action[0] * self.size + action[1]
     if self.children is not None:
         valid_moves = self.get_valid_moves()
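         # children are cached in valid-move order, so the child for `action`
         # sits at the count of valid moves that precede it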
         child_idx = int(np.sum(valid_moves[:action]))
         self.state = self.children[child_idx]
         self.group_map = self.child_groupmaps[child_idx]
     else:
         self.state, self.group_map = GoGame.get_next_state(self.state,
                                                            action,
                                                            self.group_map,
                                                            inplace=True)
     self.clear_cache()
     done = GoGame.get_game_ended(self.state)
     return np.copy(self.state), self.get_reward(), done, self.get_info()
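A minimal usage sketch for this step signature (hedged: it assumes the surrounding class is constructed as in Example #5 below and importable as GoEnv; neither name is fixed by the snippet itself):

env = GoEnv(size=5)
obs = env.reset()
obs, reward, done, info = env.step((2, 2))  # 2D board coordinate, flattened internally
obs, reward, done, info = env.step(None)    # None is treated as a pass (size ** 2)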
Example #3
 def get_info(self):
     """
     :return: Debugging info for the state
     """
     return {
         'prev_player_passed': GoGame.get_prev_player_passed(self.state),
         'turn': 'b' if GoGame.get_turn(self.state) == GoEnv.govars.BLACK else 'w',
         'game_ended': GoGame.get_game_ended(self.state)
     }
Example #4
    def cache_children(self, canonical=False):
        """
        Caches the children of the current state.
        :return: a copy of the children (canonical form if requested) and their group maps
        """
        self.children, self.child_groupmaps = GoGame.get_children(
            self.state, self.group_map)
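        # shallow-copy the lists so the canonicalization below leaves the cache intact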
        children = self.children.copy()
        child_groupmaps = self.child_groupmaps.copy()
        if canonical:
            for i in range(len(children)):
                children[i] = GoGame.get_canonical_form(children[i])

        return children, child_groupmaps
Example #5
 def __init__(self, size, reward_method='real', black_first=True):
     '''
     @param reward_method: either 'heuristic' or 'real'
     heuristic: gives # black pieces - # white pieces.
     real: gives 0 for in-game move, 1 for winning, -1 for losing,
         0 for draw, all from black player's perspective
     '''
     self.size = size
     self.state = GoGame.get_init_board(size, black_first)
     self.reward_method = RewardMethod(reward_method)
     self.observation_space = gym.spaces.Box(0, 6, shape=(6, size, size))
     self.action_space = gym.spaces.Discrete(
         GoGame.get_action_size(self.state))
     self.group_map = np.empty(self.state.shape[1:], dtype=object)
     self.clear_cache()
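Since the step methods encode a pass as size ** 2, the action space presumably holds size ** 2 + 1 entries. A quick sanity-check sketch under that assumption:

env = GoEnv(size=7)
assert env.action_space.n == 7 * 7 + 1           # every intersection plus a pass
assert env.observation_space.shape == (6, 7, 7)  # 6-channel board encoding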
Example #6
 def step(self, action):
     '''
     Assumes the correct player is making a move. Black goes first.
     return observation, reward, done, info
     '''
     if action is None:
         action = self.size**2
     elif isinstance(action, (tuple, list, np.ndarray)):
         assert action[0] >= 0 and action[1] >= 0
         assert action[0] < self.size and action[1] < self.size
         action = action[0] * self.size + action[1]
     self.state = GoGame.get_next_state(self.state, action)
     done = GoGame.get_game_ended(self.state)
     return np.copy(self.state), self.get_reward(), done, self.get_info()
Example #7
 def __init__(self, size, komi=0, reward_method='real'):
     '''
     @param reward_method: either 'heuristic' or 'real'
     heuristic: gives # black pieces - # white pieces.
     real: gives 0 for in-game move, 1 for winning, -1 for losing,
         0 for draw, all from black player's perspective
     '''
     self.size = size
     self.komi = komi
     self.state = GoGame.get_init_board(size)
     self.reward_method = RewardMethod(reward_method)
     self.observation_space = gym.spaces.Box(np.float32(0), np.float32(govars.NUM_CHNLS),
                                             shape=(govars.NUM_CHNLS, size, size))
     self.action_space = gym.spaces.Discrete(GoGame.get_action_size(self.state))
     self.group_map = [set(), set()]
     self.done = False
Example #8
 def reset(self, black_first=True):
     '''
     Reset state, go_board, curr_player, prev_player_passed,
     done, return state
     '''
     self.state = GoGame.get_init_board(self.size, black_first)
     self.group_map = np.empty(self.state.shape[1:], dtype=object)
     self.clear_cache()
     return np.copy(self.state)
Example #9
 def reset(self):
     '''
     Reset state, go_board, curr_player, prev_player_passed,
     done, return state
     '''
     self.state = GoGame.get_init_board(self.size)
     self.group_map = [set(), set()]
     self.done = False
     return np.copy(self.state)
Example #10
    def step(self, action):
        '''
        Assumes the correct player is making a move. Black goes first.
        return observation, reward, done, info
        '''
        assert not self.done
        if isinstance(action, (tuple, list, np.ndarray)):
            assert 0 <= action[0] < self.size
            assert 0 <= action[1] < self.size
            action = self.size * action[0] + action[1]
        elif action is None:
            action = self.size ** 2

        actions = np.array([action])
        states, group_maps = GoGame.get_next_states(self.state, actions, self.group_map)
        self.state, self.group_map = states[0], group_maps[0]
        self.done = GoGame.get_game_ended(self.state)
        return np.copy(self.state), self.get_reward(), self.done, self.get_info()
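The single move is wrapped into a length-one array before calling GoGame.get_next_states, which takes one state and several candidate actions. A hedged sketch of expanding multiple moves from the same position with that call (the return layout is inferred from the wrapper above, not documented):

import numpy as np

actions = np.array([0, 12, env.size ** 2])  # two plays and a pass
states, group_maps = GoGame.get_next_states(env.state, actions, env.group_map)
assert len(states) == len(actions)          # one successor per action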
Example #11
 def reset(self, black_first=True, state=None):
     '''
     Reset state, go_board, curr_player, prev_player_passed,
     done, return state
     '''
     if state is None:
         self.state = GoGame.get_init_board(self.size, black_first)
     else:
         assert state.shape[1] == self.size
         self.state = np.copy(state)
     return np.copy(self.state)
Example #12
 def __init__(self,
              size,
              reward_method='real',
              black_first=True,
              state=None):
     '''
     @param reward_method: either 'heuristic' or 'real'
     heuristic: gives # black pieces - # white pieces.
     real: gives 0 for in-game move, 1 for winning, -1 for losing,
         0 for draw, all from black player's perspective
     '''
     self.size = size
     if state is None:
         self.state = GoGame.get_init_board(size, black_first)
     else:
         assert state.shape[1] == size
         self.state = np.copy(state)
     self.reward_method = RewardMethod(reward_method)
     self.observation_space = gym.spaces.Box(0, 6, shape=(6, size, size))
     self.action_space = gym.spaces.Discrete(
         GoGame.get_action_size(self.state))
Example #13
    def step_batch(self, state, action):
        '''
        Assumes the correct player is making a move. Black goes first.
        Returns observation, reward, done, info.
        Unlike step, this does not mutate the environment's stored state.
        '''
        assert not self.done
        if isinstance(action, (tuple, list, np.ndarray)):
            assert 0 <= action[0] < self.size
            assert 0 <= action[1] < self.size
            action = self.size * action[0] + action[1]
        elif action is None:
            action = self.size**2

        actions = np.array([action])
        next_states, next_group_maps = GoGame.get_batch_next_states(
            state, actions, self.group_map)
        next_state, next_group_map = next_states[0], next_group_maps[0]
        next_done = GoGame.get_game_ended(next_state)
        reward = self.get_reward_batch(next_state, next_done)
        info = self.get_info_batch(next_state)
        return np.copy(next_state), reward, next_done, info
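Because step_batch takes the state as an argument and never writes back to self.state, it can serve as a one-ply lookahead. A hedged sketch, assuming an env instance of this class and that the batch call does not mutate its input state:

import numpy as np

before = np.copy(env.state)
obs, reward, done, info = env.step_batch(env.state, (3, 3))  # peek at one move
assert np.array_equal(env.state, before)                     # stored state untouched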
Example #14
    def get_winning(self):
        """
        :return: who's currently winning (1 for black, 0.5 for a tie, 0 for white), regardless of whether the game is over
        """
        black_area, white_area = GoGame.get_areas(self.state)
        area_difference = black_area - white_area

        if area_difference > 0:
            return 1
        elif area_difference == 0:
            return 0.5
        else:
            assert area_difference < 0
            return 0
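As a concrete trace: black_area=13 versus white_area=12 returns 1 (black ahead), equal areas return 0.5, and any negative difference falls through the assert to return 0 (white ahead).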
Example #15
    def get_reward(self):
        '''
        Return reward based on reward_method.
        heuristic: black total area - white total area
        real: 0 for in-game move, 1 for winning, 0 for losing,
            0.5 for draw, from black player's perspective.
            Winning and losing are based on the area rule,
            also known as Tromp-Taylor scoring
        Area rule definition: https://en.wikipedia.org/wiki/Rules_of_Go#End
        '''
        if self.reward_method == RewardMethod.REAL:
            return self.get_winner()

        elif self.reward_method == RewardMethod.HEURISTIC:
            black_area, white_area = GoGame.get_areas(self.state)
            area_difference = black_area - white_area
            if self.game_ended():
                return (1 if area_difference > 0 else -1) * self.size**2
            return area_difference
        else:
            raise Exception("Unknown Reward Method")
Example #16
 def turn(self):
     return GoGame.get_turn(self.state)
Example #17
class GoEnv(gym.Env):
    metadata = {'render.modes': ['terminal', 'human']}
    gogame = GoGame()
    govars = govars

    def __init__(self, size, reward_method='real'):
        '''
        @param reward_method: either 'heuristic' or 'real'
        heuristic: gives # black pieces - # white pieces.
        real: gives 0 for in-game move, 1 for winning, -1 for losing,
            0 for draw, all from black player's perspective
        '''
        self.size = size
        self.state = GoGame.get_init_board(size)
        self.reward_method = RewardMethod(reward_method)
        self.observation_space = gym.spaces.Box(0,
                                                govars.NUM_CHNLS,
                                                shape=(govars.NUM_CHNLS, size,
                                                       size))
        self.action_space = gym.spaces.Discrete(
            GoGame.get_action_size(self.state))
        self.group_map = [set(), set()]
        self.done = False

    def reset(self):
        '''
        Reset state, go_board, curr_player, prev_player_passed,
        done, return state
        '''
        self.state = GoGame.get_init_board(self.size)
        self.group_map = [set(), set()]
        self.done = False
        return np.copy(self.state)

    def step(self, action):
        '''
        Assumes the correct player is making a move. Black goes first.
        return observation, reward, done, info
        '''
        assert not self.done
        if isinstance(action, (tuple, list, np.ndarray)):
            assert 0 <= action[0] < self.size
            assert 0 <= action[1] < self.size
            action = self.size * action[0] + action[1]
        elif action is None:
            action = self.size**2

        actions = np.array([action])
        states, group_maps = GoGame.get_batch_next_states(
            self.state, actions, self.group_map)
        self.state, self.group_map = states[0], group_maps[0]
        self.done = GoGame.get_game_ended(self.state)
        return np.copy(self.state), self.get_reward(), self.done, self.get_info()

    def game_ended(self):
        return self.done

    def turn(self):
        return GoGame.get_turn(self.state)

    def prev_player_passed(self):
        return GoGame.get_prev_player_passed(self.state)

    def get_valid_moves(self):
        return GoGame.get_valid_moves(self.state)

    def action_2d_to_1d(self, action_2d):
        if action_2d is None:
            action_1d = self.size**2
        else:
            action_1d = action_2d[0] * self.size + action_2d[1]
        return action_1d

    def uniform_random_action(self):
        valid_moves = self.get_valid_moves()
        valid_move_idcs = np.argwhere(valid_moves).flatten()
        return np.random.choice(valid_move_idcs)

    def get_info(self):
        """
        :return: Debugging info for the state
        """
        return {
            'prev_player_passed': GoGame.get_prev_player_passed(self.state),
            'turn': 'b' if GoGame.get_turn(self.state) == GoEnv.govars.BLACK else 'w',
            'game_ended': GoGame.get_game_ended(self.state)
        }

    def get_state(self):
        """
        :return: copy of state
        """
        return np.copy(self.state)

    def get_canonical_state(self):
        """
        :return: canonical shallow copy of state
        """
        return GoGame.get_canonical_form(self.state)

    def get_canonical_group_map(self):
        if self.turn() == govars.BLACK:
            return self.group_map
        else:
            return list(reversed(self.group_map))

    def get_children(self, canonical=False):
        """
        :return: children of the current state, in canonical form if requested
        """
        return GoGame.get_children(self.state, self.group_map, canonical)

    def get_winning(self):
        """
        :return: who's currently winning from BLACK's perspective, regardless of whether the game is over
        """
        return GoGame.get_winning(self.state)

    def get_winner(self):
        """
        Gets the winner from BLACK's perspective
        :return: the winning value if the game is over, otherwise 0
        """

        if self.game_ended():
            return self.get_winning()
        else:
            return 0

    def get_reward(self):
        '''
        Return reward based on reward_method.
        heuristic: black total area - white total area
        real: 0 for in-game move, 1 for winning, 0 for losing,
            0.5 for draw, from black player's perspective.
            Winning and losing are based on the area rule,
            also known as Tromp-Taylor scoring
        Area rule definition: https://en.wikipedia.org/wiki/Rules_of_Go#End
        '''
        if self.reward_method == RewardMethod.REAL:
            return self.get_winner()

        elif self.reward_method == RewardMethod.HEURISTIC:
            black_area, white_area = GoGame.get_areas(self.state)
            area_difference = black_area - white_area
            if self.game_ended():
                return (1 if area_difference > 0 else -1) * self.size**2
            return area_difference
        else:
            raise Exception("Unknown Reward Method")

    def __str__(self):
        return GoGame.str(self.state)

    def close(self):
        if hasattr(self, 'window'):
            assert hasattr(self, 'pyglet')
            self.window.close()
            self.pyglet.app.exit()

    def render(self, mode='terminal'):
        if mode == 'terminal':
            print(self.__str__())
        elif mode == 'human':
            import pyglet
            from pyglet.window import mouse
            from pyglet.window import key

            # pyglet.window.get_platform() is the legacy (pre-1.5) pyglet API;
            # newer versions expose the display via pyglet.canvas.get_display()
            display = pyglet.window.get_platform().get_default_display()
            screen = display.get_default_screen()
            window_width = int(min(screen.width, screen.height) * 2 / 3)
            window_height = int(window_width * 1.2)
            window = pyglet.window.Window(window_width, window_height)

            self.window = window
            self.pyglet = pyglet
            self.user_action = None

            # Set Cursor
            cursor = window.get_system_mouse_cursor(window.CURSOR_CROSSHAIR)
            window.set_mouse_cursor(cursor)

            # Outlines
            lower_grid_coord = window_width * 0.075
            board_size = window_width * 0.85
            upper_grid_coord = board_size + lower_grid_coord
            delta = board_size / (self.size - 1)
            piece_r = delta / 3.3  # radius

            @window.event
            def on_draw():
                pyglet.gl.glClearColor(0.7, 0.5, 0.3, 1)
                window.clear()

                pyglet.gl.glLineWidth(3)
                batch = pyglet.graphics.Batch()

                # draw the grid and labels
                rendering.draw_grid(batch, delta, self.size, lower_grid_coord,
                                    upper_grid_coord)

                # info on top of the board
                rendering.draw_info(batch, window_width, window_height,
                                    upper_grid_coord, self.state)

                # Inform user what they can do
                rendering.draw_command_labels(batch, window_width,
                                              window_height)

                rendering.draw_title(batch, window_width, window_height)

                batch.draw()

                # draw the pieces
                rendering.draw_pieces(batch, lower_grid_coord, delta, piece_r,
                                      self.size, self.state)

            @window.event
            def on_mouse_press(x, y, button, modifiers):
                if button == mouse.LEFT:
                    grid_x = (x - lower_grid_coord)
                    grid_y = (y - lower_grid_coord)
                    x_coord = round(grid_x / delta)
                    y_coord = round(grid_y / delta)
                    try:
                        self.window.close()
                        pyglet.app.exit()
                        self.user_action = (x_coord, y_coord)
                    except Exception:
                        pass

            @window.event
            def on_key_press(symbol, modifiers):
                if symbol == key.P:
                    self.window.close()
                    pyglet.app.exit()
                    self.user_action = None
                elif symbol == key.R:
                    self.reset()
                    self.window.close()
                    pyglet.app.exit()
                elif symbol == key.E:
                    self.window.close()
                    pyglet.app.exit()
                    self.user_action = -1

            pyglet.app.run()

            return self.user_action
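Putting the pieces together, a hedged end-to-end sketch that plays uniformly random legal moves until the game ends (the board size is arbitrary; every method name comes from the class above):

env = GoEnv(size=7)
obs = env.reset()
done = False
while not done:
    action = env.uniform_random_action()  # index of a sampled legal move
    obs, reward, done, info = env.step(action)
env.render(mode='terminal')               # print the final position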
Example #18
 def __str__(self):
     return GoGame.str(self.state)
Example #19
 def get_winning(self):
     """
     :return: who's currently winning from BLACK's perspective, regardless of whether the game is over
     """
     return GoGame.get_winning(self.state)
Example #20
 def get_canonical_state(self):
     """
     :return: canonical shallow copy of state
     """
     return GoGame.get_canonical_form(self.state)
Example #21
 def game_ended(self):
     return GoGame.get_game_ended(self.state)
Example #22
 def prev_player_passed(self):
     return GoGame.get_prev_player_passed(self.state)
Example #23
 def get_children(self, canonical=False):
     """
     :return: children of the current state, in canonical form if requested
     """
     return GoGame.get_children(self.state, self.group_map, canonical)
Example #24
 def get_valid_moves(self):
     return GoGame.get_valid_moves(self.state)
Example #25
 def get_canonical_state(self):
     return GoGame.get_canonical_form(self.state)