def __init__(self, board_size=BOARD_SIZE, color=BLACK, board=None):
    """Set up game state on a supplied board or on a freshly created one.

    Args:
        board_size: side length of the board.
        color: color of the current player; must be BLACK or WHITE.
        board: board to adopt. When it is falsy (the default ``None``) a
            new board of ``board_size`` is created with
            ``pachi_py.CreateBoard``.
    """
    assert color in [BLACK, WHITE], 'Invalid player color'

    # Adopt the caller's board if one was given, otherwise start from empty.
    self.board = board if board else pachi_py.CreateBoard(board_size)

    # Both recent-action slots start out at -1.
    self.last_action = self.last_action_2 = -1

    self.color = color
    self.board_size = board_size
    self.game_over = False
    self.winner = None

    # One action per intersection plus one extra action.
    n_actions = board_size**2 + 1
    self.action_space = spaces.Discrete(n_actions)

    # Runs before current_player is assigned, as in the original ordering.
    self._new_state_checks()

    # BLACK is encoded as +1, any other (i.e. WHITE) as -1.
    self.current_player = 1 if color == BLACK else -1
def reset(self):
    """Replace the board with an empty one and return the formatted state.

    ``self.board`` becomes a new board of ``self.board_size`` and
    ``self.done`` is recomputed from it. The return value is whatever
    ``_format_state(self.history, self.player_color, self.board_size)``
    produces.
    """
    self.board = pachi_py.CreateBoard(self.board_size)

    # Nothing has been played yet, so the resignation flag is always False.
    resigned = False
    self.done = self.board.is_terminal or resigned

    # NOTE(review): self.history is passed through without being reset in
    # this method — confirm that is intended.
    formatted = _format_state(self.history, self.player_color,
                              self.board_size)
    return formatted
def __init__(self, board_size, board=None, player=-1, done=False,
             last_passed=False, history=None, move_num=0):
    """Initialise a game state.

    Args:
        board_size: side length of the board.
        board: existing board to use; a new one is created with
            ``pachi_py.CreateBoard(board_size)`` when ``None``.
        player: side to move, either -1 or 1.
        done: stored on ``self.done``.
        last_passed: stored on ``self.last_passed``.
        history: stored on ``self.history``; defaults to a list of seven
            ``None`` entries when ``None``.
        move_num: stored on ``self.move_num``.
    """
    n_points = board_size**2

    self.board_size = board_size
    # The two actions beyond the board coordinates: pass, then resign.
    self.pass_action = n_points
    self.resign_action = n_points + 1

    assert player in [-1, 1]
    self.curr_player = player

    self.done = done
    self.last_passed = last_passed
    self.move_num = move_num

    self.history = [None] * 7 if history is None else history
    self.board = pachi_py.CreateBoard(board_size) if board is None else board
def make_random_board(size):
    """Return a board of the given size after 50 random alternating moves.

    Black moves first. Every move is picked with ``np.random.choice`` from
    the legal coordinates the board reports for the side to move.
    """
    board = pachi_py.CreateBoard(size)
    to_move = pachi_py.BLACK
    moves_left = 50
    while moves_left > 0:
        coord = np.random.choice(board.get_legal_coords(to_move))
        # play() returns the resulting board, which replaces the current one.
        board = board.play(coord, to_move)
        to_move = pachi_py.stone_other(to_move)
        moves_left -= 1
    return board
def test_illegal_move():
    """Playing on an occupied coordinate must raise ``pachi_py.IllegalMove``."""
    occupied = 14
    board = pachi_py.CreateBoard(9).play(occupied, pachi_py.WHITE)

    raised = False
    try:
        # Black tries to play on the point White just took.
        board.play(occupied, pachi_py.BLACK)
    except pachi_py.IllegalMove:
        raised = True

    assert raised, 'IllegalMove exception should have been raised'
def __init__(self, player_color, observation_type, illegal_move_mode,
             board_size, komi):
    """Configure the environment.

    Args:
        player_color: Stone color for the agent. Either 'black' or 'white'
        observation_type: State encoding; only 'image3c' is accepted
        illegal_move_mode: What to do when the agent makes an illegal
            move. Choices: 'raise' or 'lose'
        board_size: Side length of the board, an int >= 1
        komi: Stored unchanged on ``self.komi``

    Raises:
        error.Error: if ``player_color`` is not 'black' or 'white', or if
            the observation type is unsupported.
    """
    size_is_valid = isinstance(board_size, int) and board_size >= 1
    assert size_is_valid, 'Invalid board size: {}'.format(board_size)

    self.board_size = board_size
    self.komi = komi
    # Seeding happens after board_size is stored and before the rest of
    # the configuration, as in the original ordering.
    self._seed()

    stone_by_name = {
        'black': pachi_py.BLACK,
        'white': pachi_py.WHITE,
    }
    try:
        self.player_color = stone_by_name[player_color]
    except KeyError:
        message = "player_color must be 'black' or 'white', not {}".format(
            player_color)
        raise error.Error(message)

    assert observation_type in ('image3c',)
    self.observation_type = observation_type

    assert illegal_move_mode in ('lose', 'raise')
    self.illegal_move_mode = illegal_move_mode

    # Explicit check in addition to the assert above.
    if self.observation_type != 'image3c':
        raise error.Error('Unsupported observation type: {}'.format(
            self.observation_type))

    # Observation bounds take the shape of an encoded empty board.
    encoded_shape = pachi_py.CreateBoard(self.board_size).encode().shape
    lower, upper = np.zeros(encoded_shape), np.ones(encoded_shape)
    self.observation_space = spaces.Box(lower, upper)

    # One action for each board position, pass, and resign
    self.action_space = spaces.Discrete(self.board_size**2 + 2)

    # Filled in by _reset()
    self.state = None
    self.done = True

    # Modifications
    self.last_player_passed = False
    # Three-element one-hot vectors, one per point state.
    self.BLACK, self.WHITE, self.EMPTY = (
        np.array([1, 0, 0]),
        np.array([0, 1, 0]),
        np.array([0, 0, 1]),
    )
def reset(self):
    """Start a new game and return the encoded empty board.

    Replaces ``self.state`` with a ``GoState`` on a fresh board of
    ``self.board_size`` with black to move, clears
    ``self.last_player_passed`` and recomputes ``self.done`` from the
    board's ``is_terminal`` flag.

    Returns:
        The result of ``self.state.board.encode()``.
    """
    # No opponent is reset or played here, so the unused
    # ``opponent_resigned`` local and its stale comments were removed.
    self.state = GoState(pachi_py.CreateBoard(self.board_size),
                         pachi_py.BLACK)
    self.last_player_passed = False
    self.done = self.state.board.is_terminal
    return self.state.board.encode()
def _seed(self, seed=None):
    """Seed the numpy RNG and pachi, then rebuild both spaces.

    Args:
        seed: passed to ``seeding.np_random``; ``None`` by default.

    Returns:
        ``[seed1, seed2]``: the seed reported by ``seeding.np_random`` and
        the derived seed handed to ``pachi_py.pachi_srand``.
    """
    rng, primary_seed = seeding.np_random(seed)
    self.np_random = rng

    # Derive a random seed.
    derived_seed = seeding.hash_seed(primary_seed + 1) % 2**32
    pachi_py.pachi_srand(derived_seed)

    # Both spaces are recreated so they use the newly seeded generator.
    encoded_shape = pachi_py.CreateBoard(self.board_size).encode().shape
    lower, upper = np.zeros(encoded_shape), np.ones(encoded_shape)
    self.observation_space = spaces.Box(lower, upper,
                                        np_random=self.np_random)

    # One action for each board position, pass, and resign
    n_actions = self.board_size**2 + 2
    self.action_space = spaces.Discrete(n_actions,
                                        np_random=self.np_random)

    return [primary_seed, derived_seed]
def _reset(self):
    """Start a new game and return the encoded board on the agent's turn.

    Creates a fresh ``GoState`` with black to move, resets the opponent on
    the new board and, when the agent is not the side to move, lets the
    opponent play first.

    Returns:
        The result of ``self.state.board.encode()``.
    """
    empty_board = pachi_py.CreateBoard(self.board_size)
    self.state = GoState(empty_board, pachi_py.BLACK)

    # (re-initialize) the opponent
    # necessary because a pachi engine is attached to a game via internal
    # data in a board, so with a fresh game, we need a fresh engine
    self._reset_opponent(self.state.board)

    # Let the opponent play if it's not the agent's turn
    agent_to_move = self.state.color == self.player_color
    if not agent_to_move:
        self.state = self._exec_opponent_play(self.state, None, None)

    # Whichever path was taken, the agent must be the side to move now.
    assert self.state.color == self.player_color

    self.done = self.state.board.is_terminal
    return self.state.board.encode()
def reset(self):
    """Reset the board, the history planes and the cached state.

    The agent is set to black, ``self.history`` becomes two zero arrays of
    shape ``(HISTORY + 1, board_size, board_size)``, ``self.board`` is
    replaced by a fresh board, and ``self.state`` / ``self.done`` are
    recomputed.

    Returns:
        The new ``self.state`` as produced by ``_format_state``.
    """
    stone_by_name = {
        'black': pachi_py.BLACK,
        'white': pachi_py.WHITE,
    }
    self.player_color = stone_by_name['black']

    # Two independent zero-filled stacks of history planes.
    plane_shape = (HISTORY + 1, self.board_size, self.board_size)
    self.history = [np.zeros(plane_shape), np.zeros(plane_shape)]

    self.board = pachi_py.CreateBoard(self.board_size)

    # Nothing has been played yet, so the resignation flag is always False.
    resigned = False

    self.state = _format_state(self.history, self.player_color,
                               self.board_size)
    self.done = self.board.is_terminal or resigned
    return self.state
def __init__(self, player_color, opponent, observation_type,
             illegal_move_mode, board_size):
    """Configure the environment and reset it.

    Args:
        player_color: Stone color for the agent. Either 'black' or 'white'
        opponent: An opponent policy
        observation_type: State encoding; only 'image3c' is accepted
        illegal_move_mode: What to do when the agent makes an illegal
            move. Choices: 'raise' or 'lose'
        board_size: Side length of the board, an int >= 1

    Raises:
        error.Error: if ``player_color`` is not 'black' or 'white', or if
            the observation type is unsupported.
    """
    size_is_valid = isinstance(board_size, int) and board_size >= 1
    assert size_is_valid, 'Invalid board size: {}'.format(board_size)
    self.board_size = board_size

    stone_by_name = {
        'black': pachi_py.BLACK,
        'white': pachi_py.WHITE,
    }
    try:
        self.player_color = stone_by_name[player_color]
    except KeyError:
        message = "player_color must be 'black' or 'white', not {}".format(
            player_color)
        raise error.Error(message)

    # The policy starts unset; only the opponent spec is stored here.
    self.opponent_policy = None
    self.opponent = opponent

    assert observation_type in ('image3c',)
    self.observation_type = observation_type

    assert illegal_move_mode in ('lose', 'raise')
    self.illegal_move_mode = illegal_move_mode

    # One action for each board position, pass, and resign
    self.action_space = spaces.Discrete(self.board_size**2 + 2)

    # Explicit check in addition to the assert above.
    if self.observation_type != 'image3c':
        raise error.Error('Unsupported observation type: {}'.format(
            self.observation_type))

    # Observation bounds take the shape of an encoded empty board.
    encoded_shape = pachi_py.CreateBoard(self.board_size).encode().shape
    lower, upper = np.zeros(encoded_shape), np.ones(encoded_shape)
    self.observation_space = spaces.Box(lower, upper)

    self.reset()
def _play(black_policy_fn, white_policy_fn, board_size=19):
    """Sample a trajectory for two player policies.

    Args:
        black_policy_fn, white_policy_fn: callables invoked as
            ``fn(curr_state, prev_state, prev_action)`` that return the
            move to play; the previous state and action are ``None`` on
            the first move.
        board_size: side length of the board the game starts on.

    Returns:
        A list of ``(state, action, next_state)`` tuples, one per move,
        in the order played.
    """
    trajectory = []
    last_state = last_action = None
    state = GoState(pachi_py.CreateBoard(board_size), BLACK)

    while not state.board.is_terminal:
        # Pick the policy of whichever side is to move.
        if state.color == BLACK:
            policy = black_policy_fn
        else:
            policy = white_policy_fn

        action = policy(state, last_state, last_action)
        successor = state.act(action)
        trajectory.append((state, action, successor))

        last_state, last_action, state = state, action, successor

    return trajectory
def _reset(self):
    """Start a new game and return the encoded board on the agent's turn.

    Creates a fresh ``CustomGoState`` with black to move, resets the
    opponent on the new board and, when the agent is not the side to move,
    lets the opponent play first. ``self.done`` is set when the board is
    terminal or the opponent resigned.

    Returns:
        The result of ``self.state.board.encode()``.
    """
    # Don't touch moves_log!
    empty_board = pachi_py.CreateBoard(self.board_size)
    self.state = CustomGoState(empty_board, pachi_py.BLACK)

    # (re-initialize) the opponent
    # necessary because a pachi engine is attached to a game via internal
    # data in a board, so with a fresh game, we need a fresh engine
    self._reset_opponent(self.state.board)

    # Let the opponent play if it's not the agent's turn
    resigned = False
    if self.state.color != self.player_color:
        outcome = self._exec_opponent_play(self.state, None, None)
        self.state, resigned = outcome

    # We should be back to the agent color
    assert self.state.color == self.player_color

    self.done = self.state.board.is_terminal or resigned
    return self.state.board.encode()
def get(self):
    """
    Get new move from PACHI given board matrix and stone color.
    ---
    tags:
      - move
    consumes:
      - application/json
    produces:
      - application/json
    parameters:
      - in: body
        name: body
        schema:
          properties:
            board_format:
              description: format of the board, matrix or ij_history
            board:
              description: Board data in two dimentional array
              type: array
              items:
                type: array
                items: integer
            board_size:
              description: board size
            stone_color:
              description: Stone color for the generated move. 2:Black or 1:White
              type: integer
            return_board:
              description: Return the board in matrix. 0:False or 1:True.
              type: integer
          example: |-
            {"stone_color": 1,"board": [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0,0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,0,0,0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],"return_board": 1}
    """
    # NOTE(review): the docstring above (everything after ``---``) looks
    # like a Swagger/flasgger YAML spec that is presumably read at runtime,
    # so its wording is left untouched — confirm before editing it.
    #
    # Flow: parse the JSON body, rebuild a 19x19 pachi board from it, ask a
    # UCT engine for the next move, play it, and return the move (and
    # optionally the resulting board) as JSON.
    # NOTE(review): client-side payload errors answer with HTTP 500; that
    # status is kept for backward compatibility although 400 would be the
    # conventional choice.
    try:
        # force=True: parse the body as JSON whatever the Content-Type is.
        req_data = request.get_json(force=True)
    except Exception:
        # Narrowed from a bare ``except:`` so KeyboardInterrupt and
        # SystemExit are no longer swallowed.
        return make_response(jsonify({"error": "Invalid payload"}), 500)
    print(req_data)

    # Valid JSON that is not an object (e.g. a list or a number) has no
    # ``.get``; reject it as an invalid payload instead of crashing.
    if not isinstance(req_data, dict):
        return make_response(jsonify({"error": "Invalid payload"}), 500)

    board_format = req_data.get('board_format', 'matrix')
    board = req_data.get('board', [[]])
    stone_color = req_data.get('stone_color', 0)
    return_board = req_data.get('return_board', False)

    pachi_board = pachi_py.CreateBoard(19)
    if board_format == 'matrix':
        self.make_board_matrix(pachi_board, board)
    elif board_format == 'ij_history':
        # In this format the color arrives as a name: 'Black' maps to 2,
        # anything else to 1.
        stone_color = 2 if stone_color == 'Black' else 1
        self.make_board_ij_history(pachi_board, board)
    else:
        return make_response(jsonify({"error": "Invalid payload"}), 500)

    _pe = pachi_py.PyPachiEngine(
        pachi_board, 'uct', 'threads=8,playout=light,maximize_score')
    next_move = _pe.genmove(stone_color, '')
    _pe.curr_board.play_inplace(next_move, stone_color)

    # Converted once and reused for both the log line and the response.
    move_ij = _pe.curr_board.coord_to_ij(next_move)
    print(move_ij)

    resp = jsonify({
        'move_str': _pe.curr_board.coord_to_str(next_move),
        'move_ij': move_ij,
        'stone_color': stone_color,
        'board': _pe.curr_board.encode()[0].tolist() if return_board else None})

    # Drop the engine reference before building the final response.
    del _pe
    return make_response(resp, 200)
from gym import envs

# Location of the saved policy that is loaded below.
PATH = "./model.pt"

# Go environment registered as 'gym_go:go-v0', on a 5x5 board with the
# 'heuristic' reward method.
go_env = gym.make('gym_go:go-v0', size=5, reward_method='heuristic')
state = go_env.reset()  # Reset environment and record the starting state

# Train 5x5 model (currently disabled; the saved policy is loaded instead)
# Policy_grad(100, 10, 0.001, 0.99, go_env, False, True)

# Load trained model
# NOTE(review): torch.load unpickles by default — only point PATH at
# checkpoint files you trust.
policy = torch.load(PATH)

# Fresh 5x5 pachi board with a 'uct' engine attached to it.
b = pachi_py.CreateBoard(5)
engine = pachi_py.PyPachiEngine(b, b'uct', b'')

# Disabled manual experiment with the engine, kept for reference:
# pachi_move = engine.genmove(pachi_py.WHITE, b'0')
# b.play_inplace(pachi_move, pachi_py.WHITE)
# b.play_inplace(9, pachi_py.BLACK)
# engine.notify(9, pachi_py.BLACK)
#
# pachi_move = engine.genmove(pachi_py.WHITE, b'2')
# print(pachi_move)
#
# # print(b.coord_to_ij(8))
# b.play_inplace(9, pachi_py.BLACK)
# b.play_inplace(pachi_move, pachi_py.WHITE)

# Game-finished flag, initially False; presumably driven by a loop that
# follows this fragment — verify against the rest of the script.
done = False
def test_board_sizes():
    """A new 9x9 or 19x19 board has size**2 + 1 legal coords per color."""
    colors = (pachi_py.BLACK, pachi_py.WHITE)
    for size in (9, 19):
        board = pachi_py.CreateBoard(size)
        expected = size * size + 1
        for color in colors:
            legal = board.get_legal_coords(color)
            assert len(legal) == expected, \
                'Starting board should have size**2 + 1 legal moves'