def capture_board():
    """Build a small 7x7 board with imminent captures; black to move.

         X
         0 1 2 3 4 5 6
         . . B B . . . 0
         . B W W B . . 1
         . B W . . . . 2
       Y . . B . . . . 3
         . . . . W B . 4
         . . . W . W B 5
         . . . . W B . 6
    """
    gs = go.GameState(size=7)
    stones = (
        (go.BLACK, [(2, 0), (3, 0), (1, 1), (4, 1), (1, 2), (2, 3), (5, 4), (6, 5), (5, 6)]),
        (go.WHITE, [(2, 1), (3, 1), (2, 2), (4, 4), (3, 5), (5, 5), (4, 6)]),
    )
    # Place every black stone before any white stone; placement order matters
    # because do_move() resolves captures as stones land.
    for color, coords in stones:
        for xy in coords:
            gs.do_move(xy, color)
    gs.current_player = go.BLACK
    return gs
def simple_board():
    """Build a tiny 7x7 board for hand-coded expected results.

         X
         0 1 2 3 4 5 6
         B W . . . . . 0
         B W . . . . . 1
         B . . . B . . 2
       Y . . . B k B . 3
         . . . W B W . 4
         . . . . W . . 5
         . . . . . . . 6

    where k is a ko position (white was just captured).
    """
    gs = go.GameState(size=7)
    # Colors alternate automatically, starting with black.
    move_sequence = [
        # ladder-looking thing in the top-left
        (0, 0),  # B
        (1, 0),  # W
        (0, 1),  # B
        (1, 1),  # W
        (0, 2),  # B
        # ko position in the middle
        (3, 4),  # W
        (3, 3),  # B
        (4, 5),  # W
        (4, 2),  # B
        (5, 4),  # W
        (5, 3),  # B
        (4, 3),  # W - the ko position
        (4, 4),  # B - does the capture
    ]
    for mv in move_sequence:
        gs.do_move(mv)
    return gs
def self_atari_board():
    """Build a tiny 7x7 board for testing self-atari specifically.

    Positions marked with 'a' are self-atari for black.

         X
         0 1 2 3 4 5 6
         a W . . . W B 0
         . . . . . . . 1
         . . . . . . . 2
       Y . . W . W . . 3
         . W B a B W . 4
         . . W W W . . 5
         . . . . . . . 6

    current_player = black
    """
    gs = go.GameState(size=7)
    black_stones = [(2, 4), (4, 4), (6, 0)]
    white_stones = [(1, 0), (5, 0), (2, 3), (4, 3), (1, 4),
                    (5, 4), (2, 5), (3, 5), (4, 5)]
    # Black stones first, then white, matching the original setup order
    # (the last white placement leaves black as the player to move).
    for xy in black_stones:
        gs.do_move(xy, go.BLACK)
    for xy in white_stones:
        gs.do_move(xy, go.WHITE)
    return gs
def convert_game(self, file_name):
    """Convert one SGF game record into (state tensor, action label) pairs.

    Args:
        file_name: path to an SGF file.

    Returns:
        zip of (state tensor, encoded action) training pairs, one per move,
        where each tensor is the board position *before* its paired move.
    """
    with open(file_name, 'r') as file_object:
        sgf_object = SGFParser(file_object.read())
    c = sgf_object.parse().cursor()
    tensors = []
    actions = []
    gs = go.GameState()
    proc = Preprocess()
    while True:
        # GameTreeEndError from c.next() signals the end of the game record.
        try:
            move = self.parse_raw_move(c.next())
        except GameTreeEndError:
            break
        # BUGFIX: capture the state BEFORE applying the move so each tensor is
        # paired with the move played *from* that position. Previously the
        # tensor was taken after do_move(), so every position was labeled with
        # the move that produced it (off by one), and the final board state
        # had to be trimmed off.
        tensors.append(proc.state_to_tensor(gs))
        actions.append(self.encode_label(move))
        gs.do_move(move)
    return zip(tensors, actions)
def _sgf_init_gamestate(sgf_root):
    """Helper function to set up a GameState object from the root node
    of an SGF file
    """
    props = sgf_root.properties
    # Board size defaults to 19 when 'SZ' is absent.
    board_size = int(props.get('SZ', ['19'])[0])
    gs = go.GameState(board_size)
    # Place setup stones: 'AB' adds black stones, 'AW' adds white stones.
    for prop_name, color in (('AB', go.BLACK), ('AW', go.WHITE)):
        for stone in props.get(prop_name, []):
            gs.do_move(_parse_sgf_move(stone), color)
    # Setup done; 'PL' names the player to move ('B' means black,
    # anything else means white; default is black).
    player_prop = props.get('PL', ['B'])[0]
    gs.current_player = go.BLACK if player_prop == 'B' else go.WHITE
    return gs
def clear(self):
    """Discard the current game and start a fresh one on the same board size
    (superko rule still enforced)."""
    current_size = self._state.size
    self._state = go.GameState(current_size, enforce_superko=True)
def __init__(self, player):
    """Wrap `player` with a fresh game state (superko rule enforced)."""
    self._player = player
    self._state = go.GameState(enforce_superko=True)
def set_size(self, n):
    """Start a fresh game on an n x n board (superko rule enforced)."""
    fresh_state = go.GameState(n, enforce_superko=True)
    self._state = fresh_state
def __init__(self, player):
    """Wrap `player` with a fresh default game state."""
    self._player = player
    self._state = go.GameState()
def set_size(self, n):
    """Start a fresh game on an n x n board."""
    fresh_state = go.GameState(n)
    self._state = fresh_state
def clear(self):
    """Discard the current game and start a fresh one on the same board size."""
    current_size = self._state.size
    self._state = go.GameState(current_size)
def run_n_games(optimizer, lr, learner, opponent, num_games, mock_states=None):
    '''Run num_games games to completion, keeping track of each position and move of the learner.

    (Note: learning cannot happen until all games have completed)

    Args:
        optimizer: keras optimizer whose `lr` variable is set per game: +lr for
            games the learner won, -lr to 'unlearn' games it lost.
        lr: base (positive) learning-rate magnitude.
        learner: player being trained; must expose `policy.model` and
            `policy.preprocessor`.
        opponent: fixed player supplying the other side's moves.
        num_games: number of games to play simultaneously.
        mock_states: optional list of pre-built state objects injected for
            testing in place of fresh GameStates. (Default is None rather than
            a mutable [] to avoid the shared-default-argument pitfall.)

    Returns:
        float: fraction of the games won by the learner.
    '''
    board_size = learner.policy.model.input_shape[-1]
    learner_net = learner.policy.model
    # Allowing injection of mock state objects for testing purposes.
    if mock_states:
        states = mock_states
    else:
        states = [go.GameState(size=board_size) for _ in range(num_games)]
    # Create one list of features (aka state tensors) and one of moves for each game being played.
    state_tensors = [[] for _ in range(num_games)]
    move_tensors = [[] for _ in range(num_games)]
    # List of booleans indicating whether the 'learner' player won.
    learner_won = [None] * num_games
    # Start all odd games with moves by 'opponent'. Even games will have 'learner' black.
    learner_color = [go.BLACK if i % 2 == 0 else go.WHITE for i in range(num_games)]
    odd_states = states[1::2]
    moves = opponent.get_moves(odd_states)
    for st, mv in zip(odd_states, moves):
        st.do_move(mv)
    current = learner
    other = opponent
    idxs_to_unfinished_states = {i: states[i] for i in range(num_games)}
    while len(idxs_to_unfinished_states) > 0:
        # Snapshot (idx, state) pairs once so the batched move request and the
        # per-state loop below see the same ordering. (Also replaces the
        # Python-2-only iteritems() so this runs on Python 3 as well.)
        unfinished = list(idxs_to_unfinished_states.items())
        # Get next moves by current player for all unfinished states.
        moves = current.get_moves([state for (_, state) in unfinished])
        just_finished = []
        # Do each move to each state in order.
        for (idx, state), mv in zip(unfinished, moves):
            # Order is important here. We must get the training pair on the unmodified state before
            # updating it with do_move.
            is_learnable = current is learner and mv is not go.PASS
            if is_learnable:
                (st_tensor, mv_tensor) = _make_training_pair(state, mv, learner.policy.preprocessor)
                state_tensors[idx].append(st_tensor)
                move_tensors[idx].append(mv_tensor)
            state.do_move(mv)
            if state.is_end_of_game():
                learner_won[idx] = state.get_winner_color() == learner_color[idx]
                just_finished.append(idx)
        # Remove games that have finished from dict.
        for idx in just_finished:
            del idxs_to_unfinished_states[idx]
        # Swap 'current' and 'other' for next turn.
        current, other = other, current
    # Train on each game's results, setting the learning rate negative to 'unlearn' positions from
    # games where the learner lost.
    for (st_tensor, mv_tensor, won) in zip(state_tensors, move_tensors, learner_won):
        if not st_tensor:
            # The learner made no (non-pass) moves in this game; np.concatenate
            # on an empty list would raise, and there is nothing to train on.
            continue
        K.set_value(optimizer.lr, abs(lr) * (+1 if won else -1))
        learner_net.train_on_batch(np.concatenate(st_tensor, axis=0),
                                   np.concatenate(mv_tensor, axis=0))
    # Return the win ratio, reusing the results recorded as each game ended
    # instead of recomputing get_winner_color() for every state.
    return float(sum(learner_won)) / num_games