def select_move(self, game_state: GameState):
    """Choose a move by exhaustively evaluating every legal move.

    Each legal move is applied, ``best_result`` reports the best outcome the
    opponent can reach from the resulting state, and that outcome is reversed
    to obtain our own. Moves are bucketed by outcome and one is drawn at
    random from the best non-empty bucket.

    Parameters
    ----------
    game_state : GameState
        Position to move from.

    Returns
    -------
    A random winning move if one exists, otherwise a random drawing move,
    otherwise a random losing move.

    Raises
    ------
    IndexError
        If ``game_state`` has no legal moves at all (nothing to choose from).
    """
    winning_moves = []
    draw_moves = []
    losing_moves = []
    for move in game_state.legal_moves():
        # State of the game after this move is applied.
        next_state = game_state.apply_move(move)
        # The opponent's best outcome from that state determines ours.
        opponent_best_outcome = best_result(next_state)
        our_best_outcome = reverse_game_result(opponent_best_outcome)
        if our_best_outcome == GameResult.win:
            winning_moves.append(move)
        elif our_best_outcome == GameResult.draw:
            draw_moves.append(move)
        else:
            losing_moves.append(move)

    # Try to win, with drawing as the next best choice.
    if winning_moves:
        return random.choice(winning_moves)
    if draw_moves:
        return random.choice(draw_moves)
    # Every move loses: pick among the losing moves. The previous code drew
    # from draw_moves here, which is always empty at this point.
    return random.choice(losing_moves)
def get_handicap(sgf):
    """Return a game state with the SGF record's initial handicap applied.

    Parameters
    ----------
    sgf : object
        Parsed SGF game record. It must provide ``get_handicap()`` and
        ``get_root().get_setup_stones()``.

    Returns
    -------
    game_state : GameState
        A fresh 19x19 game when there is no handicap; otherwise a state whose
        board holds the black setup stones, with white to play.
    first_move_done : bool
        True when a handicap was applied (the board is not empty).
    """
    go_board = Board(19, 19)
    first_move_done = False
    move = None
    game_state = GameState.new_game(19)
    handicap = sgf.get_handicap()
    if handicap is not None and handicap != 0:
        for setup in sgf.get_root().get_setup_stones():
            for move in setup:
                row, col = move
                # Setup coordinates are shifted by one to build the Point.
                go_board.place_stone(Player.black, Point(row + 1, col + 1))
        first_move_done = True
        # NOTE(review): the last argument is the raw (row, col) of the final
        # setup stone, not a Move object - confirm GameState accepts that.
        game_state = GameState(go_board, Player.white, None, move)
    return game_state, first_move_done
def get_handicap(self, sgf):
    """Return a game state with the SGF record's initial handicap applied.

    Parameters
    ----------
    sgf : object
        Parsed SGF game record. It must provide ``get_handicap()`` and
        ``get_root().get_setup_stones()``.

    Returns
    -------
    game_state : GameState
        A fresh 19x19 game when there is no handicap; otherwise a state whose
        board holds the black setup stones, with white to play.
    first_move_done : bool
        True when a handicap was applied.
    """
    go_board = Board(19, 19)
    first_move_done = False
    # Initialised up front so the GameState call below cannot hit an unbound
    # name when a handicap is declared but no setup stones are listed.
    move = None
    game_state = GameState.new_game(19)
    handicap = sgf.get_handicap()
    if handicap is not None and handicap != 0:
        for setup in sgf.get_root().get_setup_stones():
            for move in setup:
                row, col = move
                # Setup coordinates are shifted by one to build the Point.
                go_board.place_stone(Player.black, Point(row + 1, col + 1))
        first_move_done = True
        # NOTE(review): the last argument is the raw (row, col) of the final
        # setup stone, not a Move object - confirm GameState accepts that.
        game_state = GameState(go_board, Player.white, None, move)
    return game_state, first_move_done
def main():
    """Play two ten-game series on a 4x4 board and print each tally.

    Black is always a ``RandomBot``. White is ``MCBot(30)`` in the first
    series and ``MinimaxBot(3, capture_diff)`` in the second. After each
    series the win tally and the elapsed wall-clock time are printed.
    """
    board_size = 4

    def play_series(make_white_bot):
        # One series: ten games with fresh bots per game, then one summary.
        tally = {}
        started = time.time()
        for _ in range(10):
            game = GameState.new_game(board_size)
            bots = {
                Player.black: RandomBot(),
                Player.white: make_white_bot(),
            }
            while not game.is_over():
                chosen = bots[game.next_player].select_move(game)
                game = game.apply_move(chosen)
            # A positive capture_diff is credited to the player to move,
            # a negative one to the other player; zero counts for nobody.
            if capture_diff(game) > 0:
                credited = game.next_player
                tally[credited] = tally.get(credited, 0) + 1
            elif capture_diff(game) < 0:
                credited = game.next_player.other
                tally[credited] = tally.get(credited, 0) + 1
        print(tally, time.time() - started)

    play_series(lambda: MCBot(30))
    play_series(lambda: MinimaxBot(3, capture_diff))
def eval_simulate_game(
        black_agent,
        white_agent,
        board_size,
):
    """Play one evaluation game between two agents and return its record.

    Both agents play at temperature 0.05. The game stops when it is over or
    after ``2 * board_size * board_size`` moves, whichever comes first.

    Parameters
    ----------
    black_agent, white_agent
        Agents exposing ``set_temperature`` and ``select_move``.
    board_size
        Size passed to ``GameState.new_game``.

    Returns
    -------
    GameRecord
        The moves played, the winner and the winning margin.
    """
    moves = []
    game = GameState.new_game(board_size)
    agents = {
        Player.black: black_agent,
        Player.white: white_agent,
    }
    max_moves = 2 * board_size * board_size
    num_moves = 0
    # Logical `and` (not bitwise `&`): short-circuits and does not depend on
    # is_over() returning a strict bool.
    while not game.is_over() and num_moves < max_moves:
        agents[game.next_player].set_temperature(0.05)
        next_move = agents[game.next_player].select_move(game)
        moves.append(next_move)
        game = game.apply_move(next_move)
        num_moves += 1

    print('number of moves: %d' % num_moves)
    print_board(game.board)
    game_result = scoring.compute_game_result(game)
    print(game_result)

    return GameRecord(
        moves=moves,
        winner=game_result.winner,
        margin=game_result.winning_margin,
    )
def __init__(self, go_bot, termination=None, handicap=0, opponent='gnugo',
             output_sgf='out.sgf', our_color='b'):
    """Set up a local match between ``go_bot`` and an external opponent.

    Parameters
    ----------
    go_bot
        Bot to play with; wrapped in a ``TerminationAgent``.
    termination
        Termination strategy handed to ``TerminationAgent``.
    handicap
        Handicap value stored on the instance.
    opponent
        Opponent name, turned into a command line by ``opponent_cmd``.
    output_sgf
        Path given to ``SGFWriter``.
    our_color
        ``'b'`` to play black; any other value plays white.
    """
    self.bot = TerminationAgent(go_bot, termination)
    self.handicap = handicap
    self.game_state = GameState.new_game(19)
    self.sgf = SGFWriter(output_sgf)

    if our_color == 'b':
        self.our_color = Player.black
    else:
        self.our_color = Player.white
    self.their_color = self.our_color.other

    # Start the opponent as a child process with text-mode, line-buffered
    # pipes on stdin and stdout.
    self.gtp_stream = subprocess.Popen(
        self.opponent_cmd(opponent),
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        bufsize=1,
        universal_newlines=True,
    )

    # state
    self._stopped = False
def test_new_game(self):
    """Applying the first move links states and places a black stone.

    After a play at (16, 16) on a new 19x19 game, the new state must point
    back at the starting state, have white to move, and hold a black stone
    on the played point.
    """
    first_point = Point(16, 16)
    start = GameState.new_game(19)
    after_first = start.apply_move(Move.play(first_point))

    self.assertEqual(start, after_first.previous_state)
    self.assertEqual(Player.white, after_first.next_player)
    self.assertEqual(Player.black, after_first.board.get(first_point))
def simulate_game(black_player, white_player, board_size):
    """Play one self-play game with a two-phase temperature schedule.

    The agent to move plays at temperature 1.0 for the first 16 moves of the
    game (pick randomly) and at 0.05 afterwards (favor the best-looking
    move). The final board and the result are printed.

    Returns
    -------
    GameRecord
        The moves played, the winner and the winning margin.
    """
    agents = {
        Player.black: black_player,
        Player.white: white_player,
    }
    game = GameState.new_game(board_size)
    moves = []
    move_count = 0
    while not game.is_over():
        mover = agents[game.next_player]
        mover.set_temperature(1.0 if move_count < 16 else 0.05)
        chosen = mover.select_move(game)
        moves.append(chosen)
        game = game.apply_move(chosen)
        move_count += 1

    print_board(game.board)
    game_result = scoring.compute_game_result(game)
    print(game_result)

    return GameRecord(
        moves=moves,
        winner=game_result.winner,
        margin=game_result.winning_margin,
    )
def test_4_alphago_mcts(self):
    """Smoke test: AlphaGoMCTS selects a move from a new 19x19 game.

    If a GPU is present, TensorFlow is first restricted to the first GPU with
    memory growth and soft device placement enabled; a RuntimeError from that
    setup is printed rather than raised. The three agents are loaded from the
    ``test_alphago_*.h5`` files. No assertion is made on the chosen move.
    """
    print("TEST 4\n=====================================================")
    gpu_devices = tf.config.experimental.list_physical_devices('GPU')
    if gpu_devices:
        # Restrict TensorFlow to only use the first GPU.
        try:
            tf.config.experimental.set_visible_devices(gpu_devices[0], 'GPU')
            tf.config.experimental.set_memory_growth(gpu_devices[0], True)
            tf.config.set_soft_device_placement(True)
        except RuntimeError as err:
            print(err)

    fast_policy = load_prediction_agent(
        h5py.File('test_alphago_sl_policy.h5', 'r'))
    strong_policy = load_policy_agent(
        h5py.File('test_alphago_rl_policy.h5', 'r'))
    value = load_value_agent(
        h5py.File('test_alphago_value.h5', 'r'))

    searcher = AlphaGoMCTS(
        strong_policy,
        fast_policy,
        value,
        num_simulations=20,
        depth=5,
        rollout_limit=10,
    )
    opening_state = GameState.new_game(19)
    searcher.select_move(opening_state)
def __init__(self, go_bot, termination=None, handicap=0, opponent='gnugo',
             output_sgf="out.sgf", our_color='b'):
    """Set up a local match between ``go_bot`` and an external opponent.

    Parameters
    ----------
    go_bot
        Bot to play with; wrapped in a ``TerminationAgent``.
    termination
        Termination strategy handed to ``TerminationAgent``.
    handicap
        Handicap value stored on the instance.
    opponent
        Opponent name, turned into a command line by ``opponent_cmd``.
    output_sgf
        Path given to ``SGFWriter``.
    our_color
        ``'b'`` to play black; any other value plays white.
    """
    self.bot = TerminationAgent(go_bot, termination)
    self.handicap = handicap
    self._stopped = False

    self.game_state = GameState.new_game(19)
    self.sgf = SGFWriter(output_sgf)

    playing_black = our_color == 'b'
    self.our_color = Player.black if playing_black else Player.white
    self.their_color = self.our_color.other

    opponent_command = self.opponent_cmd(opponent)
    # Depending on your OS, you may need to set bufsize=0 to prevent
    # readline() from blocking.
    # See: https://github.com/maxpumperla/deep_learning_and_the_game_of_go/issues/44
    self.gtp_stream = subprocess.Popen(
        opponent_command,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        bufsize=0,
    )
def run(board_size, first, second):
    """Play one verbose game between two agents and announce the winner.

    ``first`` plays black and ``second`` plays white. After every move the
    timing, the chosen move, the board and an estimated result are printed.

    Parameters
    ----------
    board_size
        Size passed to ``GameState.new_game``.
    first, second
        Agents exposing ``select_move``.
    """
    start = time.time()
    black_agent, white_agent = first, second
    agents = {
        Player.black: black_agent,
        Player.white: white_agent,
    }

    game_state = GameState.new_game(board_size)
    next_move = None
    # The loop also stops on the first pass that gets played, because a
    # random bot never ends the game by itself.
    while not game_state.is_over() and not (
            next_move is not None and next_move.is_pass):
        move_timer_start = time.time()
        next_move = agents[game_state.next_player].select_move(game_state)
        # Black gets exactly one retry when its first choice is a pass.
        if game_state.next_player is Player.black and next_move.is_pass:
            next_move = agents[game_state.next_player].select_move(game_state)
        print(
            f'{game_state.next_player} made move in {time.time() - move_timer_start} s'
        )
        print(f'{game_state.next_player} selected {next_move}')
        game_state = game_state.apply_move(next_move)

        print(chr(27) + '[2J')  # clears the terminal
        print_board(game_state.board)
        print('Estimated result: ')
        print(scoring.compute_game_result(game_state))

    print(f'Finished game in {time.time() - start} s')
    game_result = scoring.compute_game_result(game_state)
    print(game_result)

    # Any result other than a black win is treated as a win for white.
    if game_result.winner == Player.black:
        first_won = black_agent is first
    else:
        first_won = white_agent is first
    print("First agent wins!" if first_won else "Second agent wins!")
def main():
    """Build a small two-headed network, play three moves, train once.

    The network is a stack of four 64-filter 3x3 convolution blocks feeding a
    softmax policy head (``encoder.num_moves()`` outputs) and a tanh value
    head. Two ``ZeroAgent`` instances share the model, each with its own
    experience collector. Black, white and black again each select a move;
    the episode is then scored as a black win and black trains on the
    combined experience.
    """
    board_size = 9
    encoder = zero.ZeroEncoder(board_size)

    board_input = Input(shape=encoder.shape(), name='board_input')
    pb = board_input
    for _ in range(4):
        pb = Conv2D(64, (3, 3), padding='same',
                    data_format='channels_first')(pb)
        pb = BatchNormalization(axis=1)(pb)
        pb = Activation('relu')(pb)

    # Policy output
    policy_conv = Conv2D(2, (1, 1), data_format='channels_first')(pb)
    policy_batch = BatchNormalization(axis=1)(policy_conv)
    policy_relu = Activation('relu')(policy_batch)
    policy_flat = Flatten()(policy_relu)
    policy_output = Dense(encoder.num_moves(),
                          activation='softmax')(policy_flat)

    # Value output
    value_conv = Conv2D(1, (1, 1), data_format='channels_first')(pb)
    value_batch = BatchNormalization(axis=1)(value_conv)
    value_relu = Activation('relu')(value_batch)
    value_flat = Flatten()(value_relu)
    value_hidden = Dense(256, activation='relu')(value_flat)
    value_output = Dense(1, activation='tanh')(value_hidden)

    model = Model(inputs=[board_input],
                  outputs=[policy_output, value_output])

    c1 = zero.ZeroExperienceCollector()
    c2 = zero.ZeroExperienceCollector()
    black_agent = zero.ZeroAgent(model, encoder, rounds_per_move=10, c=2.0)
    white_agent = zero.ZeroAgent(model, encoder, rounds_per_move=10, c=2.0)
    black_agent.set_collector(c1)
    white_agent.set_collector(c2)

    print('Starting the game!')
    game = GameState.new_game(board_size)

    c1.begin_episode()
    c2.begin_episode()
    black_move = black_agent.select_move(game)
    print('B', black_move)
    game = game.apply_move(black_move)
    white_move = white_agent.select_move(game)
    print('W', white_move)
    # Apply white's move before black chooses again; otherwise black would
    # select from a stale position in which it is still white's turn.
    game = game.apply_move(white_move)
    black_move = black_agent.select_move(game)
    print('B', black_move)

    c1.complete_episode(1)
    c2.complete_episode(-1)
    exp = zero.combine_experience([c1, c2])
    black_agent.train(exp, 0.01, 2048)
def best_result(game_state: GameState):
    """Return the best result the player to move can force from this state.

    A finished game is scored from the point of view of
    ``game_state.next_player``. Otherwise every legal move is tried, the
    opponent's best result from the resulting state is computed recursively
    and reversed, and the maximum over all moves is returned (starting from
    ``GameResult.loss``).
    """
    if game_state.is_over():
        if game_state.winner() == game_state.next_player:
            return GameResult.win
        if game_state.winner() is None:
            return GameResult.draw
        return GameResult.loss

    best_so_far = GameResult.loss
    for candidate in game_state.legal_moves():
        # What is best for the opponent after this move is, reversed, what
        # this move yields for us.
        opponent_outcome = best_result(game_state.apply_move(candidate))
        best_so_far = max(reverse_game_result(opponent_outcome), best_so_far)
    return best_so_far
def game(self) -> GameState:
    """Return the game for this position, building and caching it on first use.

    The board is a 19x19 board holding the initial black and white stones;
    ``initial_player`` (``'b'`` for black, anything else for white) moves
    first, and every entry of ``moves`` is then applied in order. An entry
    that translates to a falsy point is played as a pass.
    """
    if self._game:
        return self._game

    black_points = batch_translate_labels_to_coordinates(self.initial_black)
    white_points = batch_translate_labels_to_coordinates(self.initial_white)
    move_points = batch_translate_labels_to_coordinates(self.moves)

    if self.initial_player == 'b':
        first_player = Player.black
    else:
        first_player = Player.white

    board = Board(19, 19)
    for point in black_points:
        board.place_stone(Player.black, point)
    for point in white_points:
        board.place_stone(Player.white, point)

    self._game = GameState(board, first_player, None, None)
    for move_point in move_points:
        if move_point:
            next_move = Move.play(move_point)
        else:
            next_move = Move.pass_turn()
        self._game = self._game.apply_move(next_move)
    return self._game
def simulate_game(black_player, white_player):
    """Play one game on a ``BOARD_SIZE`` board and return its record.

    The two players alternate through ``select_move`` until the game is
    over. The computed result is printed.

    Returns
    -------
    GameRecord
        The moves played and the winner.
    """
    agents = {
        Player.black: black_player,
        Player.white: white_player,
    }
    played = []
    game = GameState.new_game(BOARD_SIZE)
    while not game.is_over():
        choice = agents[game.next_player].select_move(game)
        played.append(choice)
        game = game.apply_move(choice)

    outcome = scoring.compute_game_result(game)
    print(outcome)
    return GameRecord(moves=played, winner=outcome.winner)
def test_encoder(self):
    """AlphaGoEncoder encodes a position and reports its fixed metadata.

    Encodes the state after a single play at (16, 16) on a 19x19 board, then
    checks the encoder's name, board dimensions, plane count (49) and shape.
    """
    alphago = AlphaGoEncoder()

    start = GameState.new_game(19)
    next_state = start.apply_move(Move.play(Point(16, 16)))
    alphago.encode(next_state)

    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(alphago.name(), 'alphago')
    self.assertEqual(alphago.board_height, 19)
    self.assertEqual(alphago.board_width, 19)
    self.assertEqual(alphago.num_planes, 49)
    self.assertEqual(alphago.shape(), (49, 19, 19))
def simulate():
    """Play the learner bot against the current best bot.

    The learner is loaded from ``agz_bot_train.h5`` and the best bot from
    ``agz_bot.h5``; colors are assigned at random. After the game both
    models are deleted and the Keras session is cleared to release memory.

    Returns
    -------
    bool
        True when the learner won, or when no best bot exists yet (the
        learner wins by default). False when the best bot won. Any result
        other than a black win is counted as a win for white.

    Raises
    ------
    AssertionError
        If ``agz_bot_train.h5`` does not exist.
    """
    assert os.path.exists('agz_bot_train.h5')

    # Load the known best bot, if there is one.
    if not os.path.exists('agz_bot.h5'):
        return True  # learner wins by default: there is no best bot yet
    with h5py.File('agz_bot.h5', 'r') as best_bot:
        best_agent = zero.load_zero_agent(best_bot)

    # Load the learner bot (explicit read-only mode).
    with h5py.File('agz_bot_train.h5', 'r') as learn_bot:
        learner_agent = zero.load_zero_agent(learn_bot)

    # Randomly decide who moves first.
    black_agent = random.choice([best_agent, learner_agent])
    white_agent = best_agent if black_agent is learner_agent else learner_agent
    agents = {
        Player.black: black_agent,
        Player.white: white_agent,
    }

    size = learner_agent.encoder.board_size
    game = GameState.new_game((size, size))
    while not game.is_over():
        next_move = agents[game.next_player].select_move(game)
        game = game.apply_move(next_move)
    game_result = scoring.compute_game_result(game)

    # Record the color assignment BEFORE the agent references are dropped.
    # The previous code compared after setting both to None, so the best
    # bot could never be detected as the winner.
    best_played_black = black_agent is best_agent

    # Release the models and the Keras session.
    game = None
    del black_agent.model
    del white_agent.model
    black_agent = white_agent = None
    import gc
    K.clear_session()
    gc.collect()

    if game_result.winner == Player.black:
        best_won = best_played_black
    else:
        best_won = not best_played_black
    return not best_won
def get_handicap(sgf):
    """Apply the SGF record's handicap stones and return the resulting state.

    Every setup stone is placed for black on a 19x19 board and mirrored into
    a ``Board_Ext`` through ``place_stone_ext``.

    Parameters
    ----------
    sgf : object
        Parsed SGF game record. It must provide ``get_handicap()`` and
        ``get_root().get_setup_stones()``.

    Returns
    -------
    game_state : GameState
        A fresh 19x19 game when there is no handicap; otherwise a state on
        the board with the handicap stones, with white to play.
    first_move_done : bool
        True when a handicap was applied.
    board_ext : Board_Ext
        Extended board, created from the fresh game's board and updated with
        each handicap stone.
    """
    go_board = Board(19, 19)
    first_move_done = False
    move = None
    game_state = GameState.new_game(19)
    board_ext = Board_Ext(game_state.board)
    handicap = sgf.get_handicap()
    if handicap is not None and handicap != 0:
        for setup in sgf.get_root().get_setup_stones():
            for move in setup:
                row, col = move
                # Setup coordinates are shifted by one to build the Point.
                point = Point(row + 1, col + 1)
                # Black gets the handicap stones, on both board objects.
                go_board.place_stone(Player.black, point)
                board_ext.place_stone_ext(go_board, 'b', point)
        first_move_done = True
        # NOTE(review): the last argument is the raw (row, col) of the final
        # setup stone, not a Move object - confirm GameState accepts that.
        game_state = GameState(go_board, Player.white, None, move)
    return game_state, first_move_done, board_ext
def test_4_alphago_mcts(self):
    """Smoke test: AlphaGoMCTS selects a move from a new 19x19 game.

    The fast policy, strong policy and value agents are loaded from the
    ``test_alphago_*.h5`` files. No assertion is made on the chosen move;
    the test passes when move selection completes without raising.
    """
    fast_policy = load_prediction_agent(
        h5py.File('test_alphago_sl_policy.h5', 'r'))
    strong_policy = load_policy_agent(
        h5py.File('test_alphago_rl_policy.h5', 'r'))
    value = load_value_agent(
        h5py.File('test_alphago_value.h5', 'r'))

    search_settings = {
        'num_simulations': 20,
        'depth': 5,
        'rollout_limit': 10,
    }
    mcts_agent = AlphaGoMCTS(strong_policy, fast_policy, value,
                             **search_settings)

    empty_board_state = GameState.new_game(19)
    mcts_agent.select_move(empty_board_state)
def simulate_game(black_player, white_player):
    """Play one game on a ``BOARD_SIZE`` board and return the winner.

    The two players alternate through ``select_move`` until the game is
    over; the winner comes from ``scoring.compute_game_result``.
    """
    agents = {
        Player.black: black_player,
        Player.white: white_player,
    }
    game = GameState.new_game(BOARD_SIZE)
    while not game.is_over():
        mover = agents[game.next_player]
        game = game.apply_move(mover.select_move(game))
    return scoring.compute_game_result(game).winner
def __init__(self, go_bot, termination=None, handicap=0, opponent='gnugo',
             output_sgf="out.sgf", our_color='b'):
    """Set up a local match between ``go_bot`` and an external opponent.

    Parameters
    ----------
    go_bot
        Bot to play with; wrapped in a ``TerminationAgent``.
    termination
        Termination strategy handed to ``TerminationAgent``.
    handicap
        Handicap value stored on the instance.
    opponent
        Opponent name, turned into a command line by ``opponent_cmd``.
    output_sgf
        Path given to ``SGFWriter``.
    our_color
        ``'b'`` to play black; any other value plays white.
    """
    # Our bot, wrapped so the termination strategy is applied.
    self.bot = TerminationAgent(go_bot, termination)
    self.handicap = handicap
    # The match runs until this flag is set.
    self._stopped = False

    self.game_state = GameState.new_game(19)
    # The game is recorded through an SGF writer.
    self.sgf = SGFWriter(output_sgf)

    if our_color == 'b':
        self.our_color = Player.black
    else:
        self.our_color = Player.white
    self.their_color = self.our_color.other

    # The opponent runs as a child process; we talk to it through pipes on
    # its stdin and stdout.
    opponent_command = self.opponent_cmd(opponent)
    self.gtp_stream = subprocess.Popen(
        opponent_command,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
    )
def main():
    """Run an interactive 4x4 game: human plays black, MinimaxBot plays white.

    Before every move the terminal is cleared and the board printed. The
    human is prompted with ``-- `` until the entered coordinates form a valid
    move; the bot searches with ``MinimaxBot(5, capture_diff)``.
    """
    board_size = 4
    game = GameState.new_game(board_size)
    bot = MinimaxBot(5, capture_diff)

    while not game.is_over():
        print(chr(27) + "[2J")
        print_board(game.board)
        if game.next_player == Player.black:
            # Keep asking until the entered point is a valid move.
            while True:
                coords = input('-- ').upper().strip()
                move = Move.play(point_from_coords(coords))
                if game.is_valid_move(move):
                    break
        else:
            move = bot.select_move(game)
        print_move(game.next_player, move)
        game = game.apply_move(move)
def __init__(self, termination_agent, termination=None):
    """Create a frontend that reads commands from stdin and writes to stdout.

    Parameters
    ----------
    termination_agent
        Agent stored as ``self.agent``.
    termination
        Accepted but not used by this constructor.
    """
    self.agent = termination_agent
    self.game_state = GameState.new_game(19)
    self._input = sys.stdin
    self._output = sys.stdout
    self._stopped = False

    # Command name -> handler. komi and the time commands are accepted and
    # routed to self.ignore.
    self.handlers = dict(
        boardsize=self.handle_boardsize,
        clear_board=self.handle_clear_board,
        fixed_handicap=self.handle_fixed_handicap,
        genmove=self.handle_genmove,
        known_command=self.handle_known_command,
        komi=self.ignore,
        showboard=self.handle_showboard,
        time_settings=self.ignore,
        time_left=self.ignore,
        play=self.handle_play,
        protocol_version=self.handle_protocol_version,
        quit=self.handle_quit,
    )
def simulate_game(black_player, white_player, board_size):
    """Play one game between two players and return its record.

    The players alternate through ``select_move`` until the game is over.
    The final board and the computed result are printed.

    Returns
    -------
    GameRecord
        The moves played, the winner and the winning margin.
    """
    agents = {
        Player.black: black_player,
        Player.white: white_player,
    }
    history = []
    game = GameState.new_game(board_size)
    while not game.is_over():
        selected = agents[game.next_player].select_move(game)
        history.append(selected)
        game = game.apply_move(selected)

    print_board(game.board)
    outcome = scoring.compute_game_result(game)
    print(outcome)

    return GameRecord(
        moves=history,
        winner=outcome.winner,
        margin=outcome.winning_margin,
    )
def simulate_game(board_size, black_agent, black_collector, white_agent,
                  white_collector):
    """Play one game and close both collectors' episodes with the outcome.

    Both collectors begin an episode before the first move. When black wins,
    black's episode is completed with reward 1 and white's with -1; for any
    other result the rewards are swapped.
    """
    print('Starting the game!')
    game = GameState.new_game(board_size)
    agents = {
        Player.black: black_agent,
        Player.white: white_agent,
    }

    black_collector.begin_episode()
    white_collector.begin_episode()
    while not game.is_over():
        mover = agents[game.next_player]
        game = game.apply_move(mover.select_move(game))

    outcome = scoring.compute_game_result(game)
    black_reward = 1 if outcome.winner == Player.black else -1
    black_collector.complete_episode(black_reward)
    white_collector.complete_episode(-black_reward)
def main():
    """Run a bot-vs-bot game on a 5x5 board with console and pygame output.

    Black is a ``RandomBot`` and white an ``AlphaBetaAgent(2, capture_diff)``.
    Each turn clears the terminal, prints the board and draws it through
    ``GuiBoard``. When the game is over the winner and score are printed and
    the program waits for Enter.
    """
    board_size = 5
    pygame.init()
    pygame.display.set_caption('Goban')

    game = GameState.new_game(board_size)
    bots = {
        gotypes.Player.black: RandomBot(),
        gotypes.Player.white: AlphaBetaAgent(2, capture_diff),
    }

    while not game.is_over():
        print(chr(27) + "[2J")
        print_board(game.board)
        GuiBoard.draw(game.board)

        mover = game.next_player
        bot_move = bots[mover].select_move(game)
        print_move(mover, bot_move)
        # NOTE(review): apply_move is called here with (player, move); confirm
        # that this GameState's apply_move really takes the player as well.
        game = game.apply_move(game.next_player, bot_move)

    print("winner is:", game.winner())
    print("score is is:", compute_game_result(game))
    input("Press Enter to continue...")
def generate_game(board_size, game_id_str, rounds_per_move=10, c=2.0):
    """Play one self-play game and return the experience it produced.

    Both sides are loaded from ``agz_bot.h5`` when that file exists;
    otherwise both use a default model from ``zero_model(board_size)``.
    After the game the models are deleted and the Keras session is cleared
    to release memory.

    Parameters
    ----------
    board_size
        Board size for the game, the encoder and the default model.
    game_id_str : str
        Label used in log messages and echoed back in the return value.
    rounds_per_move : int
        Passed to ``ZeroAgent`` when the default model is used.
    c : float
        Passed to ``ZeroAgent`` when the default model is used.

    Returns
    -------
    tuple
        ``(combined_experience, game_id_str, elapsed_seconds)``.
    """
    start = time.time()
    print(f'Generating {game_id_str}...')

    game = GameState.new_game(board_size)
    encoder = zero.ZeroEncoder(board_size)

    # Load the current best agent, if any. The agents have to be able to pass
    # through cPickle, which is why they are not simply reused.
    if os.path.exists('agz_bot.h5'):
        # Explicit read-only mode: nothing is written here, and older h5py
        # versions default to a writable mode when none is given.
        with h5py.File('agz_bot.h5', 'r') as bot_file:
            black_agent = zero.load_zero_agent(bot_file)
            white_agent = zero.load_zero_agent(bot_file)
    else:
        print(f'WARN: using default model to generate {game_id_str}')
        model = zero_model(board_size)
        black_agent = zero.ZeroAgent(model, encoder,
                                     rounds_per_move=rounds_per_move, c=c)
        white_agent = zero.ZeroAgent(model, encoder,
                                     rounds_per_move=rounds_per_move, c=c)

    agents = {
        Player.black: black_agent,
        Player.white: white_agent,
    }

    c1 = zero.ZeroExperienceCollector()
    c2 = zero.ZeroExperienceCollector()
    black_agent.set_collector(c1)
    white_agent.set_collector(c2)
    c1.begin_episode()
    c2.begin_episode()

    while not game.is_over():
        next_move = agents[game.next_player].select_move(game)
        game = game.apply_move(next_move)

    # Any result other than a black win rewards white.
    game_result = scoring.compute_game_result(game)
    if game_result.winner == Player.black:
        c1.complete_episode(1)
        c2.complete_episode(-1)
    else:
        c1.complete_episode(-1)
        c2.complete_episode(1)

    combined = zero.combine_experience([c1, c2], board_size)

    # Drop every reference to the game, collectors and models, then clear
    # the Keras session.
    c1 = c2 = game_result = None
    model = encoder = None
    game = None
    del black_agent.model
    del white_agent.model
    black_agent = white_agent = None
    import gc
    K.clear_session()
    gc.collect()

    return combined, game_id_str, time.time() - start
from dlgo.gosgf import Sgf_game
from dlgo.goboard_fast import GameState, Move
from dlgo.gotypes import Point
from dlgo.utils import print_board

# Replay a short SGF record move by move, printing the board after each move.
sgf_content = "(;GM[1]FF[4]SZ[9];B[ee];W[ef];B[ff]" + \
              ";W[df];B[fe];W[fc];B[ec];W[gd];B[fb])"

sgf_game = Sgf_game.from_string(sgf_content)

# Use the board size declared by the record (SZ[9]) instead of a hard-coded
# 19, so the moves land on a board of the size they were played on.
game_state = GameState.new_game(sgf_game.get_size())

for item in sgf_game.main_sequence_iter():
    color, move_tuple = item.get_move()
    # Nodes without a move (such as the root) and passes are skipped.
    if color is not None and move_tuple is not None:
        row, col = move_tuple
        # SGF move coordinates are shifted by one to build the Point.
        point = Point(row + 1, col + 1)
        move = Move.play(point)
        game_state = game_state.apply_move(move)
        print_board(game_state.board)
def handle_clear_board(self):
    """Replace the current game with a new 19x19 game and report success."""
    fresh_game = GameState.new_game(19)
    self.game_state = fresh_game
    return response.success()
class Position:
    """A Go position (setup stones plus a move list) and its analysis command.

    ``game`` replays the position into a ``GameState``; ``command`` builds a
    ``Command`` for the position after picking, among the available board
    transformations, the one with the lowest integer encoding.
    """

    # Rule set name copied into the command and the command id.
    ruleset: str = 'japanese'
    # Komi copied into the command and the command id.
    komi: float = 6.5
    # Labels of the stones on the board before the first move.
    initial_black: Optional[List[str]] = None
    initial_white: Optional[List[str]] = None
    # 'b' when black moves first; any other value means white.
    initial_player: Optional[str] = 'b'
    # Labels of the moves played so far.
    moves: Optional[List[str]] = None
    # Cache for the replayed game, filled by the ``game`` property.
    _game: Optional[GameState] = None

    @property
    def game(self) -> GameState:
        """Return the replayed game, building and caching it on first access.

        The board is a 19x19 board holding the initial stones, and every
        entry of ``moves`` is applied in order. An entry that translates to
        a falsy point is played as a pass.
        """
        if not self._game:
            black = batch_translate_labels_to_coordinates(self.initial_black)
            white = batch_translate_labels_to_coordinates(self.initial_white)
            move_points = batch_translate_labels_to_coordinates(self.moves)
            player = (Player.black
                      if self.initial_player == 'b' else Player.white)

            board = Board(19, 19)
            for b in black:
                board.place_stone(Player.black, b)
            for w in white:
                board.place_stone(Player.white, w)

            self._game = GameState(board, player, None, None)
            for move_point in move_points:
                next_move = (Move.pass_turn()
                             if not move_point else Move.play(move_point))
                self._game = self._game.apply_move(next_move)

        return self._game

    def command(self, move: Optional[Move] = None) -> Tuple[Command, int]:
        """Build the analysis command for this position.

        Parameters
        ----------
        move : Move, optional
            When given, the command describes the position after this move
            has been played on top of ``self.game``.

        Returns
        -------
        tuple
            ``(command, ordinal)`` where ``ordinal`` is the index, within
            ``transformations``, of the transformation that was selected.
        """
        # The game that reflects the passed move having been played.
        game = self.game if not move else self.game.apply_move(move)

        # Code every intersection: 1 = black stone, 2 = white stone,
        # 3 = empty but not a valid move, 0 = empty and playable. Only
        # non-zero codes are kept.
        point_plus_code: List[Tuple[Point, int]] = []
        for i in intersections:
            color = game.board.get(i)
            if not color:
                code = 0 if game.is_valid_move(Move.play(i)) else 3
            else:
                code = 1 if color == Player.black else 2
            if code:
                point_plus_code.append((i, code))

        # Select the transformation with the lowest encoding; the first one
        # wins on ties because the comparison is strict.
        selected_form = float('inf')
        selected_ordinal = -1
        selected_transformation = None
        for ordinal, transformation in enumerate(transformations):
            encoding = self._encode_point_colorings(point_plus_code,
                                                    transformation)
            if encoding < selected_form:
                selected_form = encoding
                selected_ordinal = ordinal
                selected_transformation = transformation

        # Encode the resulting board position as a string.
        position_representation = self._convert_code_to_dense_string(
            selected_form)

        # Transform the starting stones with the selected transformation:
        # black first, then white.
        initial_positions_plus_colors: List[Tuple[Point, int]] = []
        initial_stones: List[MoveInfo] = []
        setup_groups = (
            (self.initial_black, 1, KataGoPlayer.b),
            (self.initial_white, 2, KataGoPlayer.w),
        )
        for labels, code, kata_player in setup_groups:
            if not labels:
                continue
            transformed_points = [
                selected_transformation(translate_label_to_point(label))
                for label in labels
            ]
            initial_positions_plus_colors += [
                (point, code) for point in transformed_points
            ]
            initial_stones += [
                MoveInfo(kata_player, coords_from_point(point))
                for point in transformed_points
            ]

        initial_form = self._encode_point_colorings(
            initial_positions_plus_colors)
        initial_representation = self._convert_code_to_dense_string(
            initial_form)

        # Compose an ID to use when communicating with KataGo. Because the
        # same position can be reached through multiple paths and/or
        # transformations, this ID does NOT contain the information needed
        # to return to the original representation. That exists for
        # communicating between the UI and the server ONLY.
        next_player = "b" if game.next_player == Player.black else "w"
        command_id = f'{self.ruleset}_{self.komi}_{next_player}_{initial_representation}_{position_representation}'

        # Build the command.
        command = Command()
        command.id = command_id
        command.komi = self.komi
        command.initialPlayer = (KataGoPlayer.b if self.initial_player == 'b'
                                 else KataGoPlayer.w)
        command.rules = self.ruleset
        command.initialStones = initial_stones
        command.moves = []

        # Replay the history with alternating players, starting from the
        # initial player.
        player = command.initialPlayer
        for played in game.history:
            if not played or played.is_pass:
                location = 'pass'
            else:
                location = coords_from_point(
                    selected_transformation(played.point))
            command.moves.append(MoveInfo(player, location))
            player = player.opposite

        command.analyzeTurns = [len(command.moves)]
        return command, selected_ordinal

    def _encode_point_colorings(
        self,
        point_plus_code: List[Tuple[Point, int]],
        transformation: Optional[Callable[[Optional[Point]],
                                          Optional[Point]]] = None
    ) -> int:
        """Pack ``(point, code)`` pairs into one integer, base 4.

        Each point contributes ``code * 4**k`` with
        ``k = (row - 1) * 19 + (col - 1)``, computed after the optional
        ``transformation`` has been applied to the point.
        """
        encoding = 0
        for point, code in point_plus_code:
            transformed = transformation(point) if transformation else point
            power = (transformed.row - 1) * 19 + (transformed.col - 1)
            encoding += code * (4**power)
        return encoding

    def _convert_code_to_dense_string(self, value: int) -> str:
        """Return ``value`` as base64 text of its minimal big-endian bytes.

        A falsy ``value`` (0 or None) yields the empty string.
        """
        if not value:
            value = 0
        # Smallest whole number of bytes that can hold the value.
        byte_count = (value.bit_length() + 7) // 8
        return base64.b64encode(
            value.to_bytes(byte_count, byteorder='big')).decode('utf-8')