def _botzone_reply(engine, color):
    """Generate a move for ``color`` and send it as one Botzone reply.

    Engine coordinates are 0-based and are sent 1-based; a move whose ``y``
    equals ``conf['SIZE']`` is sent as ``(-1, -1)``.

    NOTE(review): any other out-of-board ``y`` (e.g. a resign sentinel, if the
    engine can produce one here) is forwarded unchanged -- confirm that cannot
    happen with the way the engine is constructed in ``main``.
    """
    x, y, _, _, _, _, _ = engine.genmove(color)
    response = {'x': x + 1, 'y': y + 1}
    if y == conf['SIZE']:
        response['x'], response['y'] = -1, -1
    Utils.json_output({'response': response})
    # Flush explicitly: in keep-running mode the process stays alive, so
    # anything left in a block-buffered stdout pipe would never reach the
    # judge before the turn times out.
    print(">>>BOTZONE_REQUEST_KEEP_RUNNING<<<", flush=True)


def main():
    """Run the bot in keep-running mode: answer the first request, then loop.

    The module globals ``start_time`` and ``time_limit`` are (re)set at the
    start of every turn; they are not read in this function (presumably the
    search consults them -- verify against the MCTS code).  The first turn
    uses 11.6 and later turns 3.6.

    The loop ends when reading the next request raises
    ``json.JSONDecodeError``.
    """
    global start_time, time_limit

    # --- first turn ---------------------------------------------------------
    start_time = datetime.now()
    time_limit = 11.6
    raw = Utils.json_input()
    request = raw['requests']
    model = load_best_model()
    board, player = game_init()
    # (-2, -2) is treated as "no move to replay"; anything else is replayed
    # on the fresh board (requests are 1-based, the board is 0-based).
    # NOTE(review): a (-1, -1) request on the first turn would be replayed as
    # (-2, -2) -- confirm whether that case can occur.
    if not (request['x'] == -2 and request['y'] == -2):
        board, player = make_play(request['x'] - 1, request['y'] - 1, board)
    color = 'B' if player == 1 else 'W'
    engine = ModelEngine(model, conf['MCTS_SIMULATIONS'], board)
    _botzone_reply(engine, color)

    # --- subsequent turns ---------------------------------------------------
    time_limit = 3.6
    while True:
        try:
            start_time = datetime.now()
            raw = Utils.json_input()
            request = raw['requests']
            # (-1, -1) requests are not forwarded to the engine.
            # NOTE(review): the colour passed to engine.play is always 'B',
            # even when we are black ourselves -- presumably play() ignores
            # it; verify against ModelEngine.
            if not (request['x'] == -1 and request['y'] == -1):
                engine.play('B', request['x'] - 1, request['y'] - 1, update_tree=True)
            _botzone_reply(engine, color)
        except json.JSONDecodeError:
            break
class Engine(object):
    """GTP command front-end wrapping a ``ModelEngine``.

    Each public method implements the GTP command of the same name and
    returns the response payload as a string (``""`` for "no payload").
    ``parse_command`` dispatches one input line and adds the GTP framing.

    Internal coordinates are ``(x, y)`` with ``y`` counted from the top row.
    ``y == SIZE`` encodes a pass and ``y == SIZE + 1`` a resignation.
    """

    def __init__(self, model, logfile):
        """Start from an empty board and build the engine for ``model``.

        ``logfile`` is the path that ``genmove`` appends its trace line to.
        """
        self.board, self.player = game_init()
        self.start_engine(model)
        self.logfile = logfile

    def start_engine(self, model):
        """(Re)create the wrapped engine on the current ``self.board``."""
        self.engine = ModelEngine(model, conf['MCTS_SIMULATIONS'], self.board)

    def name(self):
        """Return the engine name, including model name and simulation count."""
        return "AlphaGoZero Python - {} - {} simulations".format(self.engine.model.name, conf['MCTS_SIMULATIONS'])

    def version(self):
        """Return the module's ``__version__``."""
        return __version__

    def protocol_version(self):
        """Return the GTP protocol version implemented ("2")."""
        return "2"

    def list_commands(self):
        """Return an empty command list (commands are not advertised)."""
        return ""

    def boardsize(self, size):
        """Accept only the configured board size.

        Raises:
            Exception: if ``size`` differs from ``SIZE``.
        """
        size = int(size)
        if size != SIZE:
            raise Exception("The board size in configuration is {0}x{0} but GTP asked to play {1}x{1}".format(SIZE, size))
        return ""

    def komi(self, komi):
        """Accept and ignore the komi value."""
        # Don't check komi in GTP engine. The algorithm has learned with a specific
        # komi that we don't have any way to influence after learning.
        return ""

    def parse_move(self, move):
        """Convert a GTP vertex such as ``"D4"`` or ``"pass"`` to ``(x, y)``.

        Column letters are accepted in either case.  A pass is returned as
        ``(0, SIZE)``.

        Raises:
            ValueError: if the column is not a letter or the row not a number.
        """
        if move.lower() == 'pass':
            return 0, SIZE
        # GTP vertices are case-insensitive; normalise before the lookup.
        x = string.ascii_uppercase.index(move[0].upper())
        if x >= 9:
            x -= 1  # I is a skipped letter
        row = int(move[1:]) - 1
        # GTP rows count from the bottom, internal rows from the top.
        return x, SIZE - row - 1

    def print_move(self, x, y):
        """Convert internal ``(x, y)`` to a GTP vertex, ``"pass"`` or ``"resign"``.

        This is the inverse of ``parse_move`` for board points and passes.
        """
        # The two off-board sentinels have no letter/number form.
        if y == SIZE:
            return "pass"
        if y == SIZE + 1:
            return "resign"
        row = SIZE - y - 1
        if x >= 8:
            x += 1  # I is a skipped letter
        return string.ascii_uppercase[x] + str(row + 1)

    def play(self, color, move):
        """Apply ``move`` for ``color``; ``color`` must be the side to move."""
        announced_player = COLOR_TO_PLAYER[color]
        assert announced_player == self.player
        x, y = self.parse_move(move)
        self.board, self.player = self.engine.play(color, x, y)
        return ""

    def genmove(self, color):
        """Let the engine choose a move for ``color`` and return it as text.

        ``color`` must be the side to move.  One ``PLAYER<n>`` line is appended
        to ``self.logfile`` per call.
        """
        announced_player = COLOR_TO_PLAYER[color]
        assert announced_player == self.player

        x, y, _, _, self.board, self.player, _ = self.engine.genmove(color)
        # The side to move is re-read from the returned board and replaces
        # the player value returned by the engine.
        self.player = self.board[0, 0, 0, -1]  # engine updates self.board already
        with open(self.logfile, 'a') as f:
            f.write("PLAYER" + str(self.player) + '\n')
        return self.print_move(x, y)

    def clear_board(self):
        """Reset to an empty board and restart the engine on it."""
        self.board, self.player = game_init()
        # The wrapped engine was built on the previous board; rebuild it so
        # that subsequent play/genmove calls start from the cleared position.
        self.start_engine(self.engine.model)
        return ""

    def parse_command(self, line):
        """Dispatch one GTP input line and return the framed success response.

        The first whitespace-separated token names the method to call and the
        remaining tokens are passed as positional string arguments.

        Raises:
            AttributeError: if the command is empty or unknown.
        """
        # split() without a separator collapses runs of spaces/tabs, so
        # "komi   7.5" does not produce empty-string arguments.
        tokens = line.split()
        command = tokens[0] if tokens else ""
        args = tokens[1:]
        method = getattr(self, command)
        result = method(*args)
        if not result.strip():
            return "=\n\n"
        return "= " + result + "\n\n"
def play_game(model1, model2, mcts_simulations, stop_exploration, self_play=False, num_moves=None, resign_model1=None, resign_model2=None):
    """Play one game between two models and return its record.

    ``model1`` plays black (even move numbers) and ``model2`` plays white.

    Args:
        model1, model2: models wrapped in one ``ModelEngine`` each.
        mcts_simulations: simulation count passed to both engines.
        stop_exploration: move number at which both engines' temperature is
            set to 0 (both start at 1).
        self_play: when true, both engines share one search tree, noise is
            enabled, and the opponent's tree is not updated after a move.
        num_moves: maximum number of moves; defaults to ``SIZE * SIZE * 2``.
        resign_model1, resign_model2: resign settings forwarded to the engines.

    Returns:
        dict with the per-move records (``moves``), both model names, the
        ``winner`` (1 black, -1 white, 0 draw), the winning model's name, the
        ``result`` string and the two resign settings.

    The game ends with ``end_reason`` ``'RESIGN'`` (a ``genmove`` returned
    ``y == SIZE + 1``), ``"BOTH_PASSED"`` (two consecutive ``y == SIZE``
    moves) or ``"PLAYED ALL MOVES"``.
    """
    # Function-scope import under a private alias so the timing below works
    # whether the module imported `datetime` as the module or as the class.
    import datetime as _datetime

    board, player = game_init()
    moves = []

    engine1 = ModelEngine(model1, mcts_simulations, resign=resign_model1, temperature=1, board=np.copy(board), add_noise=self_play)
    engine2 = ModelEngine(model2, mcts_simulations, resign=resign_model2, temperature=1, board=np.copy(board), add_noise=self_play)

    if self_play:
        engine2.tree = engine1.tree

    # Latest value estimate reported by each colour's engine (for the summary).
    black_value = None
    white_value = None
    skipped_last = False
    start = _datetime.datetime.now()
    end_reason = "PLAYED ALL MOVES"
    if num_moves is None:
        num_moves = SIZE * SIZE * 2

    for move_n in range(num_moves):
        if move_n == stop_exploration:
            engine1.set_temperature(0)
            engine2.set_temperature(0)

        if move_n % 2 == 0:
            x, y, policy_target, value, _, _, policy = engine1.genmove("B")
            black_value = value
            if y == SIZE + 1:
                end_reason = 'RESIGN'
                break
            engine2.play("B", x, y, update_tree=not self_play)
        else:
            x, y, policy_target, value, _, _, policy = engine2.genmove("W")
            white_value = value
            if y == SIZE + 1:
                end_reason = 'RESIGN'
                break
            # NOTE(review): "B" is passed although white just moved --
            # presumably play() ignores the colour; verify against ModelEngine.
            engine1.play("B", x, y, update_tree=not self_play)

        move_data = {
            'board': np.copy(board),
            'policy': policy_target,
            'policy_variation': np.linalg.norm(policy_target - policy),
            'value': value,
            'move': (x, y),
            'move_n': move_n,
            'player': player,
        }
        moves.append(move_data)

        if skipped_last and y == SIZE:
            end_reason = "BOTH_PASSED"
            break
        skipped_last = y == SIZE

        # Swap players
        board, player = make_play(x, y, board)

        if conf['SHOW_EACH_MOVE']:
            # Inverted here because we already swapped players
            color = "W" if player == 1 else "B"
            print("%s(%s,%s)" % (color, x, y))
            print("")
            print(show_board(board))
            print("")

    winner, black_points, white_points = get_winner(board)
    player_string = {1: "B", 0: "D", -1: "W"}
    if end_reason == 'RESIGN':
        # The loop broke before the player swap, so `player` is still the
        # side that resigned; the opponent wins regardless of the area count.
        winner = -player
        winner_string = "%s+R" % (player_string[winner])
    else:
        winner_string = "%s+%s" % (player_string[winner], abs(black_points - white_points))
    winner_engine = engine1 if (winner == 1) else engine2
    modelB, modelW = model1, model2

    if conf['SHOW_END_GAME']:
        print("")
        print("B:%s, W:%s" % (modelB.name, modelW.name))
        print("Bvalue:%s, Wvalue:%s" % (black_value, white_value))
        print(show_board(board))
        print("Game played (%s: %s) : %s" % (winner_string, end_reason, _datetime.datetime.now() - start))

    game_data = {
        'moves': moves,
        'modelB_name': modelB.name,
        'modelW_name': modelW.name,
        'winner': winner,
        'winner_model': winner_engine.model.name,
        'result': winner_string,
        'resign_model1': resign_model1,
        'resign_model2': resign_model2,
    }
    return game_data