def _contest(self, player1, player2, first_player):
    # old model plays against the new model; players[player_index + 1]
    # maps the current color {-1, +1} to list slots {0, 2}
    players = [player2, None, player1]
    player_index = first_player

    gomoku = Gomoku(self.n, self.n_in_row, first_player)
    self.gomoku_gui.reset_status()

    while True:
        player = players[player_index + 1]

        # select best move
        prob = player.get_action_probs(gomoku)
        best_move = int(np.argmax(np.array(list(prob))))

        # execute move
        gomoku.execute_move(best_move)
        self.gomoku_gui.execute_move(player_index, best_move)

        # check game status
        ended, winner = gomoku.get_game_status()
        if ended == 1:
            return winner

        # update search tree
        player1.update_with_move(best_move)
        player2.update_with_move(best_move)

        # next player
        player_index = -player_index
def self_play(self, first_color):
    """
    This function executes one episode of self-play, starting with player 1.
    As the game is played, each turn is added as a training example to
    train_examples. The game is played until it ends. After the game ends,
    the outcome of the game is used to assign values to each example in
    train_examples.
    """

    train_examples = []

    gomoku = Gomoku(self.n, self.n_in_row, first_color)
    mcts = MCTS("./models/checkpoint.pt", self.thread_pool_size,
                self.c_puct, self.num_mcts_sims, self.c_virtual_loss,
                self.action_size, self.mcts_use_gpu)

    episode_step = 0
    while True:
        episode_step += 1

        # temperature: explore for the first explore_num moves, then greedy
        temp = self.temp if episode_step <= self.explore_num else 0
        prob = np.array(list(mcts.get_action_probs(gomoku, temp)))

        # generate sample
        board = tuple_2d_to_numpy_2d(gomoku.get_board())
        last_action = gomoku.get_last_move()
        cur_player = gomoku.get_current_color()

        sym = self.get_symmetries(board, prob)
        for b, p in sym:
            train_examples.append([b, last_action, cur_player, p])

        # Dirichlet noise at the root, as in AlphaZero:
        # pi' = 0.75 * pi + 0.25 * Dir(alpha), over legal moves only
        legal_moves = list(gomoku.get_legal_moves())
        noise = 0.25 * np.random.dirichlet(
            self.dirichlet_alpha * np.ones(np.count_nonzero(legal_moves)))

        prob_noise = 0.75 * prob
        j = 0
        for i in range(len(prob_noise)):
            if legal_moves[i] == 1:
                prob_noise[i] += noise[j]
                j += 1
        prob_noise /= np.sum(prob_noise)

        action = np.random.choice(len(prob_noise), p=prob_noise)

        # execute move
        gomoku.execute_move(action)
        mcts.update_with_move(action)

        # is ended
        ended, winner = gomoku.get_game_status()
        if ended == 1:
            # b, last_action, cur_player, p, v
            return [(x[0], x[1], x[2], x[3], x[2] * winner)
                    for x in train_examples]
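# get_symmetries is called above but not defined in this snippet. A minimal
# sketch, assuming a square n x n board and the standard AlphaZero data
# augmentation: each sample is expanded into its 8 dihedral symmetries
# (4 rotations x 2 reflections), with the policy vector reshaped to the
# board layout so it is transformed in lockstep with the board. (The later
# self_play variant also passes last_action so it can be remapped the same way.)
def get_symmetries(self, board, prob):
    n = board.shape[0]
    prob_board = np.asarray(prob).reshape(n, n)

    symmetries = []
    for k in range(4):
        b = np.rot90(board, k)
        p = np.rot90(prob_board, k)
        symmetries.append((b, p.ravel()))
        symmetries.append((np.fliplr(b), np.fliplr(p).ravel()))  # mirrored
    return symmetries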
def play_with_human(self, human_first=True, checkpoint_name="best_checkpoint"):
    t = threading.Thread(target=self.gomoku_gui.loop)
    t.start()

    # load the requested checkpoint
    libtorch_best = NeuralNetwork('./models/' + checkpoint_name + '.pt',
                                  self.libtorch_use_gpu,
                                  self.num_mcts_threads * 2)
    mcts_best = MCTS(libtorch_best, self.num_mcts_threads * 2,
                     self.c_puct, self.num_mcts_sims * 4,
                     self.c_virtual_loss, self.action_size)

    # create gomoku game
    human_color = self.gomoku_gui.get_human_color()
    gomoku = Gomoku(self.n, self.n_in_row,
                    human_color if human_first else -human_color)

    players = ["alpha", None, "human"] if human_color == 1 \
        else ["human", None, "alpha"]
    player_index = human_color if human_first else -human_color

    while True:
        player = players[player_index + 1]

        # select move
        if player == "alpha":
            prob = mcts_best.get_action_probs(gomoku)
            best_move = int(np.argmax(np.array(list(prob))))
            self.gomoku_gui.execute_move(player_index, best_move)
        else:
            self.gomoku_gui.set_is_human(True)
            # wait for the human move
            while self.gomoku_gui.get_is_human():
                time.sleep(0.1)
            best_move = self.gomoku_gui.get_human_move()

        # execute move
        gomoku.execute_move(best_move)

        # check game status
        ended, winner = gomoku.get_game_status()
        if ended == 1:
            break

        # update tree search
        mcts_best.update_with_move(best_move)

        # next player
        player_index = -player_index

    if winner == human_color:
        print("HUMAN WIN")
    elif winner == 0:
        print("DRAW")
    else:
        print("ALPHA ZERO WIN")
    t.join()
def _contest(self, network1, network2, first_player, show):
    # create one MCTS player per network
    player1 = MCTS(network1, self.num_mcts_threads, self.c_puct,
                   self.num_mcts_sims, self.c_virtual_loss, self.action_size)
    player2 = MCTS(network2, self.num_mcts_threads, self.c_puct,
                   self.num_mcts_sims, self.c_virtual_loss, self.action_size)

    # prepare
    players = [player2, None, player1]
    player_index = first_player
    gomoku = Gomoku(self.n, self.n_in_row, first_player)
    if show:
        self.gomoku_gui.reset_status()

    # play
    while True:
        player = players[player_index + 1]

        # select best move
        prob = player.get_action_probs(gomoku)
        best_move = int(np.argmax(np.array(list(prob))))

        # execute move
        gomoku.execute_move(best_move)
        if show:
            self.gomoku_gui.execute_move(player_index, best_move)

        # check game status
        ended, winner = gomoku.get_game_status()
        if ended == 1:
            return winner

        # update search tree
        player1.update_with_move(best_move)
        player2.update_with_move(best_move)

        # next player
        player_index = -player_index
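# A hypothetical driver for _contest above, sketching the usual AlphaZero
# gating step: play several games with the first move alternating, then keep
# the new network only if its win rate clears a threshold. _gate, num_contest
# and update_threshold are illustrative names, not part of the code above;
# winner follows _contest's convention (network1 plays color +1).
def _gate(self, network_new, network_old, num_contest=10, update_threshold=0.55):
    new_wins, old_wins = 0, 0
    for i in range(num_contest):
        first_player = 1 if i % 2 == 0 else -1  # alternate who moves first
        winner = self._contest(network_new, network_old, first_player, show=False)
        if winner == 1:
            new_wins += 1
        elif winner == -1:
            old_wins += 1
    # draws count for neither side
    decided = new_wins + old_wins
    return decided > 0 and new_wins / decided > update_threshold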
def self_play(self, first_color, libtorch, show):
    """
    This function executes one episode of self-play, starting with player 1.
    As the game is played, each turn is added as a training example to
    train_examples. The game is played until it ends. After the game ends,
    the outcome of the game is used to assign values to each example in
    train_examples.
    """

    train_examples = []

    player1 = MCTS(libtorch, self.num_mcts_threads, self.c_puct,
                   self.num_mcts_sims, self.c_virtual_loss, self.action_size)
    player2 = MCTS(libtorch, self.num_mcts_threads, self.c_puct,
                   self.num_mcts_sims, self.c_virtual_loss, self.action_size)
    players = [player2, None, player1]
    player_index = 1

    gomoku = Gomoku(self.n, self.n_in_row, first_color)
    if show:
        self.gomoku_gui.reset_status()

    episode_step = 0
    while True:
        episode_step += 1
        player = players[player_index + 1]

        # get action prob: explore with temperature for the first
        # num_explore moves, then play greedily
        if episode_step <= self.num_explore:
            prob = np.array(list(player.get_action_probs(gomoku, self.temp)))
        else:
            prob = np.array(list(player.get_action_probs(gomoku, 0)))

        # generate sample
        board = tuple_2d_to_numpy_2d(gomoku.get_board())
        last_action = gomoku.get_last_move()
        cur_player = gomoku.get_current_color()

        sym = self.get_symmetries(board, prob, last_action)
        for b, p, a in sym:
            train_examples.append([b, a, cur_player, p])

        # Dirichlet noise at the root:
        # pi' = 0.9 * pi + 0.1 * Dir(alpha), over legal moves only
        legal_moves = list(gomoku.get_legal_moves())
        noise = 0.1 * np.random.dirichlet(
            self.dirichlet_alpha * np.ones(np.count_nonzero(legal_moves)))

        prob = 0.9 * prob
        j = 0
        for i in range(len(prob)):
            if legal_moves[i] == 1:
                prob[i] += noise[j]
                j += 1
        prob /= np.sum(prob)

        # execute move
        action = np.random.choice(len(prob), p=prob)
        if show:
            self.gomoku_gui.execute_move(cur_player, action)
        gomoku.execute_move(action)
        player1.update_with_move(action)
        player2.update_with_move(action)

        # next player
        player_index = -player_index

        # is ended
        ended, winner = gomoku.get_game_status()
        if ended == 1:
            # b, last_action, cur_player, p, v
            return [(x[0], x[1], x[2], x[3], x[2] * winner)
                    for x in train_examples]
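# tuple_2d_to_numpy_2d is used by both self_play variants but not defined in
# this snippet. A one-line sketch, assuming the C++ binding returns the board
# as a nested tuple of ints: np.array converts nested tuples directly.
def tuple_2d_to_numpy_2d(tuple_2d):
    return np.array(tuple_2d)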
# coding: utf-8
import os
import sys
sys.path.append('../build')

from library import Gomoku, MCTS

import numpy as np
import time

if __name__ == "__main__":
    gomoku = Gomoku(15, 5, 1)

    # set up a mid-game position: black builds a row, white answers
    gomoku.execute_move(0 + 40)
    gomoku.execute_move(99)
    gomoku.execute_move(1 + 40)
    gomoku.execute_move(98)
    gomoku.execute_move(2 + 40)
    gomoku.execute_move(97)
    gomoku.execute_move(3 + 40)
    gomoku.execute_move(96)
    gomoku.display()

    # model path, threads, c_puct, num_mcts_sims, c_virtual_loss,
    # action_size (15 * 15), use_gpu
    mcts = MCTS("./models/checkpoint.pt", 4, 2.5, 1600, 2.5, 225, True)

    print("RUNNING")
    while True:
        time_start = time.time()
        res = mcts.get_action_probs(gomoku, 1)
        time_end = time.time()
        print('get_action_probs', time_end - time_start)
# (fragment) assumes board_batch, last_action_batch, cur_player_batch,
# p_batch and v_batch were built earlier in this test script
example_batch = list(zip(board_batch, last_action_batch, cur_player_batch,
                         p_batch.cpu().numpy().tolist(),
                         v_batch.cpu().numpy().tolist()))
print('train\n', example_batch)
policy_value_net.train(example_batch)

# test infer
print('infer\n', policy_value_net.infer(
    list(zip(board_batch, last_action_batch, cur_player_batch))))

# test libtorch
nn = neural_network.NeuralNetWorkWrapper(lr, l2, kl_targ, epochs,
                                         256, 10, 100)
nn.save_model(folder="models", filename="checkpoint")
# nn.load_model(folder="models", filename="checkpoint")

gomoku = Gomoku(10, 5, 1)
gomoku.execute_move(3)
gomoku.execute_move(4)
gomoku.execute_move(6)
gomoku.execute_move(23)
gomoku.execute_move(8)
gomoku.execute_move(9)
gomoku.execute_move(78)
gomoku.execute_move(0)
gomoku.execute_move(17)
gomoku.execute_move(7)
gomoku.execute_move(19)
gomoku.execute_move(67)
gomoku.execute_move(60)
gomoku.execute_move(14)
gomoku.execute_move(11)
gomoku.execute_move(2)
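# A minimal dummy batch for the same train/infer calls, sketched under the
# assumption (from the zip above) that each training example is a
# (board, last_action, cur_player, p, v) tuple; the 10 x 10 board and
# uniform policy below are placeholders, not values from the real test.
import numpy as np

n, action_size = 10, 100
dummy_batch = [
    (np.zeros((n, n)).tolist(),                      # board
     -1,                                             # last_action (no move yet)
     1,                                              # cur_player
     (np.ones(action_size) / action_size).tolist(),  # p: uniform policy
     0.0)                                            # v: outcome for cur_player
    for _ in range(4)
]
# policy_value_net.train(dummy_batch)
# policy_value_net.infer([(b, a, c) for b, a, c, _, _ in dummy_batch])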