def MCTS_Search(board, player, num_reads, n_net):
    """Run ``num_reads`` search iterations from ``board`` and return the root.

    Every iteration picks a leaf with ``root.select_leaf()``, evaluates it
    with ``n_net`` on ``checkers.get_state2(leaf.board, leaf.player)`` and
    backpropagates the returned value. A leaf is expanded with the network's
    priors only when it is not terminal and still has legal moves.

    Args:
        board: Position the search starts from.
        player: Side to move in ``board``.
        num_reads: Number of select/evaluate/backpropagate iterations.
        n_net: Callable returning ``(child_prior_prob, value)`` for an encoded
            state. The prior must support ``.cpu().detach().numpy()``
            (presumably a torch tensor -- confirm against the network code).

    Returns:
        The root ``Node`` after all iterations.
    """
    root = Node(board, checkers.get_all_moves(board, player), player,
                move=None, parent=ParentRootNode())
    for _ in range(num_reads):
        leaf = root.select_leaf()
        child_prior_prob, value = n_net(
            checkers.get_state2(leaf.board, leaf.player))
        # The terminal test must look at the leaf being evaluated; testing the
        # root board would give the same answer on every iteration.
        leaf_is_final = (
            checkers.isTerminal(leaf.board)
            or len(checkers.get_all_moves(leaf.board, leaf.player)) == 0
        )
        if leaf_is_final:
            # Nothing to expand below a finished position: only propagate the
            # evaluation back up and dump the tree for inspection.
            print("Finished Game")
            leaf.backpropagate(value)
            leaf.print_tree()
        else:
            # Flatten the priors into a 1-D numpy vector before expansion.
            child_prior_prob = (
                child_prior_prob.cpu().detach().numpy().reshape(-1))
            leaf.expand_and_evaluate(child_prior_prob)
            leaf.backpropagate(value)
    root.print_tree()
    return root
def maybe_add_child(self, move):
    """Return the child reached by ``move``, creating it if necessary.

    ``move`` is an index into ``self.possible_moves``. If that child already
    exists it is returned unchanged. Otherwise the move is applied; when
    ``self.is_board_in_MCTS`` reports the resulting position as already
    present, the move is demoted below every other score and the best
    remaining index (by ``self.child_score()``) is used instead.

    An existing child is never replaced: if the re-picked move has already
    been expanded, that child is returned, not overwritten by a fresh
    ``Node`` (which would discard its subtree).

    Args:
        move: Index of the move in ``self.possible_moves``.

    Returns:
        The child ``Node`` stored in ``self.children`` for the move that was
        finally chosen.
    """
    n_moves = len(self.possible_moves)
    if n_moves == move:
        # Diagnostic dump: ``move`` equals the number of possible moves, i.e.
        # it is one past the last index the lookups below can use.
        print(self.child_score())
        print(move)
        print("Possible moves")
        print(n_moves)
        print(self.child_prior_probability)

    if move in self.children:
        return self.children[move]

    new_board = checkers.apply_move(self.board,
                                    self.possible_moves[move][0],
                                    self.possible_moves[move][1],
                                    self.player)
    player2 = checkers.switch_player(self.player)

    if self.is_board_in_MCTS(new_board, player2):
        # Position already present: push this move below every other score
        # and fall back to the best remaining candidate.
        m = self.child_score()
        m[move] = m.min() - 1
        move = np.argmax(m[0:n_moves])
        if move in self.children:
            # The fallback move was expanded earlier; keep its subtree.
            return self.children[move]
        new_board = checkers.apply_move(self.board,
                                        self.possible_moves[move][0],
                                        self.possible_moves[move][1],
                                        self.player)
        player2 = checkers.switch_player(self.player)

    self.children[move] = Node(new_board,
                               checkers.get_all_moves(new_board, player2),
                               player2, move=move, parent=self)
    return self.children[move]
def MCTS_self_play(nnet, num_games, s_index, iteration):
    """Play self-play games with MCTS and return the collected samples.

    Games are numbered ``s_index`` .. ``s_index + num_games - 1``. Each game
    starts from ``checkers.initial_board(board_size, board_size)`` with
    player 1 to move, and every move is the argmax of the policy obtained
    from an ``MCTS_Search`` of ``roll_out`` reads (``board_size`` and
    ``roll_out`` are module-level names).

    Args:
        nnet: Network handed to ``MCTS_Search``.
        num_games: Number of games to play.
        s_index: Index of the first game.
        iteration: Not used by this function.

    Returns:
        A list of ``[encoded_state, policy, value]`` samples over all games,
        where ``encoded_state`` comes from ``checkers.get_state2``. The first
        sample of every game is labelled 0; the others carry the game value.
    """
    data_x = []
    for itt in tqdm(range(s_index, num_games + s_index)):
        board = checkers.initial_board(board_size, board_size)
        player = 1
        history = []
        outcome = 0
        ply = 0
        temperature = 1

        while checkers.isTerminal(board, player) is not True:
            root = MCTS_Search(board, player, roll_out, nnet)
            policy = get_policy(root, temperature)
            history.append([board, player, policy])

            chosen = root.possible_moves[np.argmax(policy)]
            board = checkers.apply_move(root.board, chosen[0], chosen[1],
                                        root.player)
            player = checkers.switch_player(player)

            if len(checkers.get_all_moves(board, player)) == 0:
                # Player == 1 means White pieces. The side to move has no
                # legal move left, which decides the game.
                outcome = -1 if player == 1 else (1 if player == 2 else 0)
                break
            if ply == 150:
                # Move cap reached: score the game as 0.
                outcome = 0
                break
            ply += 1

        for idx, (state, mover, pi) in enumerate(history):
            label = 0 if idx == 0 else outcome
            data_x.append([checkers.get_state2(state, mover), pi, label])
        del history
    return data_x
def MCTS_self_play(nnet, num_games, s_index, iteration):
    """Play self-play games with MCTS and pickle each game's samples.

    Games are numbered ``s_index`` .. ``s_index + num_games - 1``. Each game
    starts from ``checkers.initial_board(board_size, board_size)`` with
    player 1 to move; every move is the argmax of the policy obtained from a
    500-read ``MCTS_Search``. The temperature passed to ``get_policy`` drops
    from 1 to 0.1 once more than 15 moves have been played.

    A game ends when the position is terminal, when the side to move has no
    legal move, or when the 150-move cap is hit (value 0).

    Args:
        nnet: Network handed to ``MCTS_Search``.
        num_games: Number of games to play.
        s_index: Index of the first game; used in the output file name.
        iteration: Training iteration; used in the output file name.

    Returns:
        None. Each game is written with ``save_data`` to
        ``MCTS_iteration-<iteration>_game-<game>.p`` as a list of
        ``[board, player, policy, value]`` samples; the first sample of a
        game is labelled 0.
    """
    for itt in tqdm(range(s_index, num_games + s_index)):
        board = checkers.initial_board(board_size, board_size)
        player = 1
        data = []
        value = 0
        num_moves = 0
        t = 1
        # Keep playing while the game is NOT over. Looping while isTerminal()
        # is truthy would skip the whole game on a fresh board.
        while not checkers.isTerminal(board):
            if num_moves > 15:
                t = 0.1
            root = MCTS_Search(board, player, 500, nnet)
            policy = get_policy(root, t)
            data.append([board, player, policy])
            move = np.argmax(policy)
            board = checkers.apply_move(root.board,
                                        root.possible_moves[move][0],
                                        root.possible_moves[move][1],
                                        root.player)
            player = checkers.switch_player(player)
            if len(checkers.get_all_moves(board, player)) == 0:
                # Player == 1 means White pieces
                # NOTE(review): +1 is recorded when player 1 is the side left
                # without moves -- confirm this sign is what training expects.
                if player == 1:
                    value = 1
                elif player == 2:
                    value = -1
                else:
                    value = 0
                # The game is decided; stop before searching a position that
                # has no legal moves.
                break
            if num_moves == 150:
                value = 0
                break
            num_moves += 1

        data_x = [
            [s, pl, po, 0 if ind == 0 else value]
            for ind, (s, pl, po) in enumerate(data)
        ]
        filename = "MCTS_iteration-{:d}_game-{:d}.p".format(iteration, itt)
        save_data(filename, data_x)
def MCTS_Play_WithRandom(nnet, num_games):
    """Play the MCTS agent (player 1) against a random mover and tally results.

    Each game starts from ``checkers.initial_board(board_size, board_size)``
    with player 1 to move. Player 1 plays the argmax of the policy from an
    ``MCTS_Search`` of ``roll_out`` reads; the other side plays
    ``checkers.get_random_move``.

    A win is counted when player 2 is to move and has no legal move. A draw
    is counted when the move counter reaches 200. A game that ends because
    the loop condition ``checkers.isTerminal`` fires changes neither counter.

    Args:
        nnet: Network handed to ``MCTS_Search``.
        num_games: Number of games to play.

    Returns:
        ``(number_of_wins, number_of_draws)``.
    """
    wins = 0
    draws = 0
    for itt in tqdm(range(num_games)):
        board = checkers.initial_board(board_size, board_size)
        player = 1
        ply = 0
        temperature = 1

        while checkers.isTerminal(board, player) is not True:
            if player == 1:
                root = MCTS_Search(board, player, roll_out, nnet)
                policy = get_policy(root, temperature)
                picked = root.possible_moves[np.argmax(policy)]
                board = checkers.apply_move(root.board, picked[0], picked[1],
                                            root.player)
            else:
                random_move = checkers.get_random_move(board, player)
                board = checkers.apply_move(board, random_move[0],
                                            random_move[1], player)

            player = checkers.switch_player(player)

            if len(checkers.get_all_moves(board, player)) == 0:
                # Player == 1 means White pieces. Only a stuck player 2
                # counts as a win for the MCTS side.
                if player == 2:
                    wins += 1
                break
            if ply == 200:
                draws += 1
                break
            ply += 1
    return wins, draws
del data filename = "MCTS_iteration-{:d}_game-{:d}.p".format(iteration, itt) save_data(filename, data_x) return def MCTS_run(): return def save_data(name, data): data1 = open(name, 'wb') pickle.dump(data, data1, protocol=pickle.HIGHEST_PROTOCOL) data1.close() def load_data(name): c_name = name + ".p" data1 = open(c_name, 'rb') return pickle.load(data1) board_n = checkers.initial_board(8, 8) player_n = 1 possible_moves_n = checkers.get_all_moves(board_n, player_n) print(possible_moves_n) a = Node(board_n, possible_moves_n, player_n, ParentRootNode()) MCTS_Search(board_n, player_n, 2000, Nnet.Net())