import numpy as np


def mcts(board, player, root, network):
    """
    board: np.array((3, 19, 19))
    take board, player turn (0, 1) and root node
    return next move, updated board, policy vector, next root
    and a boolean for game status
    """
    # parameter: number of search trials
    trials = 6

    # build the tree
    for _ in range(trials):
        search(root, board, player, network)

    # reshape the policy to (361); channels come first in (3, 19, 19),
    # so occupied cells are board[0] + board[1]
    p = np.ones(361) - (board[0] + board[1]).flatten()
    p[p == 1] = root.get_mcts() * (1 - 2 * player)

    # get coordinates of the chosen move and update the board
    n = root.get_best_move(player)
    x, y = get_pos_on_board(board, n)
    put_on_board(board, (x, y), player, 1)

    # if the chosen child is unexplored, expand it
    child = root.get_child(n)
    if child.leaf():
        expand(child, board, player ^ 1, network)

    # get game status
    _, e = evaluate(board, player, (x, y))
    return ((x, y), board, p, child, e)
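
# Hypothetical usage sketch (not in the original code): a self-play loop
# driving mcts(). The Node class below, the network handle and black (1)
# moving first are assumptions for illustration.
def selfplay_sketch(network):
    board = np.zeros((3, 19, 19))
    root = Node()
    player = 1  # assumption: black moves first
    done = False
    while not done:
        move, board, policy, root, done = mcts(board, player, root, network)
        player ^= 1  # alternate turns
    return board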
def select(node, board, player):
    """
    return chosen child node, updated board, new coordinates
    and the next player
    """
    # choose the next node: values are signed, best negative for black
    n = np.argmax(node.get_policy() * (1 - 2 * player))
    child = node.get_child(n)
    child.add_frequency()

    # get coordinates of the next move and update the board
    x, y = get_pos_on_board(board, n)
    put_on_board(board, (x, y), player, 1)
    return child, board, (x, y), player ^ 1
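
# Why the (1 - 2 * player) factor works: node values are signed, positive
# favouring white (0) and negative favouring black (1), so flipping the
# sign for black turns argmax into an effective argmin. Hypothetical
# three-cell example:
#     policy = np.array([0.4, -0.7, 0.1])
#     np.argmax(policy * (1 - 2 * 0))  # white -> index 0 (most positive)
#     np.argmax(policy * (1 - 2 * 1))  # black -> index 1 (most negative)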
def search(node, board, player, network):
    """
    node: object Node
    board: np.array((3, 19, 19))
    player: 0 for white, 1 for black
    do actions on one level of depth
    """
    child, board, pos, next_player = select(node, board, player)

    # evaluate or keep searching
    if child.leaf():
        # evaluate() returns a pair; as elsewhere in this file, the first
        # element is the value and the second the game-over flag
        value, _ = evaluate(board, player, pos)
        # if not a winning move, expand with the network
        if not value:
            value = expand(child, board, next_player, network)
    else:
        value = search(child, board, next_player, network)

    # clean the board and back-propagate
    put_on_board(board, pos, player, 0)
    child.score(value)
    return value
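
# The routines above assume a tree node exposing leaf(), get_child(n),
# add_frequency(), score(value), get_policy(), get_mcts() and
# get_best_move(player). The class below is a minimal sketch of that
# interface under assumed update rules (lazy children, visit counts, a
# running-mean value); it is not the original implementation. In
# particular, the original get_mcts() presumably returns one value per
# empty cell, while this sketch only covers explored children.
class Node:
    def __init__(self, prior=None):
        self.children = {}   # move index -> child node
        self.prior = prior   # signed policy estimates over moves
        self.visits = 0      # times select() passed through this node
        self.value = 0.0     # running mean of back-propagated values

    def leaf(self):
        # a node stays a leaf until expand() attaches children
        return not self.children

    def get_child(self, n):
        # create the child lazily the first time move n is explored
        return self.children.setdefault(n, Node())

    def add_frequency(self):
        self.visits += 1

    def score(self, value):
        # running mean over the visits counted in add_frequency()
        self.value += (value - self.value) / max(self.visits, 1)

    def get_policy(self):
        return self.prior

    def get_mcts(self):
        # signed search values of explored children, in move order
        return np.array([self.children[n].value for n in sorted(self.children)])

    def get_best_move(self, player):
        # most positive value for white (0), most negative for black (1)
        sign = 1 - 2 * player
        return max(self.children, key=lambda n: sign * self.children[n].value)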
def human_turn(board, node, player, net):
    print("Your turn, Human")
    e = 0
    while not e:
        try:
            x = int(input("x: "))
            y = int(input("y: "))
            e = 1
        except ValueError:
            print("wrong format, only integers required")
            q = input("quit? (y/n): ")
            if q == "y":
                exit(0)
    pos = (x, y)
    put_on_board(board, pos, player, 1)

    # advance the search tree past the human move
    node = update_turn(board, player ^ 1, node, net, pos)
    _, r = evaluate(board, player, pos)
    return node, r
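
# Hypothetical interactive loop (not in the original code): the engine
# plays black (1) and the human plays white (0); the initial state and
# turn order are assumptions.
def play_vs_engine_sketch(network):
    board = np.zeros((3, 19, 19))
    node = Node()
    over = 0
    while not over:
        _, board, _, node, over = mcts(board, 1, node, network)
        if over:
            break
        node, over = human_turn(board, node, 0, network)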