def suggest_move(self, position):
    """Pick a move for ``position`` using a policy-guided, time-limited tree search.

    Steps:
      1. Return ``gtp.RESIGN`` when ``position.caps[1]`` exceeds
         ``position.caps[0]`` by more than 50.
      2. Run the policy network and zero the probability of every occupied
         point and of every point ``go.is_eyeish`` attributes to a colour
         other than ``position.to_play``.
      3. Call ``self.tree_search`` on the root node repeatedly until
         ``self.seconds_per_move`` seconds have elapsed.
      4. While ``position.n < 45``, return the child with the highest visit
         count ``N``. Afterwards, additionally reject candidates that
         ``go.is_eyeish`` attributes to the side to move, and pass when the
         best remaining candidate has zero prior or no candidate is left.

    Returns:
        ``gtp.RESIGN``, a key of ``root.children`` (the chosen move), or
        ``None`` after a pass. On the pass path ``position`` is mutated via
        ``position.pass_move(mutate=True)``.
    """
    if position.caps[0] + 50 < position.caps[1]:
        return gtp.RESIGN

    start = time.time()

    # Prior move probabilities for the current position; they seed the search.
    move_probs = self.policy_network.run(position)

    # NOTE(review): the board is assumed to be 9x9 here, as in the original
    # code - confirm against the board size used by `go`.
    board_size = 9
    for i in range(board_size):
        for j in range(board_size):
            if position.board[i][j] != go.EMPTY:
                # Occupied points can never be played.
                move_probs[i][j] = 0.0
                continue
            eye_owner = go.is_eyeish(position.board, (i, j))
            if eye_owner is not None and eye_owner != position.to_play:
                # Eye-like point attributed to the other colour.
                move_probs[i][j] = 0.0

    # Root node of the search tree for the current position.
    root = MCTSNode.root_node(position, move_probs)
    while time.time() - start < self.seconds_per_move:
        self.tree_search(root)

    def visit_count(move):
        return root.children[move].N

    if position.n < 45:
        # Early on, simply take the most-visited child of the root.
        return max(root.children, key=visit_count)

    # Later on, an engine that refuses to pass starts filling its own eyes,
    # so the most-visited candidates are vetted one by one.
    while root.children:
        max_move = max(root.children, key=visit_count)
        row, col = max_move[0], max_move[1]
        if move_probs[row][col] == 0:
            # The best remaining candidate was masked out above: pass.
            position.pass_move(mutate=True)
            return None
        if go.is_eyeish(position.board, max_move) != position.to_play:
            return max_move
        # Candidate would fill one of our own eyes: discard it and retry.
        root.children.pop(max_move)

    # Every candidate was discarded. The original code called max() on the
    # now-empty dict here and crashed with ValueError; pass instead.
    position.pass_move(mutate=True)
    return None
def play_valid_move(self, position, move_probs):
    """Play the first acceptable candidate move, or pass if there is none.

    Candidates are visited in the order yielded by
    ``sorted_moves(move_probs)`` (presumably most probable first - confirm
    against ``sorted_moves``). A candidate for which ``go.is_eyeish`` returns
    a truthy value is skipped and its entry in ``move_probs`` is set to zero
    in place. A candidate whose ``position.play_move`` raises
    ``go.IllegalMove`` is skipped as well.

    Returns:
        The result of ``position.play_move(move, mutate=True)`` for the first
        candidate that succeeds, otherwise the result of
        ``position.pass_move(mutate=True)``.
    """
    for move in sorted_moves(move_probs):
        eye_owner = go.is_eyeish(position.board, move)
        if eye_owner:
            move_probs[move[0]][move[1]] = 0.0
            continue

        try:
            # Attempt the candidate; a legal one ends the search immediately.
            return position.play_move(move, mutate=True)
        except go.IllegalMove:
            # Illegal here: fall through to the next candidate.
            continue

    # No suitable point was found, so pass.
    return position.pass_move(mutate=True)
def is_move_reasonable(position, move):
    """Tell whether ``move`` is legal and not on a point eyeish for the mover.

    Returns the falsy result of ``position.is_move_legal(move)`` unchanged
    when the move is not legal (``go.is_eyeish`` is not consulted in that
    case). Otherwise returns whether ``go.is_eyeish(position.board, move)``
    differs from ``position.to_play``.
    """
    legal = position.is_move_legal(move)
    if not legal:
        # Hand back the legality value itself, exactly as `and` would.
        return legal
    return go.is_eyeish(position.board, move) != position.to_play