def __init__(self, name):
    """Build the Flask application, register every HTTP route and prepare the playing agent.

    :param name: Import name handed straight to the Flask constructor
    """
    self.app = Flask(name)
    # (url rule, endpoint name, handler) triples for every route the server exposes;
    # handlers are unbound methods, so each one is wrapped to receive this instance
    routes = [
        ("/api/state", "api/state", ChessServer.serve_state),
        ("/api/new", "api/new", ChessServer.serve_new_game),
        ("/api/move", "api/move", ChessServer.serve_move),
        ("/", "serve_client_r", ChessServer.serve_client),
        ("/<path:path>", "serve_client", ChessServer.serve_client),
    ]
    for rule, endpoint, handler in routes:
        self.app.add_url_rule(rule, endpoint, self._wrap_endpoint(handler))
    self._gamestate = GameState()
    network = NeuralNetAPI()  # load the neural network once; both agents share it
    agents = {
        "raw_net": RawNetAgent(network),
        "mcts": MCTSAgent(network, virtual_loss=3, threads=BATCH_SIZE, cpuct=CPUCT, dirichlet_epsilon=DIRICHLET_EPSILON),
    }
    # the raw-network agent answers move requests by default
    self.agent = agents["raw_net"]
def _run_single_playout(self, parent_node: Node, pipe_id=0, depth=1, chosen_nodes=None):
    """
    Runs a single MCTS playout. This function works recursively until a leaf or terminal node is reached.
    It ends by back-propagating the value of the new expanded node or by propagating the value of a
    terminal state.

    :param parent_node: Current parent-node of the selected node. In the first expansion this is the root node.
    :param pipe_id: Index of the pipe ending used to talk to the network inference service for this thread.
    :param depth: Current depth for the evaluation. Depth is increased by 1 for every recursive call.
    :param chosen_nodes: List of child indices which have been taken in the current path with respect to
                         the root node. For each selected child node this list is expanded by one entry
                         recursively.
    :return: -value: The inverse value prediction of the current board state. The flipping by -1 each turn
                     is needed because the point of view changes each half-move.
             depth: Current depth reached by this evaluation.
             chosen_nodes: List of all node indices that this thread has explored with respect to the root node.
    """
    # NOTE(review): probably better to be refactored —
    # Too many arguments (6/5) - Too many local variables (27/15) - Too many branches (28/12) -
    # Too many statements (86/50)
    if chosen_nodes is None:
        # first call of the recursion: start with an empty path
        chosen_nodes = []
    # select a legal move on the chess board
    node, move, child_idx = self._select_node(parent_node)
    if move is None:
        raise Exception("Illegal tree setup. A 'None' move was selected which shouldn't be possible")
    # update the visit counts to this node:
    # temporarily reduce the attraction of this node by applying a virtual loss /
    # the effect of virtual loss will be undone if the playout is over
    parent_node.apply_virtual_loss_to_child(child_idx, self.virtual_loss)
    # append the chosen child idx to the chosen_nodes list
    chosen_nodes.append(child_idx)
    if node is None:
        # the selected child has not been expanded yet
        state = GameState(deepcopy(parent_node.board))  # get the board from the parent node
        state.apply_move(move)  # apply the selected move on the board
        # get the transposition-key which is used as an identifier for the board positions in the look-up table
        transposition_key = state.get_transposition_key()
        # check if the addressed fen exists in the look-up table
        # note: It's important to also use the halfmove-counter here, otherwise the system can create an
        # infinite feed-back-loop
        key = transposition_key + (state.get_fullmove_number(),)
        if self.use_transposition_table and key in self.node_lookup:
            node = self.node_lookup[key]  # get the node from the look-up list
            # get the prior value from the leaf node which has already been expanded
            value = node.initial_value
            # clip the visit nodes for all nodes in the search tree except the direct opp. move
            clip_low_visit = self.use_pruning
            # create a new node that shares the already-computed evaluation
            new_node = Node(
                node.board,
                value,
                node.policy_prob,
                node.legal_moves,
                node.is_leaf,
                key,
                clip_low_visit,
            )
            with parent_node.lock:
                parent_node.child_nodes[child_idx] = new_node  # add the new node to its parent
        else:
            # expand and evaluate the new board state (the node wasn't found in the look-up table)
            # its value will be back-propagated through the tree and flipped after every layer
            my_pipe = self.my_pipe_endings[pipe_id]  # receive a free available pipe
            if self.send_batches:
                my_pipe.send(state.get_state_planes())
                # this pipe waits for the predictions of the network inference service
                [value, policy_vec] = my_pipe.recv()
            else:
                # write the planes into the shared batch buffer and request an inference slot instead
                state_planes = state.get_state_planes()
                self.batch_state_planes[pipe_id] = state_planes
                my_pipe.send(pipe_id)
                result_channel = my_pipe.recv()
                value = np.array(self.batch_value_results[result_channel])
                policy_vec = np.array(self.batch_policy_results[result_channel])
            # initialize is_leaf by default to false and check if the game is won
            is_leaf = is_won = False
            # check if the current player has won the game
            # (we don't need to check for is_lost() because the game is already over
            # if the current player checkmated his opponent)
            if state.is_check():
                if state.is_loss():
                    is_won = True
            # needed for e.g. atomic because the king explodes and is not in check mate anymore
            if state.is_variant_loss():
                is_won = True
            if is_won:
                value = -1
                is_leaf = True
                legal_moves = []
                p_vec_small = None
                # establish a mate in one connection in order to stop exploring different alternatives
                parent_node.set_check_mate_node_idx(child_idx)
            # check if you can claim a draw - it's assumed that the draw is always claimed
            elif (
                self.can_claim_threefold_repetition(transposition_key, chosen_nodes)
                or state.get_pythonchess_board().can_claim_fifty_moves() is True
            ):
                value = 0
                is_leaf = True
                legal_moves = []
                p_vec_small = None
            else:
                legal_moves = state.get_legal_moves()  # get the current legal moves of its board state
                if not legal_moves:
                    # stalemate occurred which is very rare for crazyhouse
                    if state.uci_variant == "giveaway":
                        value = 1
                    else:
                        value = 0
                    is_leaf = True
                    legal_moves = []
                    p_vec_small = None
                    # raise Exception("No legal move is available for state: %s" % state)
                else:
                    try:
                        # extract a sparse policy vector with normalized probabilities
                        p_vec_small = get_probs_of_move_list(
                            policy_vec, legal_moves, is_white_to_move=state.is_white_to_move(), normalize=True
                        )
                    except KeyError:
                        raise Exception("Key Error for state: %s" % state)
            # clip the visit nodes for all nodes in the search tree except the direct opp. move
            clip_low_visit = self.use_pruning and depth != 1  # and depth > 4
            new_node = Node(
                state.get_pythonchess_board(),
                value,
                p_vec_small,
                legal_moves,
                is_leaf,
                transposition_key,
                clip_low_visit,
            )  # create a new node
            if depth == 1:
                # disable uncertain moves from being visited by giving them a very bad score
                if not is_leaf and self.use_pruning:
                    if self.root_node_prior_policy[child_idx] < 1e-3 and value * -1 < self.root_node.initial_value:
                        with parent_node.lock:
                            value = 99
                # for performance reasons only apply check enhancement on depth 1 for now
                chess_board = state.get_pythonchess_board()
                if self.enhance_checks:
                    self._enhance_checks(chess_board, legal_moves, p_vec_small)
                if self.enhance_captures:
                    self._enhance_captures(chess_board, legal_moves, p_vec_small)
            if not self.use_pruning:
                self.node_lookup[key] = new_node  # include a reference to the new node in the look-up table
            with parent_node.lock:
                parent_node.child_nodes[child_idx] = new_node  # add the new node to its parent
    elif node.is_leaf:
        # we have reached a leaf node: reuse its stored evaluation
        value = node.initial_value
    else:
        # get the value from the leaf node (the current function is called recursively)
        value, depth, chosen_nodes = self._run_single_playout(node, pipe_id, depth + 1, chosen_nodes)
    # revert the virtual loss and apply the predicted value by the network to the node
    parent_node.revert_virtual_loss_and_update(child_idx, self.virtual_loss, -value)
    # invert the value prediction for the parent of the above node layer because the player changes every turn
    return -value, depth, chosen_nodes
def evaluate_board_state(self, state: GameState):
    """
    Analyzes the current board state. This is the main method which gets called by the uci interface
    or analysis request.

    :param state: Actual game state to evaluate for the MCTS
    :return: value, legal_moves, p_vec_small, centipawns, depth, nodes, time_elapsed_s, nps, pv
    """
    # NOTE(review): probably better to be refactored —
    # Too many local variables (28/15) - Too many branches (25/12) - Too many statements (75/50)
    self.t_start_eval = time()  # store the time at which the search started
    # check if the net prediction service has already been started
    if not self.net_pred_services[0].running:
        for net_pred_service in self.net_pred_services:
            # start the prediction daemon thread
            net_pred_service.start()
    legal_moves = state.get_legal_moves()  # list of all possible legal moves in the current board position
    if not legal_moves:
        # consistency check
        raise Exception("The given board state has no legal move available")
    # the key includes the fullmove number to distinguish repeated positions
    key = state.get_transposition_key() + (
        state.get_fullmove_number(),
    )
    # check first if the current tree can be reused
    if not self.use_pruning and key in self.node_lookup:
        chess_board = state.get_pythonchess_board()
        self.root_node = self.node_lookup[key]
        # if key in self.node_lookup:
        if self.enhance_captures:
            self._enhance_captures(chess_board, legal_moves, self.root_node.policy_prob)
            # enhance captures for all direct child nodes
            for child_node in self.root_node.child_nodes:
                if child_node:
                    self._enhance_captures(child_node.board, child_node.legal_moves, child_node.policy_prob)
        if self.enhance_checks:
            self._enhance_checks(chess_board, legal_moves, self.root_node.policy_prob)
            # enhance checks for all direct child nodes
            for child_node in self.root_node.child_nodes:
                if child_node:
                    self._enhance_checks(child_node.board, child_node.legal_moves, child_node.policy_prob)
        logging.debug(
            "Reuse the search tree. Number of nodes in search tree: %d",
            self.root_node.nb_total_expanded_child_nodes,
        )
        self.total_nodes_pre_search = deepcopy(self.root_node.n_sum)
    else:
        logging.debug("Starting a brand new search tree...")
        self.root_node = None
        self.total_nodes_pre_search = 0
    if len(legal_moves) == 1:
        # check for fast way out
        max_depth_reached = 1  # if there's only a single legal move you only must go 1 depth
        if self.root_node is None:
            # conduct all necessary steps for fastest way out
            self._expand_root_node_single_move(state, legal_moves)
        # increase the move time buffer
        # subtract half a second as a constant for possible delay
        self.time_buffer_ms += max(self.movetime_ms - 500, 0)
    else:
        if self.root_node is None:
            # run a single expansion on the root node
            self._expand_root_node_multiple_moves(state, legal_moves)
        # opening guard: suppress very unlikely moves in the early game
        if state.get_fullmove_number() <= self.opening_guard_moves:  # 100: #7: #10:
            self.root_node.q_value[self.root_node.policy_prob < 5e-2] = -9999
        # elif len(legal_moves) > 50:
        #     self.root_node.q_value[self.root_node.policy_prob < 1e-3] = -9999
        # conduct the mcts-search based on the given settings
        max_depth_reached = self._run_mcts_search(state)
        t_elapsed = time() - self.t_start_eval
        print("info string move overhead is %dms" % (t_elapsed * 1000 - self.movetime_ms))
    # receive the policy vector based on the MCTS search
    p_vec_small = self.root_node.get_mcts_policy(self.q_value_weight)  # , xth_n_max=xth_n_max, is_root=True)
    if self.use_future_q_values:
        # use q-future value to update the q-values of direct child nodes
        q_future, indices = self.get_last_q_values(min_nb_visits=5, max_depth=5)  # 25)
        # self.root_node.q_value = 0.5 * self.root_node.q_value + 0.5 * q_future
        # TODO: make this matrix vector form
        if max_depth_reached >= 5:
            for idx in indices:
                self.root_node.q_value[idx] = min(self.root_node.q_value[idx], q_future[idx])
            p_vec_small = self.root_node.get_mcts_policy(self.q_value_weight)
    # if self.use_pruning is False:
    self.node_lookup[key] = self.root_node  # store the current root in the lookup table
    best_child_idx = p_vec_small.argmax()
    # select the q-value according to the mcts best child value
    value = self.root_node.q_value[best_child_idx]
    # value = orig_q[best_child_idx]
    # show the best calculated line
    lst_best_moves, _ = self.get_calculated_line()
    str_moves = self._mv_list_to_str(lst_best_moves)
    node_searched = int(self.root_node.n_sum - self.total_nodes_pre_search)
    time_e = time() - self.t_start_eval
    # In uci the depth is given using half-moves notation also called plies
    if len(legal_moves) != len(p_vec_small):
        raise Exception(
            "Legal move list %s with length %s is incompatible to policy vector %s"
            " with shape %s for board state %s and nodes legal move list: %s"
            % (legal_moves, len(legal_moves), p_vec_small, p_vec_small.shape, state, self.root_node.legal_moves)
        )
    # define the remaining return variables
    centipawns = value_to_centipawn(value)
    depth = max_depth_reached
    nodes = node_searched
    time_elapsed_s = time_e * 1000
    # avoid division by 0
    if time_e > 0.0:
        nps = node_searched / time_e
    else:
        # return a high constant otherwise
        nps = 999999999
    pv = str_moves
    if self.verbose:
        score = "score cp %d depth %d nodes %d time %d nps %d pv %s" % (
            centipawns,
            depth,
            nodes,
            time_elapsed_s,
            nps,
            pv,
        )
        logging.info("info string %s", score)
    return value, legal_moves, p_vec_small, centipawns, depth, nodes, time_elapsed_s, nps, pv
class ChessServer:
    """Helper for handling the game server: exposes the game state, new-game and
    move endpoints over HTTP via Flask and plays replies with a neural-net agent."""

    def __init__(self, name):
        """Create the Flask app, register all routes and set up the playing agent.

        :param name: Import name forwarded to the Flask constructor
        """
        self.app = Flask(name)
        self.app.add_url_rule("/api/state", "api/state", self._wrap_endpoint(ChessServer.serve_state))
        self.app.add_url_rule("/api/new", "api/new", self._wrap_endpoint(ChessServer.serve_new_game))
        self.app.add_url_rule("/api/move", "api/move", self._wrap_endpoint(ChessServer.serve_move))
        self.app.add_url_rule("/", "serve_client_r", self._wrap_endpoint(ChessServer.serve_client))
        self.app.add_url_rule("/<path:path>", "serve_client", self._wrap_endpoint(ChessServer.serve_client))
        self._gamestate = GameState()
        net = NeuralNetAPI()  # Loading network
        player_agents = {
            "raw_net": RawNetAgent(net),
            "mcts": MCTSAgent(net, virtual_loss=3, threads=BATCH_SIZE, cpuct=CPUCT, dirichlet_epsilon=DIRICHLET_EPSILON),
        }
        self.agent = player_agents["raw_net"]  # Setting up agent
        # self.agent = player_agents["mcts"]

    def _wrap_endpoint(self, func):
        """Bind the unbound method *func* to this instance so it can serve as a Flask view."""
        def wrapper(kwargs):
            return func(self, **kwargs)
        return lambda **kwargs: wrapper(kwargs)

    def run(self):
        """ Run the flask server"""
        self.app.run()

    @staticmethod
    def serve_client(path=None):
        """Serve a static file from the client directory; defaults to index.html."""
        if path is None:
            path = "index.html"
        return send_from_directory("./client", path)

    def serve_state(self):
        """Return the serialized current game state."""
        return self.serialize_game_state()

    def serve_new_game(self):
        """Start a fresh game and return its serialized state."""
        # fixed typo in log message: was "staring new game()"
        logging.debug("starting new game()")
        self.perform_new_game()
        return self.serialize_game_state()

    def serve_move(self):
        """ Groups the move requests and data to the server and the response from it"""
        # read move data
        drop_piece = request.args.get("drop")
        from_square = request.args.get("from")
        to_square = request.args.get("to")
        promotion_piece = request.args.get("promotion")
        from_square_idx = get_square_index_from_name(from_square)
        to_square_idx = get_square_index_from_name(to_square)
        # a drop move needs no origin square; everything else needs both squares
        if (from_square_idx is None and drop_piece is None) or to_square_idx is None:
            return self.serialize_game_state("board name is invalid")
        promotion = drop = None
        if drop_piece:
            from_square_idx = to_square_idx
            if drop_piece not in chess.PIECE_SYMBOLS:
                return self.serialize_game_state("drop piece name is invalid")
            drop = chess.PIECE_SYMBOLS.index(drop_piece)
        if promotion_piece:
            if promotion_piece not in chess.PIECE_SYMBOLS:
                return self.serialize_game_state("promotion piece name is invalid")
            promotion = chess.PIECE_SYMBOLS.index(promotion_piece)
        move = chess.Move(from_square_idx, to_square_idx, promotion, drop)
        # perform move
        try:
            self.perform_move(move)
        except ValueError as err:
            logging.error("ValueError %s", err)
            return self.serialize_game_state(err.args[0])
        # calculate agent response
        if not self.perform_agent_move():
            return self.serialize_game_state("Black has no more moves to play", True)
        return self.serialize_game_state()

    def perform_new_game(self):
        """Initialize a new game on the server"""
        self._gamestate = GameState()

    def perform_move(self, move):
        """ Apply the move on the game and check the legality of it

        :param move: chess.Move to apply
        :raises ValueError: if the move is illegal in the current position
        :return: False when the move ended the game by checkmate, None otherwise
        """
        logging.debug("perform_move(): %s", move)
        # check if move is valid
        if move not in list(self._gamestate.board.legal_moves):
            raise ValueError("The given move %s is invalid for the current position" % move)
        self._gamestate.apply_move(move)
        if self._gamestate.is_loss():
            logging.debug("Checkmate")
            return False
        return None

    def perform_agent_move(self):
        """Ask the agent for a reply move and play it; returns False when no move can be played."""
        if self._gamestate.is_loss():
            logging.debug("Checkmate")
            return False
        value, move, _, _ = self.agent.perform_action(self._gamestate)
        # report the value from the white player's point of view
        if not self._gamestate.is_white_to_move():
            value = -value
        logging.debug("Value %.4f", value)
        if move is None:
            logging.error("None move proposed!")
            return False
        self.perform_move(move)
        return True

    def serialize_game_state(self, message=None, finished=None):
        """ Encodes the game state to a .json string

        :param message: Optional status/error message for the client
        :param finished: Optional flag signalling that the game is over
        :return: JSON string with board, pocket, message and optional finished fields
        """
        if message is None:
            message = ""
        board_str = str(self._gamestate.board)
        # pockets[1] is white's pocket, pockets[0] is black's — TODO confirm against GameState
        pocket_str = "%s|%s" % (self._gamestate.board.pockets[1], self._gamestate.board.pockets[0])
        state = {"board": board_str, "pocket": pocket_str, "message": message}
        if finished:
            state["finished"] = finished
        return json.dumps(state)
def perform_new_game(self):
    """Initialize a new game on the server"""
    # discard any previous game by replacing the state object outright
    self._gamestate = GameState()