def evaluate_board_state(self, state: AbsGameState):  # Too few public methods (1/2)
    """
    The greedy agent always performs the first legal move with the highest move probability

    :param state: Gamestate object
    :return:
    value - Value prediction in the current player's view from [-1,1]: -1 -> 100% lost, +1 -> 100% won
    legal_moves - List of all legal moves in the current position
    p_vec_small - Probability distribution over all legal moves
    centipawn - Centipawn evaluation which is converted from the value prediction in the current player's view
    depth - Depth which was reached after the search
    nodes - Number of nodes which have been evaluated in the search
    time_elapsed_s - Elapsed time for the full search (reported in milliseconds)
    nps - Nodes per second metric
    pv - Calculated best line for both players
    """
    t_start_eval = time()
    pred_value, pred_policy = self._net.predict_single(state.get_state_planes())
    legal_moves = list(state.get_legal_moves())
    p_vec_small = get_probs_of_move_list(pred_policy, legal_moves, state.mirror_policy())

    # define the remaining return variables
    time_e = time() - t_start_eval
    centipawn = value_to_centipawn(pred_value)
    depth = nodes = 1
    time_elapsed_s = time_e * 1000  # note: this is milliseconds despite the variable name
    nps = nodes / time_e
    # use the move with the highest probability as the best move for logging
    pv = legal_moves[p_vec_small.argmax()].uci()
    return pred_value, legal_moves, p_vec_small, centipawn, depth, nodes, time_elapsed_s, nps, pv
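
# Hedged sketch: value_to_centipawn() is imported from elsewhere in the repo. A common
# mapping from a value in [-1, 1] to a centipawn score is the Lc0-style formula below;
# this is an illustrative stand-in, not necessarily the repo's exact implementation.
import math

def value_to_centipawn_sketch(value: float) -> int:
    """Map a win/loss value in [-1, 1] to an approximate centipawn score."""
    if abs(value) >= 1.0:
        return int(math.copysign(9999, value))  # clamp terminal scores to a mate-like value
    return int(111.714640912 * math.tan(1.5620688421 * value))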
def _expand_root_node_single_move(self, state, legal_moves):
    """
    Expands the current root node in the case that only a single move is available.
    The neural network search can be omitted in this case.

    :param state: Requested game state
    :param legal_moves: Available moves
    :return:
    """
    # request the value prediction for the current position
    [value, _] = self.nets[0].predict_single(state.get_state_planes())
    # we can create the move probability vector without the NN this time
    p_vec_small = np.array([1], np.float32)
    # create a new root node
    self.root_node = Node(state.get_pythonchess_board(), value, p_vec_small, legal_moves, clip_low_visit=False)

    if self.root_node.child_nodes[0] is None:  # check if the child node doesn't already exist
        state_child = deepcopy(state)
        state_child.apply_move(legal_moves[0])
        is_leaf = False  # initialize is_leaf by default to false
        # check on the child state if the current player has won the game by the applied move
        # (we don't need to check for is_lost() because the game would already be over)
        if state_child.is_loss():
            value = -1
            is_leaf = True
            legal_moves_child = []
            p_vec_small_child = None
        elif state_child.board.uci_variant == "giveaway" and state_child.is_win():
            # giveaway chess is a variant in which you win on your own turn
            value = +1
            is_leaf = True
            legal_moves_child = []
            p_vec_small_child = None
        # check if you can claim a draw - it's assumed that the draw is always claimed
        elif (
            self.can_claim_threefold_repetition(state_child.get_transposition_key(), [0])
            or state_child.get_pythonchess_board().can_claim_fifty_moves()
        ):
            value = 0
            is_leaf = True
            legal_moves_child = []
            p_vec_small_child = None
        else:
            legal_moves_child = state_child.get_legal_moves()
            # start a brand new prediction for the child
            [value, policy_vec] = self.nets[0].predict_single(state_child.get_state_planes())
            # extract a sparse policy vector with normalized probabilities
            p_vec_small_child = get_probs_of_move_list(
                policy_vec, legal_moves_child, state_child.is_white_to_move()
            )

        # create a new child node based on the board after the move was applied
        child_node = Node(state_child.get_pythonchess_board(), value, p_vec_small_child, legal_moves_child, is_leaf)
        self.root_node.child_nodes[0] = child_node  # connect the child to the root
        # assign the negated value of the child node as the q-value of the root's only move;
        # here we must invert the value because it's the value prediction of the next state
        self.root_node.q_value[0] = -value
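
# Minimal sketch of the sign convention assumed above: every node stores its value from
# the perspective of the side to move in that node, so a parent keeps the negated child
# value as the q-value of the corresponding move. The name below is illustrative.
def q_value_from_child(child_value: float) -> float:
    """A child value of -1 (side to move in the child is mated) becomes q = +1 for the mover."""
    return -child_value

assert q_value_from_child(-1.0) == 1.0  # delivering checkmate is a win for the mover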
def evaluate_board_state(self, state: AbsGameState):  # Too few public methods (1/2)
    """
    The greedy agent always performs the first legal move with the highest move probability

    :param state: Gamestate object
    :return:
    value - Value prediction in the current player's view from [-1,1]: -1 -> 100% lost, +1 -> 100% won
    legal_moves - List of all legal moves in the current position
    p_vec_small - Probability distribution over all legal moves
    centipawn - Centipawn evaluation which is converted from the value prediction in the current player's view
    depth - Depth which was reached after the search
    nodes - Number of nodes which have been evaluated in the search
    time_elapsed_s - Elapsed time for the full search (reported in milliseconds)
    nps - Nodes per second metric
    pv - Calculated best line for both players
    """
    t_start_eval = time()

    # start sync inference
    print("Starting inference")
    print("Preparing input blobs")
    input_blob = next(iter(self._net.read_net.input_info))
    output_blob = iter(self._net.read_net.outputs)
    pred_policy_blob = next(output_blob)
    pred_value_blob = next(output_blob)

    # NB: This is required to load the input as a uint8 np.array.
    # Without this step the input blob is loaded in FP32 precision,
    # which requires additional operations and more memory.
    self._net.read_net.input_info[input_blob].precision = "U8"

    res = self._net.exec_net.infer(inputs={input_blob: state.get_state_planes()})
    # TODO: check the order of the outputs
    pred_value = res[pred_value_blob][0][0]
    pred_policy = res[pred_policy_blob][0]

    legal_moves = list(state.get_legal_moves())
    p_vec_small = get_probs_of_move_list(pred_policy, legal_moves, state.is_white_to_move())

    # define the remaining return variables
    time_e = time() - t_start_eval
    centipawn = value_to_centipawn(pred_value)
    depth = nodes = 1
    time_elapsed_s = time_e * 1000  # note: this is milliseconds despite the variable name
    nps = nodes / time_e
    # use the move with the highest probability as the best move for logging
    pv = legal_moves[p_vec_small.argmax()].uci()
    return pred_value, legal_moves, p_vec_small, centipawn, depth, nodes, time_elapsed_s, nps, pv
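
# Hedged sketch of the OpenVINO setup (inference_engine API, 2021.x) that the method
# above assumes in self._net: read_net holds the parsed network and exec_net the
# compiled one. Model paths and the device name are placeholders.
from openvino.inference_engine import IECore

ie = IECore()
read_net = ie.read_network(model="model.xml", weights="model.bin")
exec_net = ie.load_network(network=read_net, device_name="CPU")

input_blob = next(iter(read_net.input_info))
out_iter = iter(read_net.outputs)
policy_blob, value_blob = next(out_iter), next(out_iter)  # order must match the exported network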
def _expand_root_node_multiple_moves(self, state, legal_moves):
    """
    Checks if the current root node can be found in the look-up table.
    Otherwise, runs a single inference of the neural network for this board state.

    :param state: Current game state
    :param legal_moves: Available moves
    :return:
    """
    is_leaf = False  # initialize is_leaf by default to false
    [value, policy_vec] = self.nets[0].predict_single(state.get_state_planes())  # start a brand new tree
    # extract a sparse policy vector with normalized probabilities
    p_vec_small = get_probs_of_move_list(policy_vec, legal_moves, state.is_white_to_move())
    chess_board = state.get_pythonchess_board()
    if self.enhance_captures:
        self._enhance_captures(chess_board, legal_moves, p_vec_small)
    if self.enhance_checks:
        self._enhance_checks(chess_board, legal_moves, p_vec_small)
    # create a new root node
    self.root_node = Node(chess_board, value, p_vec_small, legal_moves, is_leaf, clip_low_visit=False)
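
# Illustrative reimplementation of what get_probs_of_move_list() is assumed to do:
# pick the entries of the full policy vector that correspond to legal moves and
# renormalize them. The real helper maps python-chess moves to policy indices
# (with mirroring for black); here precomputed indices are taken instead.
import numpy as np

def probs_of_move_list_sketch(policy_vec, legal_move_indices, normalize=True):
    p_vec_small = np.asarray(policy_vec, dtype=np.float32)[legal_move_indices]
    if normalize and p_vec_small.sum() > 0:
        p_vec_small /= p_vec_small.sum()  # probabilities over legal moves sum to 1
    return p_vec_small

assert abs(probs_of_move_list_sketch([0.1, 0.2, 0.3, 0.4], [1, 3]).sum() - 1.0) < 1e-6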
def _run_single_playout(self, parent_node: Node, pipe_id=0, depth=1, chosen_nodes=None):
    """
    This function works recursively until a leaf or terminal node is reached.
    It ends by back-propagating the value of the newly expanded node or by propagating
    the value of a terminal state.

    :param parent_node: Current parent-node of the selected node. In the first expansion this is the root node.
    :param pipe_id: Identifier of the pipe ending used for communicating with the inference service
    :param depth: Current depth for the evaluation. Depth is increased by 1 for every recursive call
    :param chosen_nodes: List of child indices which have been selected in the current path with respect to
                         the root node. For each selected child node this list is expanded by one entry recursively.
    :return: -value: The inverse value prediction of the current board state. The flipping by -1 each turn
                     is needed because the point of view changes each half-move
             depth: Current depth reached by this evaluation
             chosen_nodes: List of child indices which have been selected
    """
    # Probably better to be refactored:
    # Too many arguments (6/5) - Too many local variables (27/15) - Too many branches (28/12) -
    # Too many statements (86/50)
    if chosen_nodes is None:
        chosen_nodes = []
    # select a legal move on the chess board
    node, move, child_idx = self._select_node(parent_node)
    if move is None:
        raise Exception("Illegal tree setup. A 'None' move was selected which shouldn't be possible")
    # update the visit counts to this node:
    # temporarily reduce the attraction of this node by applying a virtual loss;
    # the effect of the virtual loss will be undone once the playout is over
    parent_node.apply_virtual_loss_to_child(child_idx, self.virtual_loss)
    chosen_nodes.append(child_idx)  # append the chosen child idx to the chosen_nodes list

    if node is None:
        state = GameState(deepcopy(parent_node.board))  # get the board from the parent node
        state.apply_move(move)  # apply the selected move on the board
        # get the transposition-key which is used as an identifier for the board positions in the look-up table
        transposition_key = state.get_transposition_key()
        # check if the addressed position already exists in the look-up table.
        # note: it's important to also include the move counter here, otherwise the system can create an
        # infinite feedback loop
        key = transposition_key + (state.get_fullmove_number(),)

        if self.use_transposition_table and key in self.node_lookup:
            node = self.node_lookup[key]  # get the node from the look-up list
            # get the prior value from the leaf node which has already been expanded
            value = node.initial_value
            # clip the visit counts for all nodes in the search tree except the direct opponent move
            clip_low_visit = self.use_pruning
            new_node = Node(
                node.board,
                value,
                node.policy_prob,
                node.legal_moves,
                node.is_leaf,
                key,
                clip_low_visit,
            )  # create a new node
            with parent_node.lock:
                parent_node.child_nodes[child_idx] = new_node  # add the new node to its parent
        else:
            # expand and evaluate the new board state (the node wasn't found in the look-up table);
            # its value will be back-propagated through the tree and flipped after every layer
            my_pipe = self.my_pipe_endings[pipe_id]  # receive a free available pipe

            if self.send_batches:
                my_pipe.send(state.get_state_planes())
                # this pipe waits for the predictions of the network inference service
                [value, policy_vec] = my_pipe.recv()
            else:
                state_planes = state.get_state_planes()
                self.batch_state_planes[pipe_id] = state_planes
                my_pipe.send(pipe_id)
                result_channel = my_pipe.recv()
                value = np.array(self.batch_value_results[result_channel])
                policy_vec = np.array(self.batch_policy_results[result_channel])

            is_leaf = is_won = False  # initialize is_leaf by default to false and check if the game is won
            # check if the current player has won the game
            # (we don't need to check for is_lost() because the game is already over
            #  if the current player checkmated his opponent)
            if state.is_check():
                if state.is_loss():
                    is_won = True
            # needed for e.g. atomic because the king explodes and is not in checkmate anymore
            if state.is_variant_loss():
                is_won = True

            if is_won:
                value = -1
                is_leaf = True
                legal_moves = []
                p_vec_small = None
                # establish a mate-in-one connection in order to stop exploring different alternatives
                parent_node.set_check_mate_node_idx(child_idx)
            # check if you can claim a draw - it's assumed that the draw is always claimed
            elif (
                self.can_claim_threefold_repetition(transposition_key, chosen_nodes)
                or state.get_pythonchess_board().can_claim_fifty_moves()
            ):
                value = 0
                is_leaf = True
                legal_moves = []
                p_vec_small = None
            else:
                legal_moves = state.get_legal_moves()  # get the current legal moves of the board state
                if not legal_moves:
                    # a stalemate occurred, which is very rare for crazyhouse
                    if state.uci_variant == "giveaway":
                        value = 1  # reaching a stalemate counts as a win in giveaway chess
                    else:
                        value = 0
                    is_leaf = True
                    legal_moves = []
                    p_vec_small = None
                    # raise Exception("No legal move is available for state: %s" % state)
                else:
                    try:
                        # extract a sparse policy vector with normalized probabilities
                        p_vec_small = get_probs_of_move_list(
                            policy_vec, legal_moves, is_white_to_move=state.is_white_to_move(), normalize=True
                        )
                    except KeyError:
                        raise Exception("Key Error for state: %s" % state)

            # clip the visit counts for all nodes in the search tree except the direct opponent move
            clip_low_visit = self.use_pruning and depth != 1  # and depth > 4
            new_node = Node(
                state.get_pythonchess_board(),
                value,
                p_vec_small,
                legal_moves,
                is_leaf,
                transposition_key,
                clip_low_visit,
            )  # create a new node

            if depth == 1:
                # disable uncertain moves from being visited by giving them a very bad score
                if not is_leaf and self.use_pruning:
                    if self.root_node_prior_policy[child_idx] < 1e-3 and value * -1 < self.root_node.initial_value:
                        with parent_node.lock:
                            value = 99
                # for performance reasons only apply check enhancement on depth 1 for now
                chess_board = state.get_pythonchess_board()
                if self.enhance_checks:
                    self._enhance_checks(chess_board, legal_moves, p_vec_small)
                if self.enhance_captures:
                    self._enhance_captures(chess_board, legal_moves, p_vec_small)

            if not self.use_pruning:
                self.node_lookup[key] = new_node  # include a reference to the new node in the look-up table
            with parent_node.lock:
                parent_node.child_nodes[child_idx] = new_node  # add the new node to its parent
    elif node.is_leaf:  # check if we have reached a leaf node
        value = node.initial_value
    else:
        # get the value from the leaf node (the current function is called recursively)
        value, depth, chosen_nodes = self._run_single_playout(node, pipe_id, depth + 1, chosen_nodes)

    # revert the virtual loss and apply the value predicted by the network to the node
    parent_node.revert_virtual_loss_and_update(child_idx, self.virtual_loss, -value)
    # invert the value prediction for the parent of the above node layer because the player changes every turn
    return -value, depth, chosen_nodes
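
# Hedged sketch of the virtual-loss bookkeeping behind apply_virtual_loss_to_child()
# and revert_virtual_loss_and_update(); attribute names are illustrative. A virtual
# loss makes a node temporarily look worse so that parallel playouts spread across
# different branches instead of piling onto the same one.
class VirtualLossSketch:
    def __init__(self, n_children: int):
        self.n = [0.0] * n_children  # visit counts per child
        self.w = [0.0] * n_children  # accumulated value per child

    def apply_virtual_loss_to_child(self, idx: int, virtual_loss: float):
        self.n[idx] += virtual_loss  # pretend the child was visited ...
        self.w[idx] -= virtual_loss  # ... and lost, lowering its q = w / n

    def revert_virtual_loss_and_update(self, idx: int, virtual_loss: float, value: float):
        self.n[idx] += 1 - virtual_loss      # undo the fake visits, count the real one
        self.w[idx] += virtual_loss + value  # undo the fake loss, add the real value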
def negamax(self, state, depth, alpha=-math.inf, beta=math.inf, color=1, all_moves=1):
    """
    Evaluates all nodes at a given depth and back-propagates their values to their respective parent nodes,
    in order to keep the number of nodes manageable for neural network evaluation.

    :param state: Game state object
    :param depth: Number of plies to search
    :param alpha: Current alpha value which is used for pruning
    :param beta: Current beta value which is used for pruning
    :param color: Integer color value 1 for white, -1 for black
    :param all_moves: Number of plies for which all possible moves are expanded
    :return: best_value - Best value for the current player up to the search depth
    """
    if state.is_loss():  # a check for draws is neglected for now due to bad runtime
        return -1
    if state.get_pythonchess_board().can_claim_draw():
        return 0

    [value, policy_vec] = self.net.predict_single(state.get_state_planes())  # start a brand new tree
    if depth == 0:
        return value  # the value is always returned in the view of the current player

    best_value = -math.inf  # initialization
    legal_moves = state.get_legal_moves()
    p_vec_small = get_probs_of_move_list(policy_vec, state.get_legal_moves(), state.mirror_policy())

    if all_moves > 0:
        mv_idces = list(np.argsort(p_vec_small)[::-1])
    else:
        mv_idces = list(np.argsort(p_vec_small)[::-1][:self.nb_candidate_moves])

    if self.include_check_moves:
        check_idces, _ = get_check_move_indices(state.get_pythonchess_board(), state.get_legal_moves())
        mv_idces += check_idces

    for mv_idx in mv_idces:  # each child of the position
        if p_vec_small[mv_idx] > 0.1:
            mv = legal_moves[mv_idx]
            state_child = copy.deepcopy(state)
            state_child.apply_move(mv)
            value = -self.negamax(state_child, depth - 1, -beta, -alpha, -color, all_moves - 1)
            if value > best_value:
                self.best_moves[-depth] = mv
                self.sel_mv_idx[-depth] = mv_idx
                best_value = value
            alpha = max(alpha, value)
            if alpha >= beta:
                break
    return best_value
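
# Self-contained toy check of the negamax recursion with alpha-beta pruning used
# above, run on a hand-built tree instead of a chess position. Leaf values are
# given from the perspective of the side to move at that leaf.
import math

def negamax_sketch(node, depth, alpha=-math.inf, beta=math.inf):
    children = node.get("children")
    if depth == 0 or not children:
        return node["value"]  # static evaluation for the side to move
    best_value = -math.inf
    for child in children:
        best_value = max(best_value, -negamax_sketch(child, depth - 1, -beta, -alpha))
        alpha = max(alpha, best_value)
        if alpha >= beta:
            break  # beta cut-off: the opponent would avoid this line anyway
    return best_value

tree = {"children": [{"value": -0.25}, {"value": 0.5}]}
assert negamax_sketch(tree, 2) == 0.25  # the reply worth -0.25 to the opponent is best for us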