Example #1
    def evaluate_board_state(
            self, state: AbsGameState):  # Too few public methods (1/2)
        """
        The greedy agent always performs the first legal move with the highest move probability

        :param state: Gamestate object
        :return:
        value - Value prediction in the current player's view from [-1,1]: -1 -> 100% lost, +1 -> 100% won
        selected_move - Python chess move object of the selected move
        confidence - Probability value for the selected move in the probability distribution
        idx - Integer index of the move which was returned
        centipawn - Centipawn evaluation which is converted from the value prediction in the current player's view
        depth - Depth which was reached after the search
        nodes - Number of nodes which have been evaluated in the search
        time_elapsed_s - Elapsed time in seconds for the full search
        nps - Nodes per second metric
        pv - Calculated best line for both players
        """

        t_start_eval = time()
        pred_value, pred_policy = self._net.predict_single(
            state.get_state_planes())
        legal_moves = list(state.get_legal_moves())
        p_vec_small = get_probs_of_move_list(pred_policy, legal_moves,
                                             state.mirror_policy())
        # define the remaining return variables
        time_e = time() - t_start_eval
        centipawn = value_to_centipawn(pred_value)
        depth = nodes = 1
        time_elapsed_s = time_e * 1000  # elapsed time scaled to milliseconds for reporting
        nps = nodes / time_e
        # use the move with the highest probability as the best move for logging
        pv = legal_moves[p_vec_small.argmax()].uci()
        return pred_value, legal_moves, p_vec_small, centipawn, depth, nodes, time_elapsed_s, nps, pv
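A minimal usage sketch for this method (hypothetical caller: `agent` and `state` are assumed names for a constructed agent and an AbsGameState implementation, not taken from this listing):

    # hypothetical usage; assumes `agent` wraps a loaded network and `state` is a prepared game state
    (value, legal_moves, p_vec_small, centipawn,
     depth, nodes, time_elapsed_s, nps, pv) = agent.evaluate_board_state(state)
    best_move = legal_moves[p_vec_small.argmax()]  # greedy pick, identical to the logged pv
    print(best_move.uci(), centipawn, nps)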
Example #2
    def _expand_root_node_single_move(self, state, legal_moves):
        """
        Expands the current root node in the case that only a single move is available.
        The neural network search can be omitted in this case.
        :param state: Requested game state
        :param legal_moves: Available moves
        :return:
        """
        # request the value prediction for the current position
        [value, _] = self.nets[0].predict_single(state.get_state_planes())
        p_vec_small = np.array([1], np.float32)  # we can create the move probability vector without the NN this time

        # create a new root node
        self.root_node = Node(state.get_pythonchess_board(), value, p_vec_small, legal_moves, clip_low_visit=False)

        if self.root_node.child_nodes[0] is None:  # only create the child node if it doesn't exist already
            state_child = deepcopy(state)
            state_child.apply_move(legal_moves[0])
            is_leaf = False  # initialize is_leaf by default to false
            # we don't need to check for is_lost() because the game is already over
            if state.is_loss():  # check if the current player has won the game
                value = -1
                is_leaf = True
                legal_moves_child = []
                p_vec_small_child = None
            elif state.board.uci_variant == "giveaway" and state.is_win():
                # giveaway chess is a variant in which you win on your own turn
                value = +1
                is_leaf = True
                legal_moves_child = []
                p_vec_small_child = None
            # check if you can claim a draw - it's assumed that the draw is always claimed
            elif (
                self.can_claim_threefold_repetition(state.get_transposition_key(), [0])
                or state.get_pythonchess_board().can_claim_fifty_moves()
            ):
                value = 0
                is_leaf = True
                legal_moves_child = []
                p_vec_small_child = None
            else:
                legal_moves_child = state_child.get_legal_moves()
                # start a brand new prediction for the child
                [value, policy_vec] = self.nets[0].predict_single(state_child.get_state_planes())
                # extract a sparse policy vector with normalized probabilities
                p_vec_small_child = get_probs_of_move_list(
                    policy_vec, legal_moves_child, state_child.is_white_to_move()
                )

            # create a new child node
            child_node = Node(state.get_pythonchess_board(), value, p_vec_small_child, legal_moves_child, is_leaf)
            self.root_node.child_nodes[0] = child_node  # connect the child to the root
            # assign the value of the root node as the q-value for the child
            # here we must invert the value because it's the value prediction of the next state
            self.root_node.q_value[0] = -value
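The final assignment follows the usual negamax convention: the child's value is predicted from the point of view of the side to move in the child position, so the parent stores its negation as the Q-value. A standalone illustration of this sign flip (not the Node API of the engine):

    # standalone illustration of the per-half-move value inversion (not the actual Node class)
    def q_from_parent_view(child_value):
        """Return the child's predicted value as seen from the parent's side to move."""
        return -child_value

    assert q_from_parent_view(-1.0) == 1.0  # a position lost for the child is won for the parent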
Example #3
    def evaluate_board_state(
            self, state: AbsGameState):  # Too few public methods (1/2)
        """
        The greedy agent always performs the first legal move with the highest move probability

        :param state: Gamestate object
        :return:
        value - Value prediction in the current player's view from [-1,1]: -1 -> 100% lost, +1 -> 100% won
        selected_move - Python chess move object of the selected move
        confidence - Probability value for the selected move in the probability distribution
        idx - Integer index of the move which was returned
        centipawn - Centipawn evaluation which is converted from the value prediction in the current player's view
        depth - Depth which was reached after the search
        nodes - Number of nodes which have been evaluated in the search
        time_elapsed_s - Elapsed time in seconds for the full search
        nps - Nodes per second metric
        pv - Calculated best line for both players
        """

        t_start_eval = time()

        # Start sync inference
        print("Starting inference")

        print("Preparing input blobs")
        input_blob = next(iter(self._net.read_net.input_info))
        output_blob = iter(self._net.read_net.outputs)
        pred_policy_blob = next(output_blob)
        pred_value_blob = next(output_blob)

        # NB: This is required to load the input as a uint8 np.array.
        #     Without this step the input blob is loaded in FP32 precision,
        #     which requires an additional operation and more memory.
        self._net.read_net.input_info[input_blob].precision = "U8"

        res = self._net.exec_net.infer(
            inputs={input_blob: state.get_state_planes()})

        #TODO Check order of output

        pred_value = res[pred_value_blob][0][0]
        pred_policy = res[pred_policy_blob][0]

        legal_moves = list(state.get_legal_moves())
        p_vec_small = get_probs_of_move_list(pred_policy, legal_moves,
                                             state.is_white_to_move())
        # define the remaining return variables
        time_e = time() - t_start_eval
        centipawn = value_to_centipawn(pred_value)
        depth = nodes = 1
        time_elapsed_s = time_e * 1000  # elapsed time scaled to milliseconds for reporting
        nps = nodes / time_e
        # use the move with the highest probability as the best move for logging
        pv = legal_moves[p_vec_small.argmax()].uci()
        return pred_value, legal_moves, p_vec_small, centipawn, depth, nodes, time_elapsed_s, nps, pv
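For context, a hedged sketch of how the read_net/exec_net pair used above is typically created with the legacy OpenVINO Inference Engine Python API (model paths and device are placeholders; this setup is an assumption and not taken from this listing):

    # assumed setup for the legacy OpenVINO IE API used above; paths and device are placeholders
    from openvino.inference_engine import IECore

    ie = IECore()
    read_net = ie.read_network(model="model.xml", weights="model.bin")
    exec_net = ie.load_network(network=read_net, device_name="CPU")
    # read_net.input_info and read_net.outputs provide the blob names queried in evaluate_board_state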
Example #4
    def _expand_root_node_multiple_moves(self, state, legal_moves):
        """
        Checks if the current root node can be found in the look-up table.
        Otherwise, runs a single inference of the neural network for this board state.
        :param state: Current game state
        :param legal_moves: Available moves
        :return:
        """

        is_leaf = False  # initialize is_leaf by default to false
        [value, policy_vec] = self.nets[0].predict_single(state.get_state_planes())  # start a brand new tree
        # extract a sparse policy vector with normalized probabilities
        p_vec_small = get_probs_of_move_list(policy_vec, legal_moves, state.is_white_to_move())
        chess_board = state.get_pythonchess_board()
        if self.enhance_captures:
            self._enhance_captures(chess_board, legal_moves, p_vec_small)

        if self.enhance_checks:
            self._enhance_checks(chess_board, legal_moves, p_vec_small)

        # create a new root node
        self.root_node = Node(chess_board, value, p_vec_small, legal_moves, is_leaf, clip_low_visit=False)
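_enhance_checks and _enhance_captures are not part of this listing; conceptually they raise the prior probability of checking and capturing moves so that these candidates get explored early. A standalone sketch of that idea using python-chess (the floor value and renormalization are assumptions, not the actual implementation):

    # standalone sketch of prior enhancement (not the actual _enhance_checks/_enhance_captures)
    import chess
    import numpy as np

    def enhance_priors_sketch(board: chess.Board, legal_moves, p_vec_small: np.ndarray, floor: float = 0.1):
        for idx, move in enumerate(legal_moves):
            if board.gives_check(move) or board.is_capture(move):
                p_vec_small[idx] = max(p_vec_small[idx], floor)  # assumed floor, illustrative only
        p_vec_small /= p_vec_small.sum()  # renormalize to a probability distribution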
Example #5
    def _run_single_playout(self, parent_node: Node, pipe_id=0, depth=1, chosen_nodes=None):
        """
        This function works recursively until a leaf or terminal node is reached.
        It ends by back-propagating the value of the new expanded node or by propagating the value of a terminal state.

        :param parent_node: Current parent-node of the selected node. In the first expansion this is the root node.
        :param pipe_id: Identifier of the pipe ending used to communicate with the network inference service
        :param depth: Current depth for the evaluation. Depth is increased by 1 for every recursive call
        :param chosen_nodes: List of child indices which have been selected in the current path with respect to
                        the root node. For each selected child node this list is expanded by one entry recursively.
        :return: -value: The inverse value prediction of the current board state. The flipping by -1 each turn is needed
                        because the point of view changes each half-move
                depth: Current depth reached by this evaluation
                chosen_nodes: List of child indices which have been selected
        """
        # This function should probably be refactored:
        # Too many arguments (6/5) - Too many local variables (27/15) - Too many branches (28/12) -
        # Too many statements (86/50)
        if chosen_nodes is None:
            chosen_nodes = []
        # select a legal move on the chess board
        node, move, child_idx = self._select_node(parent_node)

        if move is None:
            raise Exception("Illegal tree setup. A 'None' move was selected which shouldn't be possible")
        # update the visit counts of this node and temporarily reduce its attraction by applying a virtual loss;
        # the effect of the virtual loss will be undone once the playout is over
        parent_node.apply_virtual_loss_to_child(child_idx, self.virtual_loss)

        # append the chosen child index to the list of nodes selected along this path
        chosen_nodes.append(child_idx)

        if node is None:
            state = GameState(deepcopy(parent_node.board))  # get the board from the parent node
            state.apply_move(move)  # apply the selected move on the board

            # get the transposition-key which is used as an identifier for the board positions in the look-up table
            transposition_key = state.get_transposition_key()
            # check if the addressed fen already exists in the look-up table
            # note: It's important to also include the move counter here, otherwise the system can create an infinite
            # feedback loop
            key = transposition_key + (state.get_fullmove_number(),)

            if self.use_transposition_table and key in self.node_lookup:

                node = self.node_lookup[key]  # get the node from the look-up list

                # get the prior value from the leaf node which has already been expanded
                value = node.initial_value

                # clip low-visit nodes for all nodes in the search tree except the direct opponent move
                clip_low_visit = self.use_pruning

                new_node = Node(
                    node.board,
                    value,
                    node.policy_prob,
                    node.legal_moves,
                    node.is_leaf,
                    key,
                    clip_low_visit,
                )  # create a new node

                with parent_node.lock:
                    parent_node.child_nodes[child_idx] = new_node  # add the new node to its parent

            else:
                # expand and evaluate the new board state (the node wasn't found in the look-up table)
                # its value will be back-propagated through the tree and flipped after every layer
                my_pipe = self.my_pipe_endings[pipe_id]  # receive a free available pipe

                if self.send_batches:
                    my_pipe.send(state.get_state_planes())
                    # this pipe waits for the predictions of the network inference service
                    [value, policy_vec] = my_pipe.recv()
                else:
                    state_planes = state.get_state_planes()
                    self.batch_state_planes[pipe_id] = state_planes
                    my_pipe.send(pipe_id)
                    result_channel = my_pipe.recv()
                    value = np.array(self.batch_value_results[result_channel])
                    policy_vec = np.array(self.batch_policy_results[result_channel])

                is_leaf = is_won = False  # initialize is_leaf and is_won to false by default
                # check if the current player has won the game
                # (we don't need to check for is_lost() because the game is already over
                #  if the current player checkmated his opponent)
                if state.is_check():
                    if state.is_loss():
                        is_won = True

                # needed for e.g. atomic because the king explodes and is not in check mate anymore
                if state.is_variant_loss():
                    is_won = True

                if is_won:
                    value = -1
                    is_leaf = True
                    legal_moves = []
                    p_vec_small = None
                    # establish a mate in one connection in order to stop exploring different alternatives
                    parent_node.set_check_mate_node_idx(child_idx)
                # get the value from the leaf node (the current function is called recursively)
                # check if you can claim a draw - it's assumed that the draw is always claimed
                elif (
                    self.can_claim_threefold_repetition(transposition_key, chosen_nodes)
                    or state.get_pythonchess_board().can_claim_fifty_moves() is True
                ):
                    value = 0
                    is_leaf = True
                    legal_moves = []
                    p_vec_small = None
                else:
                    legal_moves = state.get_legal_moves()  # get the current legal move of its board state

                    if not legal_moves:
                        # stalemate occurred which is very rare for crazyhouse
                        if state.uci_variant == "giveaway":
                            value = 1
                        else:
                            value = 0
                        is_leaf = True
                        legal_moves = []
                        p_vec_small = None
                        # raise Exception("No legal move is available for state: %s" % state)
                    else:
                        try:  # extract a sparse policy vector with normalized probabilities
                            p_vec_small = get_probs_of_move_list(
                                policy_vec, legal_moves, is_white_to_move=state.is_white_to_move(), normalize=True
                            )
                        except KeyError:
                            raise Exception("Key Error for state: %s" % state)

                # clip low-visit nodes for all nodes in the search tree except the direct opponent move
                clip_low_visit = self.use_pruning and depth != 1  # and depth > 4
                new_node = Node(
                    state.get_pythonchess_board(),
                    value,
                    p_vec_small,
                    legal_moves,
                    is_leaf,
                    transposition_key,
                    clip_low_visit,
                )  # create a new node

                if depth == 1:
                    # disable uncertain moves from being visited by giving them a very bad score
                    if not is_leaf and self.use_pruning:
                        if self.root_node_prior_policy[child_idx] < 1e-3 and value * -1 < self.root_node.initial_value:
                            with parent_node.lock:
                                value = 99

                    # for performance reasons only apply check enhancement on depth 1 for now
                    chess_board = state.get_pythonchess_board()
                    if self.enhance_checks:
                        self._enhance_checks(chess_board, legal_moves, p_vec_small)

                    if self.enhance_captures:
                        self._enhance_captures(chess_board, legal_moves, p_vec_small)

                if not self.use_pruning:
                    self.node_lookup[key] = new_node  # include a reference to the new node in the look-up table

                with parent_node.lock:
                    parent_node.child_nodes[child_idx] = new_node  # add the new node to its parent
        elif node.is_leaf:  # check if we have reached a leaf node
            value = node.initial_value
        else:
            # get the value from the leaf node (the current function is called recursively)
            value, depth, chosen_nodes = self._run_single_playout(node, pipe_id, depth + 1, chosen_nodes)
        # revert the virtual loss and apply the predicted value by the network to the node
        parent_node.revert_virtual_loss_and_update(child_idx, self.virtual_loss, -value)
        # invert the value prediction for the parent of the above node layer because the player to move changes every turn
        return -value, depth, chosen_nodes
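The virtual loss referenced by apply_virtual_loss_to_child and revert_virtual_loss_and_update keeps parallel playouts from piling onto the same child node. A standalone sketch of the standard bookkeeping (the W/N arrays are assumptions, not the internals of the Node class):

    # standalone illustration of virtual loss (not the actual Node implementation)
    def apply_virtual_loss(w_sum, n_visits, idx, virtual_loss):
        n_visits[idx] += virtual_loss      # pretend the in-flight playout already visited this child
        w_sum[idx] -= virtual_loss         # ...and lost, which lowers Q = W / N for the other threads

    def revert_virtual_loss_and_update(w_sum, n_visits, idx, virtual_loss, value):
        n_visits[idx] += 1 - virtual_loss  # keep exactly one real visit for this playout
        w_sum[idx] += value + virtual_loss # remove the fake losses and add the back-propagated value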
Example #6
    def negamax(self,
                state,
                depth,
                alpha=-math.inf,
                beta=math.inf,
                color=1,
                all_moves=1):
        """
        Evaluates all nodes at a given depth and back-propagates their values to their respective parent nodes.
        In order to keep the number of nodes manageable for neural network evaluation, only a limited set of
        candidate moves is expanded at each node.
        :param all_moves: Number of remaining plies for which all legal moves are considered as candidates;
                          afterwards only the top nb_candidate_moves are searched
        :param state: Game state object
        :param depth: Remaining search depth to reach
        :param alpha: Current alpha value which is used for pruning
        :param beta: Current beta value which is used for pruning
        :param color: Integer color value 1 for white, -1 for black
        :return: best_value - Best value for the current player up to the given search depth
        """

        if state.is_loss():  # check for draw is neglected for now due to bad runtime
            return -1

        if state.get_pythonchess_board().can_claim_draw():
            return 0

        # evaluate the current position with the network
        [value, policy_vec] = self.net.predict_single(
            state.get_state_planes())

        if depth == 0:
            return value  # the value is always returned in the view of the current player

        best_value = -math.inf  # initialization

        legal_moves = state.get_legal_moves()
        p_vec_small = get_probs_of_move_list(policy_vec, legal_moves,
                                             state.mirror_policy())

        if all_moves > 0:
            mv_idces = list(np.argsort(p_vec_small)[::-1])
        else:
            mv_idces = list(
                np.argsort(p_vec_small)[::-1][:self.nb_candidate_moves])

        if self.include_check_moves:
            check_idces, _ = get_check_move_indices(
                state.get_pythonchess_board(), state.get_legal_moves())
            mv_idces += check_idces

        for mv_idx in mv_idces:  # each child of position
            if p_vec_small[mv_idx] > 0.1:
                mv = legal_moves[mv_idx]
                state_child = copy.deepcopy(state)
                state_child.apply_move(mv)
                value = -self.negamax(state_child, depth - 1, -beta, -alpha,
                                      -color, all_moves - 1)
                if value > best_value:
                    self.best_moves[-depth] = mv
                    self.sel_mv_idx[-depth] = mv_idx
                    best_value = value
                alpha = max(alpha, value)
                if alpha >= beta:
                    break
        return best_value
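A hedged driver sketch for this search (object and attribute names are assumptions derived from the code above, not taken from the listing). best_moves and sel_mv_idx are written at index -depth, so the root entry of an initial call with depth=2 ends up at index -2:

    # hypothetical driver; assumes `searcher` owns negamax() and the attributes it writes
    search_depth = 2
    searcher.best_moves = [None] * search_depth
    searcher.sel_mv_idx = [None] * search_depth
    best_value = searcher.negamax(state, depth=search_depth, color=1, all_moves=1)
    root_move = searcher.best_moves[-search_depth]  # move chosen at the root call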