Example No. 1
    def evaluate_board_state(
            self, state: AbsGameState):  # Too few public methods (1/2)
        """
        The greedy agent always plays the legal move with the highest move probability.

        :param state: Game state object
        :return:
        value - Value prediction from the current player's view in [-1, 1]: -1 -> 100% lost, +1 -> 100% won
        legal_moves - List of python-chess move objects for the current position
        p_vec_small - Probability distribution over the legal moves
        centipawn - Centipawn evaluation converted from the value prediction in the current player's view
        depth - Depth reached by the search
        nodes - Number of nodes evaluated during the search
        time_elapsed_s - Elapsed time for the full search (returned in milliseconds)
        nps - Nodes-per-second metric
        pv - Principal variation; here the single best move in UCI notation
        """

        t_start_eval = time()
        pred_value, pred_policy = self._net.predict_single(
            state.get_state_planes())
        legal_moves = list(state.get_legal_moves())
        p_vec_small = get_probs_of_move_list(pred_policy, legal_moves,
                                             state.mirror_policy())
        # define the remaining return variables
        time_e = time() - t_start_eval
        centipawn = value_to_centipawn(pred_value)
        depth = nodes = 1
        time_elapsed_s = time_e * 1000  # elapsed time in milliseconds
        nps = nodes / time_e
        # use the move with the highest probability as the best move for logging
        pv = legal_moves[p_vec_small.argmax()].uci()
        return pred_value, legal_moves, p_vec_small, centipawn, depth, nodes, time_elapsed_s, nps, pv
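
Examples No. 1 and No. 3 divide by time_e without the zero-division guard that Example No. 5 further below applies. A minimal standalone sketch of that guard pattern; the helper name timing_metrics is hypothetical and not part of the project:

    from time import time

    def timing_metrics(nodes, t_start):
        # Hypothetical helper: derive the elapsed search time in milliseconds and
        # the nodes-per-second metric, guarding against division by zero in the
        # same way the MCTS example below does.
        time_e = time() - t_start
        nps = nodes / time_e if time_e > 0.0 else 999999999
        return time_e * 1000, nps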
Example No. 2
    def evaluate_board_state(self, state: AbsGameState) -> tuple:
        """
        Evaluates a given board position using alpha-beta (negamax) search.

        :param state: Game state object
        :return: Tuple of (value, legal_moves, policy, centipawn, depth, nodes, time_elapsed_s, nps, pv)
        """
        self.t_start_eval = time()
        value = self.negamax(state,
                             depth=self.depth,
                             alpha=-math.inf,
                             beta=math.inf,
                             color=1 if state.board.turn else -1)

        legal_moves = state.get_legal_moves()
        policy = np.zeros(len(legal_moves))
        policy[self.sel_mv_idx[0]] = 1
        centipawn = value_to_centipawn(value)
        nodes = self.nodes
        # in UCI, the search depth is given in half-moves, also called plies
        time_e = time() - self.t_start_eval
        time_elapsed_s = time_e * 1000  # elapsed time in milliseconds
        nps = nodes / time_e
        pv = self.best_moves[0].uci()

        logging.info(f"{self.best_moves}")
        logging.info(f"Value: {value}, Centipawn: {centipawn}")
        return value, legal_moves, policy, centipawn, self.depth, nodes, time_elapsed_s, nps, pv
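
Example No. 2 delegates the actual evaluation to self.negamax, which is not shown here. For reference, a textbook negamax with alpha-beta pruning looks roughly as follows; this is a sketch only, not the project's implementation, and evaluate and children are hypothetical callables supplied by the caller:

    import math

    def negamax(state, depth, alpha, beta, color, evaluate, children):
        # Plain negamax with alpha-beta pruning; the best-move tracking and node
        # counting that the example above relies on are omitted for brevity.
        if depth == 0:
            return color * evaluate(state)
        value = -math.inf
        for child in children(state):
            value = max(value, -negamax(child, depth - 1, -beta, -alpha, -color,
                                        evaluate, children))
            alpha = max(alpha, value)
            if alpha >= beta:
                break  # beta cut-off
        return value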
Example No. 3
    def evaluate_board_state(
            self, state: AbsGameState):  # Too few public methods (1/2)
        """
        The greedy agent always plays the legal move with the highest move probability.

        :param state: Game state object
        :return:
        value - Value prediction from the current player's view in [-1, 1]: -1 -> 100% lost, +1 -> 100% won
        legal_moves - List of python-chess move objects for the current position
        p_vec_small - Probability distribution over the legal moves
        centipawn - Centipawn evaluation converted from the value prediction in the current player's view
        depth - Depth reached by the search
        nodes - Number of nodes evaluated during the search
        time_elapsed_s - Elapsed time for the full search (returned in milliseconds)
        nps - Nodes-per-second metric
        pv - Principal variation; here the single best move in UCI notation
        """

        t_start_eval = time()

        # Start sync inference
        print("Starting inference")

        print("Preparing input blobs")
        input_blob = next(iter(self._net.read_net.input_info))
        output_blob = iter(self._net.read_net.outputs)
        pred_policy_blob = next(output_blob)
        pred_value_blob = next(output_blob)

        # NB: This is required to feed the input planes as a uint8 np.array.
        #     Without this step the input blob is loaded in FP32 precision,
        #     which requires an additional conversion and more memory.
        self._net.read_net.input_info[input_blob].precision = "U8"

        res = self._net.exec_net.infer(
            inputs={input_blob: state.get_state_planes()})

        # TODO: check the order of the output blobs

        pred_value = res[pred_value_blob][0][0]
        pred_policy = res[pred_policy_blob][0]

        legal_moves = list(state.get_legal_moves())
        p_vec_small = get_probs_of_move_list(pred_policy, legal_moves,
                                             state.is_white_to_move())
        # define the remaining return variables
        time_e = time() - t_start_eval
        centipawn = value_to_centipawn(pred_value)
        depth = nodes = 1
        time_elapsed_s = time_e * 1000  # elapsed time in milliseconds
        nps = nodes / time_e
        # use the move with the highest probability as the best move for logging
        pv = legal_moves[p_vec_small.argmax()].uci()
        return pred_value, legal_moves, p_vec_small, centipawn, depth, nodes, time_elapsed_s, nps, pv
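
The TODO above notes that the output order is not guaranteed when iterating over the network's output blobs. One way to address it, sketched with a hypothetical helper (split_outputs is not part of the project or the OpenVINO API), is to pick the tensors by shape rather than by iteration order:

    import numpy as np

    def split_outputs(result_dict):
        # Hypothetical helper: the value head yields one scalar per batch entry,
        # while the policy head yields a long probability vector, so the two
        # outputs can be told apart by their shapes.
        value = policy = None
        for tensor in result_dict.values():
            arr = np.asarray(tensor)
            if arr.size == arr.shape[0]:   # one scalar per batch element -> value head
                value = arr.reshape(-1)[0]
            else:                          # probability vector -> policy head
                policy = arr[0]
        return value, policy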
Example No. 4
    def _run_mcts_search(self, state):
        """
        Runs a new search or continues the MCTS on the current search tree.
        :param state: Input state given by the user
        :return: max_depth_reached (int) - The longest search path length after the whole search
        """

        self.node_lookup = {}  # clear the look up table
        self.root_node_prior_policy = deepcopy(self.root_node.policy_prob)  # save the prior policy of the root node
        # apply dirichlet noise to the prior probabilities in order to ensure
        #  that every move can possibly be visited
        self.root_node.apply_dirichlet_noise_to_prior_policy(epsilon=self.dirichlet_epsilon, alpha=self.dirichlet_alpha)
        # store what depth has been reached at maximum in the current search tree
        max_depth_reached = 1  # default is 1, in case only 1 move is available
        futures = []

        if state.are_pocket_empty():  # set the number of playouts accordingly
            nb_playouts = self.nb_playouts_empty_pockets
        else:
            nb_playouts = self.nb_playouts_filled_pockets

        t_elapsed_ms = cur_playouts = 0
        old_time = time()
        cpuct_init = self.cpuct

        if self.use_time_management:
            time_checked = time_checked_early = False
        else:
            time_checked = time_checked_early = True

        while (
            max_depth_reached < self.max_search_depth and cur_playouts < nb_playouts and t_elapsed_ms < self.movetime_ms
        ):  # and np.abs(self.root_node.q_value.mean()) < 0.99:

            # start searching
            with ThreadPoolExecutor(max_workers=self.threads) as executor:
                for i in range(self.threads):
                    # calculate the thread id based on the current playout
                    futures.append(
                        executor.submit(
                            self._run_single_playout, parent_node=self.root_node, pipe_id=i, depth=1, chosen_nodes=[]
                        )
                    )

            cur_playouts += self.threads
            time_show_info = time() - old_time

            for i, future in enumerate(futures):
                cur_value, cur_depth, chosen_nodes = future.result()

                if cur_depth > max_depth_reached:
                    max_depth_reached = cur_depth
                # print the explored line of the last playout if verbose is enabled and more than 0.5s have passed
                if self.verbose and time_show_info > 0.5 and i == len(futures) - 1:
                    mv_list = self._create_mv_list(chosen_nodes)
                    str_moves = self._mv_list_to_str(mv_list)
                    print(
                        "info score cp %d depth %d nodes %d pv %s"
                        % (value_to_centipawn(cur_value), cur_depth, self.root_node.n_sum, str_moves)
                    )
                    logging.debug("Update info")
                    old_time = time()

            t_elapsed = time() - self.t_start_eval  # update the current search time
            t_elapsed_ms = t_elapsed * 1000
            if time_show_info > 1:
                node_searched = int(self.root_node.n_sum - self.total_nodes_pre_search)
                print("info nps %d time %d" % (int((node_searched / t_elapsed)), t_elapsed_ms))

            if not time_checked_early and t_elapsed_ms > self.movetime_ms / 2:
                if (
                    self.root_node.policy_prob.max() > 0.9
                    and self.root_node.policy_prob.argmax() == self.root_node.q_value.argmax()
                ):
                    self.time_buffer_ms += (self.movetime_ms - t_elapsed_ms) * 0.9
                    print("info early break up")
                    break
                else:
                    time_checked_early = True

            if (
                self.time_buffer_ms > 2500
                and not time_checked
                and t_elapsed_ms > self.movetime_ms * 0.9
                and self.root_node.q_value[self.root_node.child_number_visits.argmax()]
                < self.root_node.initial_value + 0.01
            ):
                print("info increase time")
                time_checked = True
                # move part of the time buffer over to the movetime
                time_bonus = self.time_buffer_ms / 4
                self.time_buffer_ms -= time_bonus
                self.movetime_ms += time_bonus * 0.75
                self.root_node.initial_value = self.root_node.q_value[self.root_node.child_number_visits.argmax()]

                if self.time_buffer_ms < 0:
                    self.movetime_ms += self.time_buffer_ms
                    self.time_buffer_ms = 0
        self.cpuct = cpuct_init
        return max_depth_reached
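
The early-break check inside the loop (entered once half of the allotted move time has passed) can be read in isolation. The following is a hypothetical standalone form of that heuristic, assuming policy_prob and q_value are NumPy arrays over the root's child moves:

    def should_break_early(policy_prob, q_value, movetime_ms, t_elapsed_ms):
        # Stop searching early if half of the move time is spent and the move with
        # the highest prior probability is clearly dominant (> 0.9) while also
        # having the highest Q-value.
        if t_elapsed_ms <= movetime_ms / 2:
            return False
        return policy_prob.max() > 0.9 and policy_prob.argmax() == q_value.argmax()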
Example No. 5
    def evaluate_board_state(self, state: GameState):  # could probably be refactored
        """
        Analyzes the current board state. This is the main method which gets called by the UCI interface or an
        analysis request.
        :param state: Actual game state to evaluate for the MCTS
        :return:
        """
        # Too many local variables (28/15) - Too many branches (25/12) - Too many statements (75/50)
        self.t_start_eval = time()  # store the time at which the search started

        if not self.net_pred_services[0].running:  # check if the net prediction service has already been started
            for net_pred_service in self.net_pred_services:  # start the prediction daemon thread
                net_pred_service.start()

        legal_moves = state.get_legal_moves()  # list of all legal moves in the current board position

        if not legal_moves:  # consistency check
            raise Exception("The given board state has no legal move available")

        key = state.get_transposition_key() + (
            state.get_fullmove_number(),
        )  # check first whether the current tree can be reused

        if not self.use_pruning and key in self.node_lookup:
            chess_board = state.get_pythonchess_board()
            self.root_node = self.node_lookup[key]  # if key in self.node_lookup:
            if self.enhance_captures:
                self._enhance_captures(chess_board, legal_moves, self.root_node.policy_prob)
                # enhance checks for all direct child nodes
                for child_node in self.root_node.child_nodes:
                    if child_node:
                        self._enhance_captures(child_node.board, child_node.legal_moves, child_node.policy_prob)

            if self.enhance_checks:
                self._enhance_checks(chess_board, legal_moves, self.root_node.policy_prob)
                # enhance checks for all direct child nodes
                for child_node in self.root_node.child_nodes:
                    if child_node:
                        self._enhance_checks(child_node.board, child_node.legal_moves, child_node.policy_prob)

            logging.debug(
                "Reuse the search tree. Number of nodes in search tree: %d",
                self.root_node.nb_total_expanded_child_nodes,
            )
            self.total_nodes_pre_search = deepcopy(self.root_node.n_sum)
        else:
            logging.debug("Starting a brand new search tree...")
            self.root_node = None
            self.total_nodes_pre_search = 0

        if len(legal_moves) == 1:  # check for fast way out
            max_depth_reached = 1  # if there's only a single legal move, a depth of 1 is sufficient

            if self.root_node is None:
                # conduct all necessary steps for fastest way out
                self._expand_root_node_single_move(state, legal_moves)

            # increase the move time buffer
            # subtract half a second as a constant for possible delay
            self.time_buffer_ms += max(self.movetime_ms - 500, 0)
        else:
            if self.root_node is None:
                self._expand_root_node_multiple_moves(state, legal_moves)  # run a single expansion on the root node
            # opening guard
            if state.get_fullmove_number() <= self.opening_guard_moves:  # 100: #7: #10:
                self.root_node.q_value[self.root_node.policy_prob < 5e-2] = -9999
            # elif len(legal_moves) > 50:
            #    self.root_node.q_value[self.root_node.policy_prob < 1e-3] = -9999
            # conduct the mcts-search based on the given settings
            max_depth_reached = self._run_mcts_search(state)
            t_elapsed = time() - self.t_start_eval
            print("info string move overhead is %dms" % (t_elapsed * 1000 - self.movetime_ms))

        # receive the policy vector based on the MCTS search
        p_vec_small = self.root_node.get_mcts_policy(self.q_value_weight)  # , xth_n_max=xth_n_max, is_root=True)

        if self.use_future_q_values:
            # use q-future value to update the q-values of direct child nodes
            q_future, indices = self.get_last_q_values(min_nb_visits=5, max_depth=5) #25)
            # self.root_node.q_value = 0.5 * self.root_node.q_value + 0.5 * q_future
            # TODO: make this matrix vector form
            if max_depth_reached >= 5:
                for idx in indices:
                    self.root_node.q_value[idx] = min(self.root_node.q_value[idx], q_future[idx])
                p_vec_small = self.root_node.get_mcts_policy(self.q_value_weight)

        # if self.use_pruning is False:
        self.node_lookup[key] = self.root_node  # store the current root in the lookup table
        best_child_idx = p_vec_small.argmax()  # select the q-value according to the mcts best child value
        value = self.root_node.q_value[best_child_idx]
        # value = orig_q[best_child_idx]
        lst_best_moves, _ = self.get_calculated_line()  # best calculated line for logging
        str_moves = self._mv_list_to_str(lst_best_moves)
        node_searched = int(self.root_node.n_sum - self.total_nodes_pre_search)
        # in UCI, the depth is given in half-moves, also called plies
        time_e = time() - self.t_start_eval

        if len(legal_moves) != len(p_vec_small):
            raise Exception(
                "Legal move list %s with length %s is incompatible to policy vector %s"
                " with shape %s for board state %s and nodes legal move list: %s"
                % (legal_moves, len(legal_moves), p_vec_small, p_vec_small.shape, state, self.root_node.legal_moves)
            )

        # define the remaining return variables
        centipawns = value_to_centipawn(value)
        depth = max_depth_reached
        nodes = node_searched
        time_elapsed_s = time_e * 1000  # elapsed time in milliseconds

        # avoid division by 0
        if time_e > 0.0:
            nps = node_searched / time_e
        else:
            # fall back to a high constant otherwise
            nps = 999999999

        pv = str_moves
        if self.verbose:
            score = "score cp %d depth %d nodes %d time %d nps %d pv %s" % (
                centipawns,
                depth,
                nodes,
                time_elapsed_s,
                nps,
                pv,
            )
            logging.info("info string %s", score)
        return value, legal_moves, p_vec_small, centipawns, depth, nodes, time_elapsed_s, nps, pv
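
As with the simpler agents above, the returned policy vector is what ultimately selects the move for a UCI bestmove reply. A hypothetical caller-side helper (not part of the project's API) illustrating that use:

    def pick_best_move(legal_moves, policy):
        # The move with the highest MCTS policy weight, mirroring the
        # p_vec_small.argmax() pattern used in the examples above.
        return legal_moves[int(policy.argmax())]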