def evaluate_board_state(self, state: AbsGameState):  # Too few public methods (1/2)
    """
    Evaluate a position with one network inference and no tree search.

    The predicted policy is reduced to the legal moves of ``state`` and the move with the
    highest probability is reported as the principal variation.

    :param state: Gamestate object
    :return: value - Value prediction in the current players view from [-1,1]: -1 -> 100% lost, +1 100% won
             legal_moves - List of the legal moves of ``state``
             p_vec_small - Move probabilities, indexed like ``legal_moves``
             centipawn - Centi pawn evaluation which is converted from the value prediction
             depth - Always 1, because no search is conducted
             nodes - Always 1, because only the given position is evaluated
             time_elapsed_ms - Elapsed time for the evaluation in milliseconds
             nps - Nodes per second metric (a high constant if no time elapsed)
             pv - UCI string of the move with the highest probability
    """
    t_start_eval = time()
    pred_value, pred_policy = self._net.predict_single(state.get_state_planes())

    legal_moves = list(state.get_legal_moves())
    p_vec_small = get_probs_of_move_list(pred_policy, legal_moves, state.mirror_policy())

    # define the remaining return variables
    time_e = time() - t_start_eval
    centipawn = value_to_centipawn(pred_value)
    depth = nodes = 1
    time_elapsed_ms = time_e * 1000

    # a coarse system clock can report zero elapsed time; avoid division by 0
    if time_e > 0.0:
        nps = nodes / time_e
    else:
        # return a high constant otherwise
        nps = 999999999

    # use the move with the highest probability as the best move for logging
    pv = legal_moves[p_vec_small.argmax()].uci()

    return pred_value, legal_moves, p_vec_small, centipawn, depth, nodes, time_elapsed_ms, nps, pv
def evaluate_board_state(self, state: AbsGameState) -> tuple:
    """
    Evaluates a given board position according to alpha beta search

    :param state: Game state object
    :return: value - Result of the negamax search started at ``state``
             legal_moves - Legal moves of ``state``
             policy - Vector with one entry per legal move: 1 at ``self.sel_mv_idx[0]``, 0 elsewhere
             centipawn - Centi pawn evaluation which is converted from ``value``
             depth - The configured search depth ``self.depth``
             nodes - Value of ``self.nodes`` after the search
             time_elapsed_ms - Elapsed time for the search in milliseconds
             nps - Nodes per second metric (a high constant if no time elapsed)
             pv - UCI string of the first entry of ``self.best_moves``
    """
    self.t_start_eval = time()
    value = self.negamax(
        state, depth=self.depth, alpha=-math.inf, beta=math.inf, color=1 if state.board.turn else -1
    )

    legal_moves = state.get_legal_moves()
    # one-hot policy: all probability mass on the move selected by the search
    policy = np.zeros(len(legal_moves))
    policy[self.sel_mv_idx[0]] = 1

    centipawn = value_to_centipawn(value)
    # In uci the depth is given using half-moves notation also called plies
    nodes = self.nodes
    time_e = time() - self.t_start_eval
    time_elapsed_ms = time_e * 1000

    # a coarse system clock can report zero elapsed time; avoid division by 0
    if time_e > 0.0:
        nps = nodes / time_e
    else:
        # return a high constant otherwise
        nps = 999999999

    pv = self.best_moves[0].uci()

    logging.info(f"{self.best_moves}")
    logging.info(f"Value: {value}, Centipawn: {centipawn}")

    return value, legal_moves, policy, centipawn, self.depth, nodes, time_elapsed_ms, nps, pv
def evaluate_board_state(self, state: AbsGameState):  # Too few public methods (1/2)
    """
    Evaluate a position with one synchronous inference call and no tree search.

    The input and output blob names are read from ``self._net.read_net`` and the inference runs
    on ``self._net.exec_net``. The predicted policy is reduced to the legal moves of ``state``
    and the move with the highest probability is reported as the principal variation.
    NOTE(review): the blob/precision API looks like the OpenVINO Inference Engine - confirm.

    :param state: Gamestate object
    :return: value - Value prediction in the current players view from [-1,1]: -1 -> 100% lost, +1 100% won
             legal_moves - List of the legal moves of ``state``
             p_vec_small - Move probabilities, indexed like ``legal_moves``
             centipawn - Centi pawn evaluation which is converted from the value prediction
             depth - Always 1, because no search is conducted
             nodes - Always 1, because only the given position is evaluated
             time_elapsed_ms - Elapsed time for the evaluation in milliseconds
             nps - Nodes per second metric (a high constant if no time elapsed)
             pv - UCI string of the move with the highest probability
    """
    t_start_eval = time()

    # Start sync inference
    print("Starting inference")
    print("Preparing input blobs")
    input_blob = next(iter(self._net.read_net.input_info))
    # the first output is taken as the policy, the second one as the value
    output_blob = iter(self._net.read_net.outputs)
    pred_policy_blob = next(output_blob)
    pred_value_blob = next(output_blob)

    # NB: This is required to load the image as uint8 np.array
    #     Without this step the input blob is loaded in FP32 precision,
    #     this requires additional operation and more memory.
    self._net.read_net.input_info[input_blob].precision = "U8"

    res = self._net.exec_net.infer(inputs={input_blob: state.get_state_planes()})

    # TODO Check order of output
    pred_value = res[pred_value_blob][0][0]
    pred_policy = res[pred_policy_blob][0]

    legal_moves = list(state.get_legal_moves())
    p_vec_small = get_probs_of_move_list(pred_policy, legal_moves, state.is_white_to_move())

    # define the remaining return variables
    time_e = time() - t_start_eval
    centipawn = value_to_centipawn(pred_value)
    depth = nodes = 1
    time_elapsed_ms = time_e * 1000

    # a coarse system clock can report zero elapsed time; avoid division by 0
    if time_e > 0.0:
        nps = nodes / time_e
    else:
        # return a high constant otherwise
        nps = 999999999

    # use the move with the highest probability as the best move for logging
    pv = legal_moves[p_vec_small.argmax()].uci()

    return pred_value, legal_moves, p_vec_small, centipawn, depth, nodes, time_elapsed_ms, nps, pv
def _run_mcts_search(self, state):
    """
    Runs a new or continues the mcts on the current search tree.

    Playouts are launched in batches of ``self.threads``. The search stops as soon as the maximum
    search depth, the playout budget or the move time is reached. If time management is enabled,
    the search may additionally
      * stop early once more than half of the move time has passed and the most probable root move
        is also the one with the best q-value (90% of the remaining time is credited to the buffer)
      * extend the move time once, late in the search, if the q-value of the most visited child is
        not clearly above the initial value of the root (a quarter of the buffer is withdrawn)

    :param state: Input state given by the user
    :return: max_depth_reached (int) - The longest search path length after the whole search
    """
    self.node_lookup = {}  # clear the look up table
    self.root_node_prior_policy = deepcopy(self.root_node.policy_prob)  # save the prior policy of the root node
    # apply dirichlet noise to the prior probabilities in order to ensure
    #  that every move can possibly be visited
    self.root_node.apply_dirichlet_noise_to_prior_policy(epsilon=self.dirichlet_epsilon, alpha=self.dirichlet_alpha)
    # store what depth has been reached at maximum in the current search tree
    max_depth_reached = 1  # default is 1, in case only 1 move is available

    if state.are_pocket_empty():  # set the number of playouts accordingly
        nb_playouts = self.nb_playouts_empty_pockets
    else:
        nb_playouts = self.nb_playouts_filled_pockets

    t_elapsed_ms = cur_playouts = 0
    old_time = time()
    cpuct_init = self.cpuct

    # without time management both checks are treated as already done and therefore skipped
    time_checked = time_checked_early = not self.use_time_management

    while (
        max_depth_reached < self.max_search_depth and cur_playouts < nb_playouts and t_elapsed_ms < self.movetime_ms
    ):  # and np.abs(self.root_node.q_value.mean()) < 0.99:

        # start searching; leaving the with-block waits until every playout of the batch has finished
        with ThreadPoolExecutor(max_workers=self.threads) as executor:
            # A fresh list per batch: results of earlier batches were already consumed, keeping them
            # would only re-read them on every iteration and keep their node lists alive
            batch_futures = [
                executor.submit(
                    self._run_single_playout, parent_node=self.root_node, pipe_id=i, depth=1, chosen_nodes=[]
                )
                for i in range(self.threads)  # the pipe id corresponds to the thread index
            ]

        cur_playouts += self.threads
        time_show_info = time() - old_time
        last_idx = len(batch_futures) - 1

        for i, future in enumerate(batch_futures):
            cur_value, cur_depth, chosen_nodes = future.result()

            if cur_depth > max_depth_reached:
                max_depth_reached = cur_depth

            # Print the explored line of the last line for every x seconds if verbose is true
            if self.verbose and time_show_info > 0.5 and i == last_idx:
                mv_list = self._create_mv_list(chosen_nodes)
                str_moves = self._mv_list_to_str(mv_list)
                print(
                    "info score cp %d depth %d nodes %d pv %s"
                    % (value_to_centipawn(cur_value), cur_depth, self.root_node.n_sum, str_moves)
                )
                logging.debug("Update info")
                old_time = time()

        t_elapsed = time() - self.t_start_eval  # update the current search time
        t_elapsed_ms = t_elapsed * 1000
        if time_show_info > 1:
            node_searched = int(self.root_node.n_sum - self.total_nodes_pre_search)
            print("info nps %d time %d" % (int((node_searched / t_elapsed)), t_elapsed_ms))

        # one-time check after half of the move time: stop if the decision looks already clear
        if not time_checked_early and t_elapsed_ms > self.movetime_ms / 2:
            if (
                self.root_node.policy_prob.max() > 0.9
                and self.root_node.policy_prob.argmax() == self.root_node.q_value.argmax()
            ):
                # save most of the unused time for later moves
                self.time_buffer_ms += (self.movetime_ms - t_elapsed_ms) * 0.9
                print("info early break up")
                break
            time_checked_early = True

        # one-time check near the end of the move time: spend buffered time if the best move disappoints
        if (
            self.time_buffer_ms > 2500
            and not time_checked
            and t_elapsed_ms > self.movetime_ms * 0.9
            and self.root_node.q_value[self.root_node.child_number_visits.argmax()]
            < self.root_node.initial_value + 0.01
        ):
            print("info increase time")
            time_checked = True
            time_bonus = self.time_buffer_ms / 4
            self.time_buffer_ms -= time_bonus
            # increase the movetime
            self.movetime_ms += time_bonus * 0.75
            self.root_node.initial_value = self.root_node.q_value[self.root_node.child_number_visits.argmax()]

            # NOTE(review): assuming this guard belongs to the time-bonus branch, the buffer is
            # still positive here (it was > 2500 and only a quarter was removed) - confirm placement
            if self.time_buffer_ms < 0:
                self.movetime_ms += self.time_buffer_ms
                self.time_buffer_ms = 0

    self.cpuct = cpuct_init  # restore the exploration constant which was active before the search
    return max_depth_reached
def evaluate_board_state(self, state: GameState):  # Probably is better to be refactored
    """
    Analyzes the current board state. This is the main method which get called by the uci interface or analysis
    request.

    The search tree of a previous call is reused if pruning is disabled and the position is found in
    the node lookup table, otherwise a new root node is created. Positions with a single legal move
    skip the search and credit the unused move time to the time buffer.

    :param state: Actual game state to evaluate for the MCTS
    :return: value - Q-value of the root child which has the highest MCTS policy value
             legal_moves - Legal moves of ``state``
             p_vec_small - MCTS policy of the root node, one entry per legal move
             centipawns - Centi pawn evaluation which is converted from ``value``
             depth - Maximum depth which was reached in the search
             nodes - Number of nodes which have been added to the root visit count by this call
             time_elapsed_ms - Elapsed time for the full evaluation in milliseconds
             nps - Nodes per second metric (a high constant if no time elapsed)
             pv - String of the best calculated line
    :raises ValueError: If ``state`` has no legal move
    :raises RuntimeError: If the policy vector and the legal move list differ in length
    """
    # Too many local variables (28/15) - Too many branches (25/12) - Too many statements (75/50)
    self.t_start_eval = time()  # store the time at which the search started

    if not self.net_pred_services[0].running:  # check if the net prediction service has already been started
        for net_pred_service in self.net_pred_services:  # start the prediction daemon thread
            net_pred_service.start()

    legal_moves = state.get_legal_moves()  # list of all possible legal move in the current board position

    if not legal_moves:  # consistency check
        raise ValueError("The given board state has no legal move available")

    # the full move number is part of the key which identifies the position in the lookup table
    key = state.get_transposition_key() + (state.get_fullmove_number(),)

    # check first if the current tree can be reused
    if not self.use_pruning and key in self.node_lookup:
        chess_board = state.get_pythonchess_board()
        self.root_node = self.node_lookup[key]

        if self.enhance_captures:
            self._enhance_captures(chess_board, legal_moves, self.root_node.policy_prob)
            # enhance captures for all direct child nodes
            for child_node in self.root_node.child_nodes:
                if child_node:
                    self._enhance_captures(child_node.board, child_node.legal_moves, child_node.policy_prob)

        if self.enhance_checks:
            self._enhance_checks(chess_board, legal_moves, self.root_node.policy_prob)
            # enhance checks for all direct child nodes
            for child_node in self.root_node.child_nodes:
                if child_node:
                    self._enhance_checks(child_node.board, child_node.legal_moves, child_node.policy_prob)

        logging.debug(
            "Reuse the search tree. Number of nodes in search tree: %d",
            self.root_node.nb_total_expanded_child_nodes,
        )
        # remember the visit count before the search to report only the newly searched nodes
        self.total_nodes_pre_search = deepcopy(self.root_node.n_sum)
    else:
        logging.debug("Starting a brand new search tree...")
        self.root_node = None
        self.total_nodes_pre_search = 0

    if len(legal_moves) == 1:  # check for fast way out
        max_depth_reached = 1  # if there's only a single legal move you only must go 1 depth

        if self.root_node is None:
            # conduct all necessary steps for fastest way out
            self._expand_root_node_single_move(state, legal_moves)

        # increase the move time buffer
        # subtract half a second as a constant for possible delay
        self.time_buffer_ms += max(self.movetime_ms - 500, 0)
    else:
        if self.root_node is None:
            self._expand_root_node_multiple_moves(state, legal_moves)  # run a single expansion on the root node

        # opening guard: rule out moves with a low prior probability during the first moves
        if state.get_fullmove_number() <= self.opening_guard_moves:
            self.root_node.q_value[self.root_node.policy_prob < 5e-2] = -9999

        # conduct the mcts-search based on the given settings
        max_depth_reached = self._run_mcts_search(state)
        t_elapsed = time() - self.t_start_eval
        print("info string move overhead is %dms" % (t_elapsed * 1000 - self.movetime_ms))

    # receive the policy vector based on the MCTS search
    p_vec_small = self.root_node.get_mcts_policy(self.q_value_weight)

    if self.use_future_q_values:
        # use q-future value to update the q-values of direct child nodes
        q_future, indices = self.get_last_q_values(min_nb_visits=5, max_depth=5)
        # TODO: make this matrix vector form
        if max_depth_reached >= 5:
            for idx in indices:
                # keep the more pessimistic one of both estimates
                self.root_node.q_value[idx] = min(self.root_node.q_value[idx], q_future[idx])
            # the q-values changed, so the policy must be recalculated
            p_vec_small = self.root_node.get_mcts_policy(self.q_value_weight)

    self.node_lookup[key] = self.root_node  # store the current root in the lookup table
    best_child_idx = p_vec_small.argmax()

    # select the q-value according to the mcts best child value
    value = self.root_node.q_value[best_child_idx]

    # show the best calculated line
    lst_best_moves, _ = self.get_calculated_line()
    str_moves = self._mv_list_to_str(lst_best_moves)
    node_searched = int(self.root_node.n_sum - self.total_nodes_pre_search)
    time_e = time() - self.t_start_eval

    if len(legal_moves) != len(p_vec_small):
        raise RuntimeError(
            "Legal move list %s with length %s is incompatible to policy vector %s"
            " with shape %s for board state %s and nodes legal move list: %s"
            % (legal_moves, len(legal_moves), p_vec_small, p_vec_small.shape, state, self.root_node.legal_moves)
        )

    # define the remaining return variables
    centipawns = value_to_centipawn(value)
    # In uci the depth is given using half-moves notation also called plies
    depth = max_depth_reached
    nodes = node_searched
    time_elapsed_ms = time_e * 1000

    # avoid division by 0
    if time_e > 0.0:
        nps = node_searched / time_e
    else:
        # return a high constant otherwise
        nps = 999999999

    pv = str_moves

    if self.verbose:
        score = "score cp %d depth %d nodes %d time %d nps %d pv %s" % (
            centipawns,
            depth,
            nodes,
            time_elapsed_ms,
            nps,
            pv,
        )
        logging.info("info string %s", score)

    return value, legal_moves, p_vec_small, centipawns, depth, nodes, time_elapsed_ms, nps, pv