def recurse(state: State, alpha, beta) -> Tuple[int, object]:
    """Return the best utility reachable from ``state`` and the action giving it.

    Minimax search with alpha-beta pruning. Utilities are always expressed
    from the point of view of ``self.player`` (``self`` is taken from the
    enclosing scope): 1 if that player won, -1 if its opponent won, and 0
    for any other finished game.

    :param state: the state to evaluate
    :param alpha: lower bound on the utility the maximizing side
        (``self.player``) can already secure on the path to this state
    :param beta: upper bound on the utility the minimizing side (the
        opponent) can already enforce on the path to this state
    :return: a tuple ``(utility, action)``. ``action`` is ``None`` when the
        game is over. For a non-terminal state without available actions
        the result is ``(-inf, None)`` on a maximizing node and
        ``(inf, None)`` on a minimizing node.
    :raises ValueError: if ``state.player`` is neither ``self.player`` nor
        its opponent
    """
    is_over, winner = state.get_state_result()
    if is_over:
        if winner == self.player:
            return 1, None
        if winner == get_opponent(self.player):
            return -1, None
        return 0, None

    available_actions = state.get_available_actions()

    if state.player == self.player:
        # Maximizing node: raise alpha as better replies are found.
        best = (float("-inf"), None)
        for action in available_actions:
            value = recurse(state.get_next_state(action), alpha, beta)[0]
            # Strict comparison keeps the earliest action on ties.
            if value > best[0]:
                best = (value, action)
            alpha = max(alpha, best[0])
            if beta <= alpha:
                # The minimizing side already has a better alternative
                # elsewhere, so the remaining actions cannot matter.
                break
        return best

    if state.player == get_opponent(self.player):
        # Minimizing node: lower beta as better replies are found.
        best = (float("inf"), None)
        for action in available_actions:
            value = recurse(state.get_next_state(action), alpha, beta)[0]
            # Strict comparison keeps the earliest action on ties.
            if value < best[0]:
                best = (value, action)
            beta = min(beta, best[0])
            if beta <= alpha:
                # The maximizing side already has a better alternative
                # elsewhere, so the remaining actions cannot matter.
                break
        return best

    # Previously this case fell through and returned None, which only
    # surfaced later as an opaque TypeError in the caller.
    raise ValueError(f"unexpected player to move: {state.player!r}")
def recurse(state: State) -> Tuple[int, object]:
    """Return the best utility reachable from ``state`` and the action giving it.

    Negamax search: the utility is expressed from the point of view of
    ``state.player`` (the side to move), so the value of every successor
    state is negated before the candidates are compared.

    :param state: the state to evaluate
    :return: a tuple ``(utility, action)``. For a finished game the utility
        is 1 if ``state.player`` won, -1 if its opponent won and 0
        otherwise, and ``action`` is ``None``. When several actions share
        the best utility, the first one in ``get_available_actions()``
        order is returned.
    :raises ValueError: if the game is not over but there are no available
        actions
    """
    is_over, winner = state.get_state_result()
    if is_over:
        if winner == state.player:
            return 1, None
        if winner == get_opponent(state.player):
            return -1, None
        return 0, None

    available_actions = state.get_available_actions()
    # A successor's value is from the opponent's perspective, hence the sign
    # flip. Plain (value, action) pairs avoid building a pandas Series per
    # node and work for any action object (tuples, lists, duplicates).
    scored = [
        (-recurse(state.get_next_state(action))[0], action)
        for action in available_actions
    ]
    # max() returns the first maximal pair, so ties go to the earliest action.
    return max(scored, key=lambda pair: pair[0])