def min_value(self, state: GameState, depth: int, ghost_num: int) -> int:
    """Minimizing step of minimax for the ghost with index ``ghost_num``.

    Returns the smallest utility reachable from ``state`` assuming this
    ghost plays adversarially; terminal states and an exhausted depth
    budget are scored with ``self.evaluationFunction``.
    """
    # Decided game or no search depth left: score the state directly.
    if state.isLose() or state.isWin() or depth <= 0:
        return self.evaluationFunction(state)

    # Ghost indices run 1..numAgents-1; index 0 is pacman.
    assert 1 <= ghost_num < state.getNumAgents()

    last_ghost = ghost_num == state.getNumAgents() - 1
    # After the final ghost the turn returns to pacman and one ply is spent.
    evaluate = self.max_value if last_ghost else self.min_value
    remaining_depth = depth - 1 if last_ghost else depth

    # min() over an empty action list raises, exactly as the game never
    # produces for a non-terminal state.
    return min(
        evaluate(
            state.generateSuccessor(ghost_num, action),
            remaining_depth,
            ghost_num + 1,
        )
        for action in state.getLegalActions(ghost_num)
    )
def min_value(
    self,
    state: GameState,
    depth: int,
    alpha: int,
    beta: int,
    ghost_num: int,
) -> int:
    """Alpha-beta minimizing step for the ghost with index ``ghost_num``.

    ``alpha``/``beta`` carry the values already guaranteed to the
    maximizer/minimizer higher in the tree; subtrees that cannot change
    the final decision are cut off early.
    """
    # Decided game or no search depth left: score the state directly.
    if state.isLose() or state.isWin() or depth <= 0:
        return self.evaluationFunction(state)

    # Ghost indices run 1..numAgents-1; index 0 is pacman.
    assert 1 <= ghost_num < state.getNumAgents()

    is_last_ghost = ghost_num == state.getNumAgents() - 1
    # After the final ghost the turn returns to pacman and one ply is spent.
    evaluate = self.max_value if is_last_ghost else self.min_value
    remaining = depth - 1 if is_last_ghost else depth

    best = float("inf")
    for action in state.getLegalActions(ghost_num):
        child = state.generateSuccessor(agentIndex=ghost_num, action=action)
        best = min(best, evaluate(child, remaining, alpha, beta, ghost_num + 1))
        # A maximizer above already has >= alpha available; once we fall
        # below it this branch can never be chosen, so stop expanding.
        if best < alpha:
            return best
        beta = min(beta, best)
    return best
def searchTree(state: GameState, depth: int, agent: int):
    """Recursive expectimax over (state, depth, agent).

    Returns ``[value, action]``; ``action`` is None for leaves and for
    ghost nodes.  Uses ``self.evaluationFunction`` (captured from the
    enclosing method) when the depth budget runs out.
    """
    actions = state.getLegalActions(agent)
    nextAgent = (agent + 1) % state.getNumAgents()

    # A decided game or a dead end is scored by the game itself.
    if state.isLose() or state.isWin() or not actions:
        return [state.getScore(), None]
    # Depth budget exhausted: fall back to the heuristic evaluation.
    if depth == 0:
        return [self.evaluationFunction(state), None]

    if agent == 0:
        # Pacman maximizes and remembers which move achieved the maximum.
        scores = [
            searchTree(state.generateSuccessor(agent, action), depth, nextAgent)[0]
            for action in actions
        ]
        best = max(scores)
        return [best, actions[scores.index(best)]]

    # Ghost node: uniform expectation over children.  One ply is consumed
    # once the turn wraps back around to pacman.
    childDepth = depth - 1 if nextAgent == 0 else depth
    scores = [
        searchTree(state.generateSuccessor(agent, action), childDepth, nextAgent)[0]
        for action in actions
    ]
    return [sum(scores) * 1.0 / len(scores), None]
def max_value(
    self,
    state: GameState,
    depth: int,
    alpha: int,
    beta: int,
    actor: Optional[int] = None,
) -> int:
    """Alpha-beta maximizing step from pacman's point of view."""
    # When reached after the last ghost, ``actor`` equals the agent
    # count — confirms every ghost was evaluated in the previous round.
    if actor is not None:
        assert actor == state.getNumAgents()

    # Decided game or no search depth left: score the state directly.
    if state.isLose() or state.isWin() or depth <= 0:
        return self.evaluationFunction(state)

    best = float("-inf")
    for action in state.getLegalActions(agentIndex=0):
        child = state.generateSuccessor(agentIndex=0, action=action)
        best = max(best, self.min_value(child, depth, alpha, beta, ghost_num=1))
        # A minimizer above will never allow more than beta; stop here.
        if best > beta:
            return best
        alpha = max(alpha, best)
    return best
def _alphabeta(self, gameState: GameState, idx: int, ab: List[float]) -> Tuple[float, str]:
    """One alpha-beta node; ``idx`` counts agent turns taken from the root.

    ``ab`` is this node's mutable ``[alpha, beta]`` pair; every child gets
    its own copy so sibling updates do not leak across subtrees.  Returns
    ``(score, action)``; the action is None at evaluated leaves.
    """
    n = gameState.getNumAgents()
    # idx / n reaches self.depth once every agent has moved depth times.
    if idx / n >= self.depth or gameState.isWin() or gameState.isLose():
        return (self.evaluationFunction(gameState), None)

    agent = idx % n

    if agent == 0:
        # Pacman maximizes; prune once the score exceeds beta (ab[1]).
        best_score, best_action = -float('inf'), None
        for action in gameState.getLegalActions(agent):
            child = gameState.generateSuccessor(agent, action)
            score = self._alphabeta(child, idx + 1, [*ab])[0]
            if score > best_score:
                best_score, best_action = score, action
            if best_score > ab[1]:
                break
            ab[0] = max(ab[0], best_score)
        return (best_score, best_action)

    # Ghosts minimize; prune once the score falls below alpha (ab[0]).
    best_score, best_action = float('inf'), None
    for action in gameState.getLegalActions(agent):
        child = gameState.generateSuccessor(agent, action)
        score = self._alphabeta(child, idx + 1, [*ab])[0]
        if score < best_score:
            best_score, best_action = score, action
        if best_score < ab[0]:
            break
        ab[1] = min(ab[1], best_score)
    return (best_score, best_action)
def minimax(evalFunc: classmethod, agent: int, depth: int, gameState: GameState, maxDepth: int) -> float:
    """Plain minimax value of ``gameState`` with ``agent`` to move.

    ``depth`` counts completed plies; the search is cut off with
    ``evalFunc`` once it reaches ``maxDepth`` or the game is decided.

    NOTE(review): ``evalFunc`` is annotated ``classmethod`` but is used
    as a plain callable taking a state; annotation kept for interface
    compatibility.
    """
    if gameState.isLose() or gameState.isWin() or depth == maxDepth:
        return evalFunc(gameState)

    if agent == 0:
        # Pacman maximizes; the first ghost (agent 1) replies at this ply.
        return max(
            minimax(evalFunc, 1, depth, gameState.generateSuccessor(agent, action), maxDepth)
            for action in gameState.getLegalActions(agent)
        )

    # Ghosts minimize; once the turn wraps to pacman a ply is complete.
    nextAgent = (agent + 1) % gameState.getNumAgents()
    nextDepth = depth + 1 if nextAgent == 0 else depth
    return min(
        minimax(evalFunc, nextAgent, nextDepth, gameState.generateSuccessor(agent, action), maxDepth)
        for action in gameState.getLegalActions(agent)
    )
def _minimax(self, gameState: GameState, idx: int) -> Tuple[float, str]:
    """One minimax node; ``idx`` counts agent turns taken from the root.

    Agent 0 (pacman) maximizes, every other agent minimizes.  Returns
    ``(score, action)``; the action is None at evaluated leaves.
    """
    n = gameState.getNumAgents()
    # idx / n reaches self.depth once every agent has moved depth times.
    if idx / n >= self.depth or gameState.isWin() or gameState.isLose():
        return (self.evaluationFunction(gameState), None)

    agent = idx % n
    maximizing = agent == 0
    best_score = -float('inf') if maximizing else float('inf')
    best_action = None
    for action in gameState.getLegalActions(agent):
        child = gameState.generateSuccessor(agent, action)
        score = self._minimax(child, idx + 1)[0]
        improved = score > best_score if maximizing else score < best_score
        if improved:
            best_score, best_action = score, action
    return (best_score, best_action)
def max_value(self, state: GameState, depth: int, actor: Optional[int] = None) -> int:
    """Maximizing (pacman) step of plain minimax.

    Returns the best utility pacman can force from ``state``; terminal
    states and an exhausted depth budget are scored with
    ``self.evaluationFunction``.
    """
    # When reached after the last ghost, ``actor`` equals the agent
    # count — confirms every ghost was evaluated in the previous round.
    if actor is not None:
        assert actor == state.getNumAgents()

    # Decided game or no search depth left: score the state directly.
    if state.isLose() or state.isWin() or depth <= 0:
        return self.evaluationFunction(state)

    # max() over an empty action list raises, exactly as the game never
    # produces for a non-terminal state.
    return max(
        self.min_value(
            state.generateSuccessor(agentIndex=0, action=action),
            depth,
            ghost_num=1,
        )
        for action in state.getLegalActions(agentIndex=0)
    )
def _expectimax(self, gameState: GameState, idx: int) -> Tuple[float, str]:
    """One expectimax node: pacman maximizes, ghosts average their children.

    ``idx`` counts agent turns taken from the root.  Returns
    ``(score, action)``; the action is None at leaves and ghost nodes.
    """
    n = gameState.getNumAgents()
    # idx / n reaches self.depth once every agent has moved depth times.
    if idx / n >= self.depth or gameState.isWin() or gameState.isLose():
        return (self.evaluationFunction(gameState), None)

    agent = idx % n
    legalActions = gameState.getLegalActions(agent)

    if agent == 0:
        # Pacman: keep the child with the strictly highest score.
        best_score, best_action = -float('inf'), None
        for action in legalActions:
            score = self._expectimax(gameState.generateSuccessor(agent, action), idx + 1)[0]
            if score > best_score:
                best_score, best_action = score, action
        return (best_score, best_action)

    # Ghost: uniform expectation over the children.  Dividing each term
    # (rather than the sum) keeps float rounding identical per child.
    n_actions = len(legalActions)
    expected = 0
    for action in legalActions:
        expected += self._expectimax(gameState.generateSuccessor(agent, action), idx + 1)[0] / n_actions
    return (expected, None)
def searchTree(state: GameState, depth: int, agent: int, a, b):
    """Recursive alpha-beta minimax over (state, depth, agent).

    ``a``/``b`` are the alpha/beta bounds for this node.  Returns
    ``[value, action]``; ``action`` is None at leaves.  Uses
    ``self.evaluationFunction`` (captured from the enclosing method)
    when the depth budget runs out.
    """
    actions = state.getLegalActions(agent)
    nextAgent = (agent + 1) % state.getNumAgents()

    # A decided game or a dead end is scored by the game itself.
    if state.isLose() or state.isWin() or not actions:
        return [state.getScore(), None]
    # Depth budget exhausted: fall back to the heuristic evaluation.
    if depth == 0:
        return [self.evaluationFunction(state), None]

    if agent == 0:
        # Pacman maximizes, raising alpha; cut off once alpha meets beta.
        best = float('-inf')
        scores = []
        for action in actions:
            score = searchTree(state.generateSuccessor(agent, action), depth, nextAgent, a, b)[0]
            scores.append(score)
            best = max(best, score)
            a = max(a, best)
            if a >= b:
                break
        return [best, actions[scores.index(best)]]

    # Ghost minimizes, lowering beta; one ply ends when play wraps to
    # pacman, so only then does the remaining depth shrink.
    childDepth = depth - 1 if nextAgent == 0 else depth
    best = float('inf')
    scores = []
    for action in actions:
        score = searchTree(state.generateSuccessor(agent, action), childDepth, nextAgent, a, b)[0]
        scores.append(score)
        best = min(best, score)
        b = min(b, best)
        if a >= b:
            break
    return [best, actions[scores.index(best)]]
def is_a_new_level_of_search(self, game_state: GameState, current_ghost_index):
    """Return True iff ``current_ghost_index`` is the last agent.

    When the final ghost has moved, the next expansion belongs to pacman
    and a new level (ply) of the search begins.
    """
    last_agent_index = game_state.getNumAgents() - 1
    return current_ghost_index == last_agent_index