def prob_children(self, gameState, agent):
    """Pair every successor of a ghost move with that move's probability.

    A ``DirectionalGhost`` is built for ``agent`` and asked for its action
    distribution in ``gameState``; each action in that distribution is then
    expanded into a successor state.

    Args:
        gameState: State to expand; must provide ``generateSuccessor``.
        agent: Index of the ghost whose moves are expanded.

    Returns:
        A list of ``(successor_state, probability)`` tuples, in the iteration
        order of the distribution.
    """
    # Function-scope import: ghostAgents is only loaded when this is called.
    from ghostAgents import DirectionalGhost

    distribution = DirectionalGhost(agent).getDistribution(gameState)

    weighted_successors = []
    for action, probability in distribution.items():
        successor = gameState.generateSuccessor(agent, action)
        weighted_successors.append((successor, probability))
    return weighted_successors
def getDirectionalExpectimaxValue(self, gameState, agentIndex, depth):
    """Return the expectimax value of ``gameState`` against directional ghosts.

    Pacman (agent 0) is a max node; every other agent is a chance node whose
    action probabilities come from ``DirectionalGhost.getDistribution``.

    The chance-node value is the probability-weighted average of the child
    values. It is deliberately NOT sampled at random, and the weights are
    accumulated per action so that two actions leading to equal child values
    both contribute their probability.

    Args:
        gameState: State to evaluate.
        agentIndex: Index of the agent to move (0 is Pacman).
        depth: Remaining depth counter; it is decremented each time Pacman
            moves and the search stops when Pacman is to move at depth 1.

    Returns:
        ``self.evaluationFunction(gameState)`` at a leaf, otherwise the max
        (Pacman) or expected (ghost) value of the successors.
    """
    if (agentIndex == 0 and depth == 1) or gameState.isWin() or gameState.isLose():
        return self.evaluationFunction(gameState)

    legalMoves = gameState.getLegalActions(agentIndex)
    # NOTE(review): getNextIndexAgent is defined elsewhere; presumably it
    # returns the index of the agent that moves after agentIndex.
    nextAgent = getNextIndexAgent(agentIndex, gameState)

    if agentIndex == 0:
        return max(
            self.getDirectionalExpectimaxValue(
                gameState.generatePacmanSuccessor(action), nextAgent, depth - 1)
            for action in legalMoves)

    if not legalMoves:
        # A ghost with nothing to do: score the state as it stands.
        return self.evaluationFunction(gameState)

    distribution = DirectionalGhost(index=agentIndex).getDistribution(gameState)
    weightedTotal = 0.0
    totalWeight = 0.0
    for action in legalMoves:
        successor = gameState.generateSuccessor(agentIndex, action)
        value = self.getDirectionalExpectimaxValue(successor, nextAgent, depth)
        weight = distribution[action]
        weightedTotal += weight * value
        totalWeight += weight

    if totalWeight == 0:
        return self.evaluationFunction(gameState)
    # Dividing by the total keeps the result an average even if the
    # distribution handed back is not perfectly normalised.
    return weightedTotal / totalWeight
def expectiLevel(gameState, depth, agentindex):
    """Expected value of ``gameState`` when ghost ``agentindex`` is to move.

    ``self``, ``numGhosts`` and ``maxLevel`` are not parameters; they are
    resolved from the surrounding scope.

    Args:
        gameState: State in which the ghost moves.
        depth: Remaining depth; 0 stops the search.
        agentindex: Index of the ghost to move.

    Returns:
        The evaluation of ``gameState`` at a leaf, otherwise the
        probability-weighted sum of the successor values, capped at
        ``float("inf")``.
    """
    if gameState.isWin() or gameState.isLose() or depth == 0:
        return self.evaluationFunction(gameState)

    legalActions = gameState.getLegalActions(agentindex)
    ghost = DirectionalGhost(agentindex, prob_attack=0.8, prob_scaredFlee=0.8)
    probabilities = ghost.getDistribution(gameState)
    successors = [
        gameState.generateSuccessor(agentindex, action)
        for action in legalActions
    ]

    # After the last ghost the turn returns to the max level one ply deeper;
    # otherwise the next ghost moves at the same depth.
    isLastGhost = agentindex == numGhosts

    expected = 0
    for successor, action in zip(successors, legalActions):
        weight = probabilities[action]
        if isLastGhost:
            value = maxLevel(successor, depth - 1)
        else:
            value = expectiLevel(successor, depth, agentindex + 1)
        expected += weight * value

    if expected < float("inf"):
        return expected
    return float("inf")
def rb_directional_expectimax(self, cur_state: "GameState", turn: int,
                              agent: int, depth_limit: int, depth: int,
                              ghost_num: int):
    """Depth-limited expectimax against ``DirectionalGhost`` opponents.

    Args:
        cur_state: State to evaluate.
        turn: Index of the agent whose move it is.
        agent: Index of the maximising agent (Pacman).
        depth_limit: Search stops once ``depth`` reaches this value.
        depth: Number of Pacman plies counted so far; it is incremented on
            entry whenever it is Pacman's turn.
        ghost_num: Number of ghosts; turns cycle through ``ghost_num + 1``
            agents.

    Returns:
        ``self.evaluationFunction(cur_state)`` at a leaf; the maximum child
        value on Pacman's turn (``-inf`` if Pacman has no legal action); the
        probability-weighted sum of child values on a ghost's turn (0 if the
        ghost has no legal action).
    """
    if turn == agent:
        depth += 1
    if depth >= depth_limit or cur_state.isWin() or cur_state.isLose():
        return self.evaluationFunction(cur_state)

    next_turn = (turn + 1) % (ghost_num + 1)

    if turn == agent:  # Pacman's turn: maximise over the children.
        # -math.inf instead of np.NINF, which NumPy 2.0 removed.
        cur_max = -math.inf
        for action in cur_state.getLegalPacmanActions():
            child_state = cur_state.generateSuccessor(turn, action)
            cur_max = max(
                cur_max,
                self.rb_directional_expectimax(child_state, next_turn, agent,
                                               depth_limit, depth, ghost_num))
        return cur_max

    # Ghost's turn: expectation over the ghost's action distribution.
    assert turn > agent
    ghost_legal_moves = cur_state.getLegalActions(turn)
    if not ghost_legal_moves:
        # Nothing to average over; also avoids asking for a distribution
        # when there are no actions.
        return 0

    # The distribution depends only on cur_state, so compute it once.
    dist = DirectionalGhost(turn).getDistribution(cur_state)
    expectancy = 0
    for action in ghost_legal_moves:
        child_state = cur_state.generateSuccessor(turn, action)
        expectancy += dist[action] * self.rb_directional_expectimax(
            child_state, next_turn, agent, depth_limit, depth, ghost_num)
        # A NaN running total (e.g. from 0 * inf) is reset rather than
        # allowed to poison the remaining terms.
        if math.isnan(expectancy):
            expectancy = 0
    return expectancy
def directionalExpectimaxValue(self, gameState, agentIndex, searchDepth):
    """Return the expectimax value of ``gameState`` against directional ghosts.

    Args:
        gameState: State to evaluate.
        agentIndex: Index of the agent to move. A value equal to
            ``gameState.getNumAgents()`` wraps around to Pacman (index 0).
        searchDepth: Number of Pacman plies already searched; the search
            stops when it equals ``self.depth``.

    Returns:
        ``self.evaluationFunction(gameState)`` at a leaf; the maximum child
        value on Pacman's turn; the probability-weighted sum of child values
        on a ghost's turn.
    """
    # Base cases: depth budget exhausted or terminal state.
    if searchDepth == self.depth or gameState.isWin() or gameState.isLose():
        return self.evaluationFunction(gameState)

    # One past the last agent means the turn wraps back to Pacman.
    current_agent_index = agentIndex
    if gameState.getNumAgents() == current_agent_index:
        current_agent_index = 0

    legal_agent_actions = gameState.getLegalActions(current_agent_index)
    next_agent_index = current_agent_index + 1

    if current_agent_index == 0:
        # Pacman's turn: maximise, and count one more ply of depth.
        cur_max = float('-inf')
        for action in legal_agent_actions:
            child = gameState.generateSuccessor(current_agent_index, action)
            value = self.directionalExpectimaxValue(child, next_agent_index,
                                                    searchDepth + 1)
            cur_max = max(value, cur_max)
        return cur_max

    # Ghost's turn: a chance node.
    if not legal_agent_actions:
        return self.evaluationFunction(gameState)

    probabilities = DirectionalGhost(current_agent_index).getDistribution(gameState)
    expected_value = 0
    for action in legal_agent_actions:
        child = gameState.generateSuccessor(current_agent_index, action)
        # Look the probability up by action: the distribution's key order is
        # not guaranteed to match the order of the legal actions.
        expected_value += probabilities[action] * self.directionalExpectimaxValue(
            child, next_agent_index, searchDepth)
    return expected_value
def getActionAux(self, gameState, agent, depth):
    """Expectimax search against directional ghosts.

    Args:
        gameState: State to evaluate.
        agent: Index of the agent to move; ``self.index`` is the maximiser.
        depth: Remaining depth. It is decremented after the last ghost of a
            round has moved.

    Returns:
        At the root (the maximiser's turn with ``depth == self.depth``) the
        chosen action, with ties between equally good actions broken at
        random. Everywhere else a number: the game score of a final state,
        the evaluation at depth 0, the best child value on the maximiser's
        turn, or the probability-weighted average of child values on a
        ghost's turn.
    """
    if self.isFinalState(gameState):
        return gameState.getScore()
    if depth == 0:
        return self.evaluationFunction(gameState)

    nextAgent = (agent + 1) % gameState.getNumAgents()
    legalActions = gameState.getLegalActions(agent)
    nextStates = [
        gameState.generateSuccessor(agent, action) for action in legalActions
    ]

    if agent == self.index:
        # Maximiser's turn.
        scores = [
            self.getActionAux(state, nextAgent, depth) for state in nextStates
        ]
        bestScore = max(scores)
        if depth != self.depth:
            return bestScore
        # Only the root returns a move, so only the root needs a tie-break.
        bestActions = [
            action for action, score in zip(legalActions, scores)
            if score == bestScore
        ]
        return random.choice(bestActions)

    # Ghost's turn: a chance node.
    if not legalActions:
        return self.evaluationFunction(gameState)

    ghostDist = DirectionalGhost(agent).getDistribution(gameState)
    lastGhost = nextAgent == self.index
    totalScore = 0
    prob_sum = 0
    for action, state in zip(legalActions, nextStates):
        if not lastGhost:
            value = self.getActionAux(state, nextAgent, depth)
        elif depth == 1:
            # Last ply: successors are leaves and are scored directly with
            # the evaluation function (no final-state check).
            value = self.evaluationFunction(state)
        else:
            value = self.getActionAux(state, nextAgent, depth - 1)
        probability = ghostDist[action]
        totalScore += value * probability
        prob_sum += probability

    # Floating-point weights rarely add up to exactly 1.0, so normalise by
    # the observed total instead of asserting on it.
    if prob_sum == 0:
        return self.evaluationFunction(gameState)
    return totalScore / prob_sum