def getDirectionalExpectimaxValue(self, gameState, agentIndex, depth):
    """Return the expectimax value of ``gameState`` against directional ghosts.

    Agent 0 is treated as a max node; every other agent is a chance node
    whose action weights come from ``DirectionalGhost.getDistribution``.

    Args:
        gameState: state to evaluate.
        agentIndex: index of the agent to move (0 is the maximiser).
        depth: remaining depth. It is decremented when agent 0 moves, and the
            search stops when agent 0 is to move with ``depth == 1``.

    Returns:
        ``self.evaluationFunction(gameState)`` at a leaf, the largest child
        value on agent 0's turn, or the probability-weighted mean of the
        child values on a ghost's turn.
    """
    if (agentIndex == 0 and depth == 1) or gameState.isWin() or gameState.isLose():
        return self.evaluationFunction(gameState)

    legalMoves = gameState.getLegalActions(agentIndex)

    if agentIndex == 0:
        successors = [
            gameState.generatePacmanSuccessor(action) for action in legalMoves
        ]
        return max(
            self.getDirectionalExpectimaxValue(
                state, getNextIndexAgent(agentIndex, gameState), depth - 1)
            for state in successors)

    ghost = DirectionalGhost(index=agentIndex)
    act_prob_dict = ghost.getDistribution(gameState)

    # Accumulate probability * value per action.  Keying a table by the child
    # value (as was done before) silently drops probability mass whenever two
    # actions lead to the same value, and sampling from that table returns a
    # random outcome rather than the expectation.
    weighted_total = 0.0
    probability_total = 0.0
    for action in legalMoves:
        state = gameState.generateSuccessor(agentIndex, action)
        val = self.getDirectionalExpectimaxValue(
            state, getNextIndexAgent(agentIndex, gameState), depth)
        probability = act_prob_dict[action]
        weighted_total += probability * val
        probability_total += probability

    # Renormalise over the legal moves; leave the sum untouched when there is
    # no probability mass to divide by.
    if probability_total:
        return weighted_total / probability_total
    return weighted_total
def prob_children(self, gameState, agent):
    """Pair every successor of ``gameState`` with its probability.

    A ``DirectionalGhost`` built for ``agent`` supplies the action
    distribution; each action in it is applied to ``gameState``.

    Returns:
        A list of ``(successor_state, probability)`` tuples, one per entry
        of the distribution.
    """
    from ghostAgents import DirectionalGhost

    distribution = DirectionalGhost(agent).getDistribution(gameState)
    children = []
    for action, probability in distribution.items():
        successor = gameState.generateSuccessor(agent, action)
        children.append((successor, probability))
    return children
def expectiLevel(gameState, depth, agentindex):
    """Expected value of ``gameState`` when ghost ``agentindex`` is to move.

    Relies on ``self``, ``numGhosts`` and ``maxLevel`` from the enclosing
    scope.  Each legal action is weighted by the distribution of a
    ``DirectionalGhost`` created with ``prob_attack=0.8`` and
    ``prob_scaredFlee=0.8``.  After the ghost whose index equals
    ``numGhosts`` the search continues with ``maxLevel`` at ``depth - 1``;
    otherwise the next ghost is evaluated at the same depth.

    Returns:
        The evaluation function's value for a won/lost state or at depth 0,
        otherwise the weighted sum of the child values (positive infinity
        whenever that sum is not strictly below infinity).
    """
    if gameState.isWin() or gameState.isLose() or depth == 0:
        return self.evaluationFunction(gameState)

    legalActions = gameState.getLegalActions(agentindex)
    ghost = DirectionalGhost(agentindex, prob_attack=0.8, prob_scaredFlee=0.8)
    weights = ghost.getDistribution(gameState)
    children = [
        gameState.generateSuccessor(agentindex, move) for move in legalActions
    ]

    last_ghost = agentindex == numGhosts
    expected = 0
    for child, move in zip(children, legalActions):
        weight = weights[move]
        if last_ghost:
            child_value = maxLevel(child, depth - 1)
        else:
            child_value = expectiLevel(child, depth, agentindex + 1)
        expected += weight * child_value

    # Anything that does not compare below infinity (including NaN) is
    # reported as positive infinity.
    if expected < float("inf"):
        return expected
    return float("inf")
def rb_directional_expectimax(self, cur_state: GameState, turn: int,
                              agent: int, depth_limit: int, depth: int,
                              ghost_num: int):
    """Depth-limited expectimax against ``DirectionalGhost`` opponents.

    Args:
        cur_state: state to evaluate.
        turn: index of the agent whose move it is.
        agent: index of the maximising agent; ``depth`` grows by one each
            time it is this agent's turn.
        depth_limit: search stops once ``depth`` reaches this value.
        depth: depth reached so far (before this call's increment).
        ghost_num: number of ghosts; turns cycle modulo ``ghost_num + 1``.

    Returns:
        ``self.evaluationFunction(cur_state)`` at the depth limit or in a
        won/lost state; the maximum child value on the maximiser's turn
        (negative infinity if it has no legal action); otherwise the
        probability-weighted sum of the child values, with NaN mapped to 0.
    """
    if turn == agent:
        depth += 1
    if depth >= depth_limit or cur_state.isWin() or cur_state.isLose():
        return self.evaluationFunction(cur_state)

    next_turn = (turn + 1) % (ghost_num + 1)

    if turn == agent:  # maximising agent's turn
        # float("-inf") rather than np.NINF, which NumPy 2.0 removed.
        cur_max = float("-inf")
        for action in cur_state.getLegalPacmanActions():
            child_state = cur_state.generateSuccessor(turn, action)
            cur_max = max(
                cur_max,
                self.rb_directional_expectimax(child_state, next_turn, agent,
                                               depth_limit, depth, ghost_num))
        return cur_max

    # ghost turn
    assert turn > agent
    ghost_legal_moves = cur_state.getLegalActions(turn)
    if not ghost_legal_moves:
        # Nothing to average over; same result the empty loop used to give.
        return 0

    # The distribution depends only on the current state, so query it once
    # instead of once per action.
    dist = DirectionalGhost(turn).getDistribution(cur_state)
    expectancy = 0
    for action in ghost_legal_moves:
        child_state = cur_state.generateSuccessor(turn, action)
        expectancy += dist[action] * self.rb_directional_expectimax(
            child_state, next_turn, agent, depth_limit, depth, ghost_num)
    if math.isnan(expectancy):
        expectancy = 0
    return expectancy
def test0(agentName):
    """Run one depth-2 game on smallClassic and grade on timeouts only.

    ``'alphabeta'`` and ``'minimax'`` select the matching agent class from
    ``submission``; any other name selects ``ExpectimaxAgent``.  The game is
    played against two ``DirectionalGhost`` opponents.  If the returned
    stats report any timeout the grader is failed, otherwise full credit is
    assigned.
    """
    known_agents = (('alphabeta', 'AlphaBetaAgent'), ('minimax', 'MinimaxAgent'))
    label, class_name = next(
        (entry for entry in known_agents if agentName == entry[0]),
        ('expectimax', 'ExpectimaxAgent'))

    stats = run('smallClassic',
                getattr(submission, class_name)(depth=2),
                [DirectionalGhost(ghost + 1) for ghost in range(2)],
                name='%s (depth %d)' % (label, 2))

    if stats['timeouts'] > 0:
        grader.fail('Your ' + agentName + ' agent timed out on smallClassic. No autograder feedback will be provided.')
        return
    grader.assignFullCredit()
def timeout_test(self, agentName):
    """Play one depth-2 game on smallClassic and assert it never timed out.

    ``'alphabeta'`` and ``'minimax'`` select the matching agent class from
    ``submission``; any other name selects ``ExpectimaxAgent``.  The agent
    name and the timeout count are printed before the assertion.
    """
    known_agents = (('alphabeta', 'AlphaBetaAgent'), ('minimax', 'MinimaxAgent'))
    label, class_name = next(
        (entry for entry in known_agents if agentName == entry[0]),
        ('expectimax', 'ExpectimaxAgent'))

    stats = run('smallClassic',
                getattr(submission, class_name)(depth=2),
                [DirectionalGhost(ghost + 1) for ghost in range(2)],
                name='%s (depth %d)' % (label, 2))

    print(agentName)
    timeouts = stats['timeouts']
    print(timeouts)
    self.assertLessEqual(
        stats['timeouts'], 0,
        msg=f'Your {agentName} agent timed out on smallClassic. No autograder feedback will be provided.')
def writeSolution(self, moduleDict, filePath):
    """Play a reference game and write the recorded traces to ``filePath``.

    Seeds ``random`` with ``self.seed``, builds the layout from
    ``self.layout_text``, runs a ``PolyAgent`` against two
    ``DirectionalGhost`` opponents and stores the three traces returned by
    ``pac.getTraces()`` through ``self.writeList``.

    Args:
        moduleDict: mapping that provides the ``'multiAgents'`` module.
        filePath: path of the solution file to (over)write.
    """
    # load module, set seed, create ghosts and pacman, run game
    multiAgents = moduleDict['multiAgents']
    random.seed(self.seed)
    lay = layout.Layout([l.strip() for l in self.layout_text.split('\n')])
    if self.alg == 'ExpectimaxAgent':
        ourPacOptions = {'expectimax': 'True'}
    elif self.alg == 'AlphaBetaAgent':
        ourPacOptions = {'alphabeta': 'True'}
    else:
        ourPacOptions = {}
    pac = PolyAgent(self.seed, multiAgents, ourPacOptions, self.depth)
    disp = self.question.getDisplay()
    run(lay, self.layout_name, pac,
        [DirectionalGhost(i + 1) for i in range(2)], disp, name=self.alg)
    (optimalActions, altDepthActions, partialPlyBugActions) = pac.getTraces()

    # recover traces and record to file; the context manager closes the
    # handle even if one of the writes raises
    with open(filePath, 'w') as handle:
        self.writeList(handle, 'optimalActions', optimalActions)
        self.writeList(handle, 'altDepthActions', altDepthActions)
        self.writeList(handle, 'partialPlyBugActions', partialPlyBugActions)
def writeSolution(self, moduleDict, filePath):
    """Play a reference game and write the recorded traces to ``filePath``.

    Seeds ``random`` with ``self.seed``, builds the layout from
    ``self.layout_text``, runs a ``PolyAgent`` against two
    ``DirectionalGhost`` opponents and stores the three traces returned by
    ``pac.getTraces()`` through ``self.writeList``.

    Args:
        moduleDict: mapping that provides the ``"multiAgents"`` module.
        filePath: path of the solution file to (over)write.
    """
    # load module, set seed, create ghosts and pacman, run game
    multiAgents = moduleDict["multiAgents"]
    random.seed(self.seed)
    lay = layout.Layout([l.strip() for l in self.layout_text.split("\n")])
    if self.alg == "ExpectimaxAgent":
        ourPacOptions = {"expectimax": "True"}
    elif self.alg == "AlphaBetaAgent":
        ourPacOptions = {"alphabeta": "True"}
    else:
        ourPacOptions = {}
    pac = PolyAgent(self.seed, multiAgents, ourPacOptions, self.depth)
    disp = self.question.getDisplay()
    run(
        lay,
        self.layout_name,
        pac,
        [DirectionalGhost(i + 1) for i in range(2)],
        disp,
        name=self.alg,
    )
    (optimalActions, altDepthActions, partialPlyBugActions) = pac.getTraces()

    # recover traces and record to file; the context manager closes the
    # handle even if one of the writes raises
    with open(filePath, "w") as handle:
        self.writeList(handle, "optimalActions", optimalActions)
        self.writeList(handle, "altDepthActions", altDepthActions)
        self.writeList(handle, "partialPlyBugActions", partialPlyBugActions)
def execute(self, grades, moduleDict, solutionDict):
    """Grade the student agent by replaying a game against stored traces.

    The student's agent (class ``self.alg`` from the ``multiAgents`` module)
    is wrapped in a ``GradingAgent`` together with the three action traces
    decoded from ``solutionDict`` and played against two
    ``DirectionalGhost`` opponents.

    Returns:
        ``self.testFail(grades)`` on a timeout, a crash, or a failure code
        of -2, -1, a positive value, or -3 with a non-negative wrong-state
        count; ``self.testPass(grades)`` for code 0 or code -3 otherwise;
        ``None`` for any other code.
    """

    def decode_trace(key):
        # Each line of the stored trace is one JSON document.
        return [json.loads(row) for row in solutionDict[key].split("\n")]

    # load student code and staff code solutions
    multiAgents = moduleDict["multiAgents"]
    studentAgent = getattr(multiAgents, self.alg)(depth=self.depth)
    allActions = decode_trace("optimalActions")
    altDepthActions = decode_trace("altDepthActions")
    partialPlyBugActions = decode_trace("partialPlyBugActions")

    # set up game state and play a game
    random.seed(self.seed)
    layout_rows = [row.strip() for row in self.layout_text.split("\n")]
    lay = layout.Layout(layout_rows)
    pac = GradingAgent(
        self.seed, studentAgent, allActions, altDepthActions, partialPlyBugActions
    )
    disp = self.question.getDisplay()
    ghosts = [DirectionalGhost(number + 1) for number in range(2)]
    stats = run(lay, self.layout_name, pac, ghosts, disp, name=self.alg)

    # run-level failures come first, timeouts before crashes
    run_failures = (
        ("timeouts", "Agent timed out on smallClassic. No credit"),
        ("crashes", "Agent crashed on smallClassic. No credit"),
    )
    for counter, message in run_failures:
        if stats[counter] > 0:
            self.addMessage(message)
            return self.testFail(grades)

    # then the grading agent's own verdict
    code = pac.checkFailure()
    if code == 0:
        return self.testPass(grades)
    if code == -3:
        if pac.getWrongStatesExplored() >= 0:
            self.addMessage("Bug: Wrong number of states expanded.")
            return self.testFail(grades)
        return self.testPass(grades)
    if code == -2:
        self.addMessage("Bug: Partial Ply Bug")
        return self.testFail(grades)
    if code == -1:
        self.addMessage("Bug: Search depth off by 1")
        return self.testFail(grades)
    if code > 0:
        moves = pac.getSuboptimalMoves()
        state, studentMove, optMove = random.choice(moves)
        self.addMessage("Bug: Suboptimal moves")
        self.addMessage(
            "State:%s\nStudent Move:%s\nOptimal Move:%s"
            % (state, studentMove, optMove)
        )
        return self.testFail(grades)
    return None
def directionalExpectimaxValue(self, gameState, agentIndex, searchDepth):
    """Return the expectimax value of ``gameState`` against directional ghosts.

    Args:
        gameState: state to evaluate.
        agentIndex: index of the agent to move; a value equal to the number
            of agents wraps around to agent 0.
        searchDepth: depth reached so far; it grows by one each time agent 0
            moves and the search stops when it equals ``self.depth``.

    Returns:
        ``self.evaluationFunction(gameState)`` at the depth limit or in a
        won/lost state, the maximum child value on agent 0's turn
        (negative infinity without legal actions), otherwise the sum of the
        child values weighted by the ghost's action probabilities.
    """
    # Base cases: depth limit reached or terminal state.
    if searchDepth == self.depth or gameState.isWin() or gameState.isLose():
        return self.evaluationFunction(gameState)

    current_agent_index = agentIndex
    if gameState.getNumAgents() == current_agent_index:
        current_agent_index = 0

    legal_agent_actions = gameState.getLegalActions(current_agent_index)

    if current_agent_index == 0:
        # Maximising turn.
        cur_max = float('-inf')
        for action in legal_agent_actions:
            child = gameState.generateSuccessor(current_agent_index, action)
            v = self.directionalExpectimaxValue(child,
                                                current_agent_index + 1,
                                                searchDepth + 1)
            cur_max = max(v, cur_max)
        return cur_max

    # Ghost turn: a chance node.  The ghost helper is only needed here.
    probabilities = DirectionalGhost(current_agent_index).getDistribution(
        gameState)

    # Look the probability up by action.  Pairing children with
    # ``probabilities.keys()`` by position is wrong whenever the
    # distribution's key order differs from the legal-action order.
    expected_value = 0
    for action in legal_agent_actions:
        child = gameState.generateSuccessor(current_agent_index, action)
        expected_value += probabilities.get(action, 0) * \
            self.directionalExpectimaxValue(child, current_agent_index + 1,
                                            searchDepth)
    return expected_value
def execute(self, grades, moduleDict, solutionDict):
    """Grade the student agent by replaying a game against stored traces.

    The student's agent (class ``self.alg`` from the ``multiAgents`` module)
    is wrapped in a ``GradingAgent`` together with the three action traces
    decoded from ``solutionDict`` and played against two
    ``DirectionalGhost`` opponents.

    Returns:
        ``self.testFail(grades)`` on a timeout, a crash, or a failure code
        of -2, -1, a positive value, or -3 with a non-negative wrong-state
        count; ``self.testPass(grades)`` for code 0 or code -3 otherwise;
        ``None`` for any other code.
    """
    # load student code and staff code solutions
    multiAgents = moduleDict['multiAgents']
    studentAgent = getattr(multiAgents, self.alg)(depth=self.depth)
    # Real lists, not lazy ``map`` objects: under Python 3 a map iterator
    # can be consumed only once and cannot be indexed.
    allActions = [
        json.loads(x) for x in solutionDict['optimalActions'].split('\n')
    ]
    altDepthActions = [
        json.loads(x) for x in solutionDict['altDepthActions'].split('\n')
    ]
    partialPlyBugActions = [
        json.loads(x) for x in solutionDict['partialPlyBugActions'].split('\n')
    ]

    # set up game state and play a game
    random.seed(self.seed)
    lay = layout.Layout([l.strip() for l in self.layout_text.split('\n')])
    pac = GradingAgent(self.seed, studentAgent, allActions, altDepthActions,
                       partialPlyBugActions)

    # check return codes and assign grades
    disp = self.question.getDisplay()
    stats = run(lay, self.layout_name, pac,
                [DirectionalGhost(i + 1) for i in range(2)], disp,
                name=self.alg)
    # Debug output kept from the original; list() keeps the printed form a
    # plain list as the Python 2 print statement produced.
    print(list(pac.__dict__.keys()))
    if stats['timeouts'] > 0:
        self.addMessage('Agent timed out on smallClassic. No credit')
        return self.testFail(grades)
    if stats['crashes'] > 0:
        self.addMessage('Agent crashed on smallClassic. No credit')
        return self.testFail(grades)

    code = pac.checkFailure()
    if code == 0:
        return self.testPass(grades)
    elif code == -3:
        if pac.getWrongStatesExplored() >= 0:
            print(pac.getWrongStatesExplored())
            self.addMessage('Bug: Wrong number of states expanded.')
            return self.testFail(grades)
        else:
            return self.testPass(grades)
    elif code == -2:
        self.addMessage('Bug: Partial Ply Bug')
        return self.testFail(grades)
    elif code == -1:
        self.addMessage('Bug: Search depth off by 1')
        return self.testFail(grades)
    elif code > 0:
        moves = pac.getSuboptimalMoves()
        state, studentMove, optMove = random.choice(moves)
        self.addMessage('Bug: Suboptimal moves')
        self.addMessage('State:%s\nStudent Move:%s\nOptimal Move:%s' %
                        (state, studentMove, optMove))
        return self.testFail(grades)
def figure_ghost_type(gameState, next_pos, directional, ghost_dists):
    """Guess whether the ghosts behave like ``DirectionalGhost`` agents.

    For every ghost the position stored in ``next_pos`` is compared with the
    ghost's current position: a match appends 1 to ``directional``, a
    mismatch with a non-``None`` entry appends 0.  ``next_pos`` is then
    updated in place with the square reached by the action a
    ``DirectionalGhost`` for that ghost returns in ``gameState``.

    Args:
        gameState: current state; supplies the ghost positions.
        next_pos: per-ghost positions to compare against; mutated in place.
        directional: list of 0/1 observations so far; mutated in place.
        ghost_dists: when empty the function reports ``'no_ghosts'``.

    Returns:
        ``(ghost_type, next_pos, directional, keep_sampling)``.  Once more
        than five observations exist the verdict is ``'directional'`` if
        more than four matched, else ``'random'``, with ``keep_sampling``
        set to ``False``; before that it is ``True``.
    """
    ghosts_pos = gameState.getGhostPositions()
    if len(ghost_dists) == 0:
        return 'no_ghosts', next_pos, directional, True

    # (dx, dy) for each direction that moves the ghost.
    step_by_direction = {
        'East': (1, 0),
        'West': (-1, 0),
        'North': (0, 1),
        'South': (0, -1),
    }

    ghost_type = 'random'
    for index, g_pos in enumerate(ghosts_pos):
        if next_pos[index] == g_pos:
            directional.append(1)
            ghost_type = 'directional'
        elif next_pos[index] is not None:
            directional.append(0)
            ghost_type = 'random'

        d = DirectionalGhost(index + 1).getAction(gameState)
        # Compare direction names by value; ``is`` only tests object
        # identity and works for strings purely by interning accident.
        step = step_by_direction.get(d)
        if step is not None:
            next_pos[index] = (g_pos[0] + step[0], g_pos[1] + step[1])

    if len(directional) > 5:
        if sum(directional) > 4:
            return 'directional', next_pos, directional, False
        return 'random', next_pos, directional, False
    return ghost_type, next_pos, directional, True
def rbExpectimax(gameState, agent, depth, multiAgent, ghostType):
    """Resource-bounded expectimax value of ``gameState``.

    Args:
        gameState: state to evaluate.
        agent: index of the agent to move; 0 maximises, the rest are chance
            nodes.
        depth: remaining depth; reduced by one whenever the turn wraps back
            to agent 0.
        multiAgent: object providing ``evaluationFunction``.
        ghostType: ``'random_ghost'`` or ``'directional_ghost'``; selects the
            ghost class whose distribution weights the chance nodes.

    Returns:
        The heuristic value for terminal states or at depth 0, the maximum
        child value for agent 0, otherwise the probability-weighted sum of
        the child values.
    """
    assert ghostType in ('random_ghost', 'directional_ghost')

    # terminal state or depth exhausted: fall back to the heuristic estimate
    if isTerminalState(gameState) or depth == 0:
        return multiAgent.evaluationFunction(gameState)

    # next agent, modulo the number of agents; a full round of turns
    # (agent 0 through the last ghost) costs one level of depth
    next_agent = (agent + 1) % gameState.getNumAgents()
    next_depth = depth - 1 if next_agent == 0 else depth

    # ghost helper used to read the action distribution
    ghostAgent = (RandomGhost(agent)
                  if ghostType == 'random_ghost' else DirectionalGhost(agent))

    if agent == 0:
        # maximising turn
        best = -np.inf
        for move in gameState.getLegalActions(agent):
            child = gameState.generateSuccessor(agent, move)
            value = rbExpectimax(child, next_agent, next_depth, multiAgent,
                                 ghostType)
            if value > best:
                best = value
        return best

    # ghost turn: expectation over the ghost's distribution
    return sum(
        rbExpectimax(gameState.generateSuccessor(agent, move), next_agent,
                     next_depth, multiAgent, ghostType) * weight
        for move, weight in ghostAgent.getDistribution(gameState).items())
def __init__(self,
             enable_render=False,
             layout_name="mediumClassic",
             view_distance=(2, 2)):
    """Set up agents and displays, then reset the environment.

    Args:
        enable_render: forwarded to ``self.reset``.
        layout_name: stored on the instance and forwarded to ``self.reset``.
        view_distance: stored on the instance as ``self.view_distance``.
    """
    self.layouts = {}
    self.layout_name = layout_name
    self.pacman = KeyboardAgent()

    # Twenty ghosts with indices 1..20: odd indices are random ghosts,
    # even indices are directional ghosts.
    ghosts = []
    for slot in range(20):
        ghost_class = DirectionalGhost if slot % 2 else RandomGhost
        ghosts.append(ghost_class(slot + 1))
    self.ghosts = ghosts

    frameTime = 0.03
    textDisplay.SLEEP_TIME = frameTime
    self.display_text = textDisplay.PacmanGraphics()
    self.display_graphics = graphicsDisplay.PacmanGraphics(
        1.0, frameTime=frameTime)

    self.beQuiet = True
    self.game = None
    self.view_distance = view_distance
    self.textGraphics = False

    self.reset(enable_render=enable_render, layout_name=layout_name)
def getDirectionalGhosts(n):
    """Build ``n`` directional ghosts with agent indices 1 through ``n``."""
    return [DirectionalGhost(agent_index) for agent_index in range(1, n + 1)]
def getActionAux(self, gameState, agent, depth):
    """Expectimax helper that yields a move at the root and a score below.

    Args:
        gameState: state to evaluate.
        agent: index of the agent to move; ``self.index`` maximises, every
            other agent is a chance node weighted by a ``DirectionalGhost``
            distribution.
        depth: remaining depth; reduced by one when the turn passes from the
            last ghost back to ``self.index``.

    Returns:
        ``gameState.getScore()`` for a final state and the evaluation
        function's value at depth 0.  On the maximiser's turn: one of the
        best-scoring legal actions when ``depth == self.depth`` (the root),
        otherwise the best score.  On a ghost's turn: the expected score
        under the ghost's distribution.
    """
    if self.isFinalState(gameState):
        return gameState.getScore()
    if depth == 0:
        return self.evaluationFunction(gameState)

    nextAgent = (agent + 1) % gameState.getNumAgents()
    legalActions = gameState.getLegalActions(agent)
    nextStates = [
        gameState.generateSuccessor(agent, action) for action in legalActions
    ]

    if agent == self.index:
        # Maximiser's turn.
        scores = [
            self.getActionAux(state, nextAgent, depth) for state in nextStates
        ]
        bestScore = max(scores)
        bestIndices = [
            index for index, score in enumerate(scores) if score == bestScore
        ]
        # Drawn on every max node (not only at the root) so the random
        # stream is consumed exactly as before.
        chosenIndex = random.choice(bestIndices)
        # The root reports the preferred move; inner nodes report the score.
        if depth == self.depth:
            return legalActions[chosenIndex]
        return bestScore

    # Ghost turn: chance node.
    ghostDist = DirectionalGhost(agent).getDistribution(gameState)
    lastGhost = nextAgent == self.index
    totalScore = 0
    prob_sum = 0
    for action, state in zip(legalActions, nextStates):
        if not lastGhost:
            value = self.getActionAux(state, nextAgent, depth)
        elif depth == 1:
            # The children are leaves: the maximum depth has been reached.
            value = self.evaluationFunction(state)
        else:
            value = self.getActionAux(state, nextAgent, depth - 1)
        totalScore += value * ghostDist[action]
        prob_sum += ghostDist[action]

    # Sanity check with a tolerance: a sum of floating-point probabilities
    # is rarely exactly 1, so ``== 1`` could fail on valid distributions.
    assert math.isclose(prob_sum, 1.0, rel_tol=1e-9, abs_tol=1e-9), (
        "ghost action probabilities sum to %r, expected 1" % (prob_sum,))
    return totalScore / prob_sum