def betterEvaluationFunction(currentGameState: GameState) -> float:
    """
    Your extreme, unstoppable evaluation function (problem 4).
    Note that you can't fix a seed in this function.
    """
    # BEGIN_YOUR_CODE (our solution is 13 lines of code, but don't worry if you deviate from this)
    def getDistFromPacman(x, y):
        pacmanPos = currentGameState.getPacmanPosition()
        return abs(pacmanPos[0] - x) + abs(pacmanPos[1] - y)

    def exponentiallyWeightedScore(objectives):
        # Objectives contribute 2^-d, so nearby objectives dominate the score.
        return sum(2**(-1 * getDistFromPacman(objectivePos[0], objectivePos[1]))
                   for objectivePos in objectives)

    def getFoodScore(foodGrid):
        foodsPos = [(x, y) for y in range(foodGrid.height)
                    for x in range(foodGrid.width) if foodGrid[x][y]]
        return exponentiallyWeightedScore(foodsPos)

    food = 9 * getFoodScore(currentGameState.getFood())
    ghostStates = currentGameState.getGhostStates()
    scary = sum(ghostState.scaredTimer for ghostState in ghostStates) > 0
    capsule = 100 if scary else 200 * exponentiallyWeightedScore(
        currentGameState.getCapsules())
    scaredGhostsPos = [
        ghostState.getPosition() for ghostState in ghostStates
        if ghostState.scaredTimer > 0
    ]
    ghost = 200 * exponentiallyWeightedScore(scaredGhostsPos)
    score = currentGameState.getScore()
    return food + capsule + ghost + score
def getAction(self, state):
    GameState.getAndResetExplored()
    studentAction = (self.studentAgent.getAction(state),
                     len(GameState.getAndResetExplored()))
    print(studentAction)
    optimalActions = self.optimalActions[self.stepCount]
    altDepthActions = self.altDepthActions[self.stepCount]
    partialPlyBugActions = self.partialPlyBugActions[self.stepCount]

    studentOptimalAction = False
    curRightStatesExplored = False
    for i in range(len(optimalActions)):
        if studentAction[0] in optimalActions[i][0]:
            studentOptimalAction = True
        else:
            self.actionsConsistentWithOptimal[i] = False
        if studentAction[1] == int(optimalActions[i][1]):
            curRightStatesExplored = True
    if not curRightStatesExplored and self.wrongStatesExplored < 0:
        self.wrongStatesExplored = 1
    for i in range(len(altDepthActions)):
        if studentAction[0] not in altDepthActions[i]:
            self.actionsConsistentWithAlternativeDepth[i] = False
    for i in range(len(partialPlyBugActions)):
        if studentAction[0] not in partialPlyBugActions[i]:
            self.actionsConsistentWithPartialPlyBug[i] = False
    if not studentOptimalAction:
        self.suboptimalMoves.append(
            (state, studentAction[0], optimalActions[0][0][0]))
    self.stepCount += 1
    random.seed(self.seed + self.stepCount)
    return optimalActions[0][0][0]
def max_value(
    self,
    state: GameState,
    depth: int,
    alpha: float,
    beta: float,
    actor: Optional[int] = None,
) -> float:
    # Sanity check: have all the ghosts been evaluated in the last round?
    if actor is not None:
        assert actor == state.getNumAgents()
    # Game over or search depth has been reached
    if state.isLose() or state.isWin() or depth <= 0:
        return self.evaluationFunction(state)
    legal_actions = state.getLegalActions(agentIndex=0)
    utility = -inf
    for action in legal_actions:
        successor = state.generateSuccessor(agentIndex=0, action=action)
        utility = max(
            utility,
            self.min_value(successor, depth, alpha, beta, ghost_num=1),
        )
        if utility > beta:
            # Prune: the min node above will never allow a value this large.
            return utility
        alpha = max(alpha, utility)
    return utility
def getAction(self, game_state: GameState): """ Returns the minimax action using self.depth and self.evaluationFunction """ "*** YOUR CODE HERE ***" # Generate candidate actions legal_actions = game_state.getLegalActions(self.pacman_index) # if Directions.STOP in legal_actions: # legal_actions.remove(Directions.STOP) alpha = -math.inf beta = math.inf scores = [] for action in legal_actions: successor = game_state.getNextState(self.pacman_index, action) # since we're expanding the root node, we need to call min_value since the next node is a min node value = self.min_value(successor, depth=0, ghost_index=0, alpha=alpha, beta=beta) scores.append(value) # can't prune on the root node alpha = max(alpha, value) best_score = max(scores) best_indices = [index for index in range(len(scores)) if scores[index] == best_score] chosen_index = random.choice(best_indices) # Pick randomly among the best return legal_actions[chosen_index]
def getAction(self, game_state: GameState) -> str:
    """
    Returns the minimax action from the current gameState using self.depth
    and self.evaluationFunction.

    Here are some method calls that might be useful when implementing minimax.

    gameState.getLegalActions(agentIndex):
        Returns a list of legal actions for an agent
        agentIndex=0 means Pacman, ghosts are >= 1

    gameState.generateSuccessor(agentIndex, action):
        Returns the successor game state after an agent takes an action

    gameState.getNumAgents():
        Returns the total number of agents in the game
    """
    legal_actions = game_state.getLegalActions(agentIndex=0)
    best_action_index = max(
        range(len(legal_actions)),
        key=lambda action_num: self.min_value(
            state=game_state.generateSuccessor(
                agentIndex=0,
                action=legal_actions[action_num],
            ),
            depth=self.depth,
            ghost_num=1,
        ))
    return legal_actions[best_action_index]
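# getAction above relies on a min_value helper that is not shown in this
# section. Below is a minimal sketch consistent with the ghost_num-based
# signature used at the call site; the exact bookkeeping (and the matching
# plain-minimax max_value) is an assumption, not the original implementation.
def min_value(self, state: GameState, depth: int, ghost_num: int) -> float:
    # Terminal test: the game is decided or the depth budget is spent.
    if state.isLose() or state.isWin() or depth <= 0:
        return self.evaluationFunction(state)
    values = []
    for action in state.getLegalActions(agentIndex=ghost_num):
        successor = state.generateSuccessor(agentIndex=ghost_num, action=action)
        if ghost_num == state.getNumAgents() - 1:
            # Last ghost in the ply: hand control back to Pacman, one ply deeper.
            values.append(self.max_value(successor, depth - 1))
        else:
            values.append(self.min_value(successor, depth, ghost_num + 1))
    return min(values)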
def exp_value(self, game_state: GameState, depth=0, ghost_index=0):
    # Advance to the next ghost to move
    ghost_index += 1
    if self.is_a_new_level_of_search(game_state, ghost_index):
        depth = depth + 1
    if game_state.isWin() or game_state.isLose():
        return self.evaluationFunction(game_state)

    value = 0
    legal_actions = game_state.getLegalActions(ghost_index)
    for action in legal_actions:
        successor = game_state.getNextState(ghost_index, action)
        # Ghosts are modeled as choosing uniformly at random.
        probability = 1 / len(legal_actions)
        if self.is_a_new_level_of_search(game_state, ghost_index):
            # This is the last agent, so move on to Pacman (a new max node).
            value += probability * self.max_value(successor, depth=depth)
        else:
            # The next node in the tree is another chance node, so continue
            # with the next ghost.
            value += probability * self.exp_value(successor, depth=depth,
                                                  ghost_index=ghost_index)
    return value
def min_value(self, game_state: GameState, depth=0, ghost_index=0,
              alpha=-math.inf, beta=math.inf):
    # Advance to the next ghost to move
    ghost_index += 1
    if self.is_a_new_level_of_search(game_state, ghost_index):
        depth = depth + 1
    if game_state.isWin() or game_state.isLose():
        return self.evaluationFunction(game_state)

    value = math.inf
    legal_actions = game_state.getLegalActions(ghost_index)
    for action in legal_actions:
        successor = game_state.getNextState(ghost_index, action)
        if self.is_a_new_level_of_search(game_state, ghost_index):
            # This is the last agent, so move on to Pacman (a new max node).
            value = min(value, self.max_value(successor, depth=depth,
                                              alpha=alpha, beta=beta))
        else:
            # Next on the tree is another minimizer, so continue with the
            # next ghost.
            value = min(value, self.min_value(successor, depth=depth,
                                              ghost_index=ghost_index,
                                              alpha=alpha, beta=beta))
        if value < alpha:
            return value
        beta = min(beta, value)
    return value
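# min_value, exp_value and max_value in this family of agents all delegate to
# is_a_new_level_of_search, which is not shown in this section. A minimal
# sketch of what it plausibly checks, inferred from the call sites (the
# implementation is an assumption):
def is_a_new_level_of_search(self, game_state: GameState, ghost_index: int) -> bool:
    # Once the incremented ghost index reaches the last agent, the next node
    # to expand belongs to Pacman, one ply deeper in the tree.
    return ghost_index == game_state.getNumAgents() - 1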
def enhancedPacmanFeatures(state, action):
    """
    For each state, this function is called with each legal action.
    It should return a counter with { <feature name> : <feature value>, ... }
    """
    features = util.Counter()
    # *** YOUR CODE HERE ***
    succGameState = state.generateSuccessor(0, action)
    pac_location = succGameState.getPacmanPosition()
    ghost_locations = succGameState.getGhostPositions()
    # One distance feature per ghost, plus the total distance to all ghosts.
    dist = 0
    for n, ghost_location in enumerate(ghost_locations):
        ghost_dist = util.manhattanDistance(pac_location, ghost_location)
        dist += ghost_dist
        features['dist' + str(n)] = ghost_dist
    if action == 'Stop':
        features['stopped'] += 1
    features['dist'] = dist
    features['foodCount'] = succGameState.getNumFood()
    features['power_pellet'] = len(succGameState.getCapsules())
    return features
def getAction(self, state):
    # survey agents
    GameState.getAndResetExplored()
    optimalActionLists = []
    for agent in self.solutionAgents:
        optimalActionLists.append((
            agent.getBestPacmanActions(state)[0],
            len(GameState.getAndResetExplored()),
        ))
    alternativeDepthLists = [
        agent.getBestPacmanActions(state)[0]
        for agent in self.alternativeDepthAgents
    ]
    partialPlyBugLists = [
        agent.getBestPacmanActions(state)[0]
        for agent in self.partialPlyBugAgents
    ]

    # record responses
    self.optimalActionLists.append(optimalActionLists)
    self.alternativeDepthLists.append(alternativeDepthLists)
    self.partialPlyBugLists.append(partialPlyBugLists)

    self.stepCount += 1
    random.seed(self.seed + self.stepCount)
    return optimalActionLists[0][0][0]
def get_action(self, state):
    """Get action from student's agent and compare with reference.

    Returns an optimal action from reference.
    """
    GameState.get_and_reset_explored()
    student_action = (self.student_agent.get_action(state),
                      len(GameState.get_and_reset_explored()))
    optimal_actions = self.optimal_actions[self.step_count]
    alt_depth_actions = self.alt_depth_actions[self.step_count]
    partial_ply_bug_actions = self.partial_ply_bug_actions[self.step_count]

    student_optimal_action = False
    current_right_states_explored = False
    for i in range(len(optimal_actions)):
        if student_action[0] in optimal_actions[i][0]:
            student_optimal_action = True
        else:
            self.actions_consistent_with_optimal[i] = False
        if student_action[1] == int(optimal_actions[i][1]):
            current_right_states_explored = True
    if (not current_right_states_explored
            and self.wrong_states_explored < 0):
        self.wrong_states_explored = 1
    for i in range(len(alt_depth_actions)):
        if student_action[0] not in alt_depth_actions[i]:
            self.actions_consistent_with_alternative_depth[i] = False
    for i in range(len(partial_ply_bug_actions)):
        if student_action[0] not in partial_ply_bug_actions[i]:
            self.actions_consistent_with_partial_ply_bug[i] = False
    if not student_optimal_action:
        self.suboptimal_moves.append(
            (state, student_action[0], optimal_actions[0][0][0]))
    self.step_count += 1
    random.seed(self.seed + self.step_count)
    return optimal_actions[0][0][0]
def _alphabeta(self, gameState: GameState, idx: int,
               ab: List[float]) -> Tuple[float, Optional[str]]:
    n = gameState.getNumAgents()
    if idx / n >= self.depth or gameState.isWin() or gameState.isLose():
        return (self.evaluationFunction(gameState), None)
    agent = idx % n
    legalActions = gameState.getLegalActions(agent)
    # `mod` flips every comparison so the same loop maximizes for Pacman and
    # minimizes for ghosts; ab = [alpha, beta] is indexed accordingly.
    pacman = (agent == 0)
    idx0 = int(pacman)
    idx1 = int(not pacman)
    mod = 1 if pacman else -1
    best_score = -float('inf') * mod
    best_action = None
    for legalAction in legalActions:
        s = gameState.generateSuccessor(agent, legalAction)
        score = self._alphabeta(s, idx + 1, [*ab])[0]
        if score * mod > best_score * mod:
            best_score, best_action = score, legalAction
        if best_score * mod > ab[idx0] * mod:
            break
        ab[idx1] = max(ab[idx1] * mod, best_score * mod) * mod
    return (best_score, best_action)
def findPathToClosestDot(self, gameState: pacman.GameState):
    """
    Returns a path (a list of actions) to the closest dot, starting from
    gameState.
    """
    # Here are some useful elements of the startState
    startPosition = gameState.getPacmanPosition()
    food = gameState.getFood()
    walls = gameState.getWalls()
    problem = AnyFoodSearchProblem(gameState)

    # We don't know where the closest dot is, so estimate the closest dot.
    import time
    start_time = time.time()
    # print("food (%s): %s" % (type(food), food))

    # Set the closest dot as the goal, so that the heuristic knows the goal.
    problem.goal = __getClosestGoal__(startPosition, food.asList())

    import search
    astar = search.astar(problem, heuristic=euclideanHeuristic)
    print("findPathToClosestDot() took %2.5f seconds" % (time.time() - start_time))
    return astar
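# __getClosestGoal__ is referenced above but not defined in this section. A
# plausible sketch (the behavior is inferred from the call site, not the
# original implementation):
def __getClosestGoal__(position, foodList):
    # Pick the dot with the smallest straight-line distance to Pacman; A*
    # with euclideanHeuristic then searches an actual path toward that cell.
    return min(foodList,
               key=lambda food: ((position[0] - food[0]) ** 2 +
                                 (position[1] - food[1]) ** 2) ** 0.5)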
def getAction(self, state):
    GameState.getAndResetExplored()
    studentAction = (self.studentAgent.getAction(state),
                     sum(map(hash, GameState.getAndResetExplored())))
    optimalActions = self.optimalActions[self.stepCount]
    altDepthActions = self.altDepthActions[self.stepCount]
    partialPlyBugActions = self.partialPlyBugActions[self.stepCount]

    studentOptimalAction = False
    curRightStatesExplored = False
    for i in range(len(optimalActions)):
        if studentAction[0] in optimalActions[i][0]:
            studentOptimalAction = True
        else:
            self.actionsConsistentWithOptimal[i] = False
        if studentAction[1] == int(optimalActions[i][1]):
            curRightStatesExplored = True
    if not curRightStatesExplored and self.wrongStatesExplored < 0:
        self.wrongStatesExplored = 1
    for i in range(len(altDepthActions)):
        if studentAction[0] not in altDepthActions[i]:
            self.actionsConsistentWithAlternativeDepth[i] = False
    for i in range(len(partialPlyBugActions)):
        if studentAction[0] not in partialPlyBugActions[i]:
            self.actionsConsistentWithPartialPlyBug[i] = False
    if not studentOptimalAction:
        self.suboptimalMoves.append(
            (state, studentAction[0], optimalActions[0][0][0]))
    self.stepCount += 1
    random.seed(self.seed + self.stepCount)
    return optimalActions[0][0][0]
def getMovement(layoutText, agentIndex, difficulty):
    gameState = GameState()
    numGhosts = layoutText.count('G')
    layoutText = layoutText.split('\n')
    layoutText = list(filter(None, layoutText))
    layout = Layout(layoutText)
    gameState.initialize(layout, numGhosts)
    if difficulty == 1:
        agent = ReflexAgent()
        agent.index = agentIndex
        return agent.getAction(gameState)
    elif difficulty == 2:
        agent = MinimaxAgent()
        agent.index = agentIndex
        return agent.getAction(gameState)
    elif difficulty == 3:
        agent = AlphaBetaAgent()
        agent.index = agentIndex
        agent.depth = 4
        return agent.getAction(gameState)
    else:
        agent = RandomAgent()
        agent.index = agentIndex
        return agent.getAction(gameState)
def calculate_food_score(game_state: GameState):
    # Reward progress: score rises as dots disappear, measured against the
    # total number of grid cells (width x height) as an upper bound.
    food_layout = game_state.getFood()
    initial_food = food_layout.width * food_layout.height
    return initial_food - game_state.getNumFood()
def __init__(self, startingGameState: pacman.GameState):
    self.start = (startingGameState.getPacmanPosition(),
                  startingGameState.getFood())
    self.walls = startingGameState.getWalls()
    self.startingGameState = startingGameState
    self._expanded = 0  # DO NOT CHANGE
    self.heuristicInfo = {}  # A dictionary for the heuristic to store information
def __init__(self, startingGameState: pacman.GameState): """ Stores the walls, pacman's starting position and corners. """ self.walls = startingGameState.getWalls() self.startingPosition = startingGameState.getPacmanPosition() top, right = self.walls.height - 2, self.walls.width - 2 self.corners = ((1, 1), (1, top), (right, 1), (right, top)) for corner in self.corners: if not startingGameState.hasFood(*corner): print('Warning: no food in corner ' + str(corner)) self._expanded = 0 # DO NOT CHANGE; Number of search nodes expanded # Please add any code here which you would like to use # in initializing the problem "*** YOUR CODE HERE ***" self.startState = PositionWithFoods(self.startingPosition, self.corners) self.to_foods: Dict[Coordinate, Dict[Coordinate, float]] = {} for c in self.corners: queue = util.Queue() queue.push((c, 0)) distances = {} while not queue.isEmpty(): pos, dis = queue.pop() x, y = pos if pos in distances: continue distances[pos] = dis for action in DIRECTIONS: dx, dy = Actions.directionToVector(action) next_pos = int(x + dx), int(y + dy) nextx, nexty = next_pos if next_pos not in distances and not self.walls[nextx][ nexty]: queue.push((next_pos, dis + 1)) self.to_foods[c] = distances self.to_other_foods = {} def tmp(foods, curr): key = (curr, tuple(sorted(foods))) if len(foods) == 1: self.to_other_foods[key] = 0 return 0 if key in self.to_other_foods: return self.to_other_foods[key] left_overs = list(foods) left_overs.remove(curr) self.to_other_foods[key] = min( map(lambda c: self.to_foods[curr][c] + tmp(left_overs, c), left_overs)) return self.to_other_foods[key] for c in self.corners: tmp(self.corners, c)
def getAction(self, gameState: GameState) -> str:
    """
    Returns the minimax action using self.depth and self.evaluationFunction
    """
    # BEGIN_YOUR_CODE (our solution is 36 lines of code, but don't worry if you deviate from this)
    def getVal(s, d, agentIndex, alpha=float('-inf'), beta=float('inf'),
               evalFn=self.evaluationFunction):
        nextAgentIndex = 0 if agentIndex == s.getNumAgents() - 1 else agentIndex + 1
        actions = s.getLegalActions(agentIndex)
        if len(actions) == 0:
            return s.getScore()
        elif d == 0:
            if agentIndex != 0:
                raise Exception(f"Unexpected agentIndex {agentIndex} != {0}")
            return evalFn(s)
        elif agentIndex == 0:
            maxVal = float('-inf')
            # actions.sort(key=lambda a: evalFn(s.generateSuccessor(agentIndex, a)), reverse=True)
            for a in actions:
                maxVal = max(
                    maxVal,
                    getVal(s.generateSuccessor(agentIndex, a), d,
                           nextAgentIndex, alpha, beta))
                alpha = max(alpha, maxVal)
                if alpha >= beta:
                    break
            return maxVal
        else:
            nextD = d - (1 if agentIndex == s.getNumAgents() - 1 else 0)
            minVal = float('inf')
            # actions.sort(key=lambda a: evalFn(s.generateSuccessor(agentIndex, a)), reverse=False)
            for a in actions:
                minVal = min(
                    minVal,
                    getVal(s.generateSuccessor(agentIndex, a), nextD,
                           nextAgentIndex, alpha, beta))
                beta = min(beta, minVal)
                if alpha >= beta:
                    break
            return minVal

    targetVal = getVal(gameState, self.depth, 0)
    # print(f"AlphaBetaAgent value of state = {targetVal}")
    legalActions = gameState.getLegalActions(0)
    actions = [
        a for a in legalActions
        if getVal(gameState.generateSuccessor(0, a), self.depth, 1) == targetVal
    ]
    return random.choice(actions)
def __init__(self, gameState: pacman.GameState):
    "Stores information from the gameState. You don't need to change this."
    # Store the food for later reference
    self.food = gameState.getFood()

    # Store info for the PositionSearchProblem (no need to change this)
    self.walls = gameState.getWalls()
    self.startState = gameState.getPacmanPosition()
    self.costFn = lambda x: 1
    self._visited, self._visitedlist, self._expanded = {}, [], 0  # DO NOT CHANGE
def processGames(self):
    maxNumTraining = DatatypeUtils.stringToInteger(self.view.numTrainingVar.get())
    increment = DatatypeUtils.stringToInteger(self.view.incrementVar.get())

    # from pympler import tracker
    # memory_tracker = tracker.SummaryTracker()

    for numTraining in range(0, maxNumTraining, increment):
        self.processGame(numTraining)
        GameState.getAndResetExplored()
def calculate_capsule_score(game_state: GameState):
    capsules = game_state.getCapsules()
    min_distance = math.inf
    distance_coefficient = 130
    for capsule_position in capsules:
        pacman_distance_from_capsule = util.manhattanDistance(
            game_state.getPacmanPosition(), capsule_position)
        min_distance = min(min_distance, pacman_distance_from_capsule)
    # With no capsules left, min_distance stays infinite and the score is 0.
    return distance_coefficient / min_distance
def __init__(self, ghostAgents, display, rules, layout, percentRandomize=0.5):
    Game.__init__(self, ghostAgents, display, rules)
    initState = GameState()
    initState.initialize(layout, len(ghostAgents), percentRandomize)
    self.state = initState
    self.initialState = initState.deepCopy()
def minimax(evalFunc: Callable, agent: int, depth: int,
            gameState: GameState, maxDepth: int) -> float:
    if gameState.isLose() or gameState.isWin() or depth == maxDepth:
        return evalFunc(gameState)
    if agent == 0:
        # Pacman: maximize over successor values.
        return max(
            minimax(evalFunc, 1, depth,
                    gameState.generateSuccessor(agent, action), maxDepth)
            for action in gameState.getLegalActions(agent))
    else:
        # Ghosts: minimize; after the last ghost, control returns to Pacman
        # and the search descends one ply deeper.
        nextAgent = agent + 1
        if gameState.getNumAgents() == nextAgent:
            nextAgent = 0
        if nextAgent == 0:
            depth += 1
        return min(
            minimax(evalFunc, nextAgent, depth,
                    gameState.generateSuccessor(agent, action), maxDepth)
            for action in gameState.getLegalActions(agent))
class MultiAgentSearchAgent(Agent):
    """
    This class provides some common elements to all of your multi-agent
    searchers. Any methods defined here will be available to the
    MinimaxPacmanAgent, AlphaBetaPacmanAgent & ExpectimaxPacmanAgent.

    You *do not* need to make any changes here, but you can if you want to add
    functionality to all your adversarial search agents. Please do not remove
    anything, however.

    Note: this is an abstract class: one that should not be instantiated. It's
    only partially specified, and designed to be extended. Agent (game.py) is
    another abstract class.
    """

    moveToAction = {
        Directions.WEST: PacmanAction.WEST,
        Directions.EAST: PacmanAction.EAST,
        Directions.NORTH: PacmanAction.NORTH,
        Directions.SOUTH: PacmanAction.SOUTH,
        Directions.STOP: PacmanAction.STOP,
    }
    actionToMovement = {
        PacmanAction.WEST: Directions.WEST,
        PacmanAction.EAST: Directions.EAST,
        PacmanAction.NORTH: Directions.NORTH,
        PacmanAction.SOUTH: Directions.SOUTH,
        PacmanAction.STOP: Directions.STOP,
    }

    def __init__(self, evalFn='scoreEvaluationFunction', depth='2'):
        self.index = 0  # Pacman is always agent index 0
        self.evaluationFunction = scoreEvaluationFunction
        self.depth = int(depth)
        self.gameState = GameState()
        self.initializeGameState()
        rospy.Subscriber("/pacman_interface/agent_action", AgentAction,
                         self.agentActionCallback)
        rospy.Service('get_action', PacmanGetAction, self.getAction)
        rospy.spin()

    def initializeGameState(self):
        rospy.wait_for_service('pacman_inialize_game_state')
        try:
            getInitializationInfo = rospy.ServiceProxy(
                'pacman_inialize_game_state', PacmanInitializationInfo)
            initInfo = getInitializationInfo()
            thisLayout = layout.getLayout(initInfo.layout)
            numGhosts = initInfo.numGhosts
            self.gameState.initialize(thisLayout, numGhosts)
            print("Game initialized")
        except rospy.ServiceException as e:
            print("Service call failed: %s" % e)
def __init__(self, start_game_state: GameState):
    super().__init__(start_game_state)
    self._expanded = 0  # DO NOT CHANGE; Number of search nodes expanded
    self.startingPosition = start_game_state.getPacmanPosition()
    self.capsules = tuple(start_game_state.getCapsules())
    self.foods = start_game_state.getFood()
    self.walls = start_game_state.getWalls()
    self.costFn = lambda x: 1
    self.start_game_state = start_game_state
    self.is_eating_capsule = True
def findPathToClosestDot(self, gameState: pacman.GameState): """ Returns a path (a list of actions) to the closest dot, starting from gameState. """ # Here are some useful elements of the startState startPosition = gameState.getPacmanPosition() food = gameState.getFood() walls = gameState.getWalls() problem = AnyFoodSearchProblem(gameState) "*** YOUR CODE HERE ***" return search.bfs(problem)
def calculate_food_distance_score(game_state: GameState):
    food_layout = game_state.getFood()
    min_distance = math.inf
    distance_coefficient = 100
    for x in range(food_layout.width):
        for y in range(food_layout.height):
            # has food
            if food_layout[x][y]:
                pacman_distance_from_food = util.manhattanDistance(
                    game_state.getPacmanPosition(), (x, y))
                min_distance = min(min_distance, pacman_distance_from_food)
    return distance_coefficient / min_distance
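# calculate_food_score, calculate_capsule_score and calculate_food_distance_score
# read like components of a single evaluation function. A sketch of how they
# might be combined (the evaluate_state name and the plain unweighted sum are
# assumptions, not the original code):
def evaluate_state(game_state: GameState) -> float:
    # Each helper already folds in its own coefficient, so a simple sum suffices.
    return (game_state.getScore()
            + calculate_food_score(game_state)
            + calculate_capsule_score(game_state)
            + calculate_food_distance_score(game_state))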
def max_value(self, game_state: GameState, depth):
    if self.is_terminal_state(game_state, depth):
        return self.evaluationFunction(game_state)
    value = -math.inf
    legal_actions = game_state.getLegalActions(self.pacman_index)
    for action in legal_actions:
        successor = game_state.getNextState(self.pacman_index, action)
        value = max(value, self.exp_value(successor, depth=depth, ghost_index=0))
    return value
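# is_terminal_state is referenced above but not shown in this section. A
# minimal sketch inferred from how max_value uses it (the depth comparison is
# an assumption):
def is_terminal_state(self, game_state: GameState, depth) -> bool:
    # The search bottoms out on a win or loss, or once the depth budget is spent.
    return game_state.isWin() or game_state.isLose() or depth == self.depth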
def getAction(self, state):
    # survey agents
    GameState.getAndResetExplored()
    optimalActionLists = []
    for agent in self.solutionAgents:
        optimalActionLists.append((
            agent.getBestPacmanActions(state)[0],
            sum(map(hash, GameState.getAndResetExplored())),
        ))
    alternativeDepthLists = [
        agent.getBestPacmanActions(state)[0]
        for agent in self.alternativeDepthAgents
    ]
    partialPlyBugLists = [
        agent.getBestPacmanActions(state)[0]
        for agent in self.partialPlyBugAgents
    ]

    # record responses
    self.optimalActionLists.append(optimalActionLists)
    self.alternativeDepthLists.append(alternativeDepthLists)
    self.partialPlyBugLists.append(partialPlyBugLists)

    self.stepCount += 1
    random.seed(self.seed + self.stepCount)
    return optimalActionLists[0][0][0]
def getAction(self, gameState: GameState): """ You do not need to change this method, but you're welcome to. getAction chooses among the best options according to the evaluation function. Just like in the previous project, getAction takes a GameState and returns some Directions.X for some X in the set {NORTH, SOUTH, WEST, EAST, STOP} """ # Collect legal moves and successor states legalMoves = gameState.getLegalActions() # print(legalMoves) # Choose one of the best actions scores = [self.evaluationFunction( gameState, action) for action in legalMoves] # print(scores) bestScore = max(scores) # print(bestScore) bestIndices = [index for index in range( len(scores)) if scores[index] == bestScore] # print(bestIndices) # Pick randomly among the best chosenIndex = random.choice(bestIndices) # print(chosenIndex) "Add more of your code here if you want to" return legalMoves[chosenIndex]
def dar_pedazo_de_imagenstate(state: GameState, policy):
    # Extract a 5x5 patch of the feature image centered on Pacman, clipped at
    # the board edges, then flatten it and append Pacman's position.
    pos_pacman = state.getPacmanPosition()
    imagen = dar_features(policy, state)
    lim_filas_der = imagen.shape[0]
    lim_filas_izq = 0
    lim_column_abajo = imagen.shape[1]
    lim_column_arriba = 0
    filas_plus = pos_pacman[0] + 3 if pos_pacman[0] + 2 < imagen.shape[0] else lim_filas_der
    filas_minus = pos_pacman[0] - 2 if pos_pacman[0] >= 2 else lim_filas_izq
    colum_plus = pos_pacman[1] + 3 if pos_pacman[1] + 2 < imagen.shape[1] else lim_column_abajo
    colum_minus = pos_pacman[1] - 2 if pos_pacman[1] >= 2 else lim_column_arriba
    pedazo = np.zeros((5, 5))
    pedazo_imagen = imagen[filas_minus:filas_plus, colum_minus:colum_plus]
    for i in range(pedazo_imagen.shape[0]):
        for j in range(pedazo_imagen.shape[1]):
            pedazo[i, j] = pedazo_imagen[i, j]
    pedazo = np.ravel(pedazo)
    pedazo = np.append(pedazo, pos_pacman[0])
    pedazo = np.append(pedazo, pos_pacman[1])
    return pedazo
def getAction(self, gameState: GameState): """ Returns the minimax action from the current gameState using self.depth and self.evaluationFunction. """ _max = float("-inf") action = None for move in gameState.getLegalActions(0): util = minimax(self.evaluationFunction, 1, 0, gameState.generateSuccessor(0, move), self.depth) if util > _max or _max == float("-inf"): _max = util action = move return action
def evaluationFunction(self, currentGameState: GameState, action: str) -> float:
    """
    Design a better evaluation function here.

    The evaluation function takes in the current and proposed successor
    GameStates (pacman.py) and returns a number, where higher numbers are
    better.

    The code below extracts some useful information from the state, like the
    remaining food (newFood) and Pacman position after moving (newPos).
    newScaredTimes holds the number of moves that each ghost will remain
    scared because of Pacman having eaten a power pellet.

    Print out these variables to see what you're getting, then combine them
    to create a masterful evaluation function.
    """
    # Useful information you can extract from a GameState (pacman.py)
    successorGameState = currentGameState.generatePacmanSuccessor(action)
    walls = successorGameState.getWalls()
    width = walls.width
    height = walls.height
    newPos = successorGameState.getPacmanPosition()
    newFood = successorGameState.getFood()
    newGhostStates = successorGameState.getGhostStates()
    newScaredTimes = [ghostState.scaredTimer for ghostState in newGhostStates]

    "*** YOUR CODE HERE ***"
    # Mark the cells occupied by ghosts that are not scared.
    ghosts = Grid(width, height)
    for i in range(len(newGhostStates)):
        if newScaredTimes[i] <= 0:
            x, y = newGhostStates[i].getPosition()
            ghosts[int(x)][int(y)] = True

    # BFS from Pacman's new position for the distance to the nearest food
    # and to each non-scared ghost.
    queue = util.Queue()
    queue.push((newPos, 0))
    visited = set()
    shortest = float('inf')
    ghosts_dis = []
    while not queue.isEmpty():
        cur, dis = queue.pop()
        x, y = cur
        if in_range(cur, width, height) and not walls[x][y] and cur not in visited:
            visited.add(cur)
            if newFood[x][y]:
                shortest = min(dis, shortest)
            if ghosts[x][y]:
                ghosts_dis.append(dis)
            for d in DIRECTIONS:
                queue.push(((x + d[0], y + d[1]), dis + 1))
    if shortest == float('inf'):
        shortest = 0

    score = successorGameState.getScore()

    def d(x):
        # Penalize nearby ghosts sharply; a ghost on Pacman's square is fatal.
        if x == 0:
            return float('inf')
        return 9 / (x**2)

    score -= shortest + sum(map(d, ghosts_dis))
    if action == 'Stop':
        score -= 10
    return score
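# The BFS above relies on in_range and DIRECTIONS, which are not defined in
# this section. A plausible pair of definitions (both are assumptions):
DIRECTIONS = [(0, 1), (0, -1), (1, 0), (-1, 0)]

def in_range(pos, width, height):
    # True when pos lies on the board, so the BFS never indexes off-grid.
    x, y = pos
    return 0 <= x < width and 0 <= y < height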
def dar_features(policy, state: GameState):
    if not policy.use_image:
        posicion_pacman = state.getPacmanPosition()
        posicion_fantasma = state.getGhostPosition(1)
        # Assumes a layout with a single food dot; np.nonzero returns its coordinates.
        temp = np.nonzero(np.array(state.getFood().data))
        if state.data._win:
            posicion_comida = posicion_pacman
        else:
            posicion_comida = (int(temp[0]), int(temp[1]))
        distancia_a_comida = np.linalg.norm(
            np.array(posicion_pacman) - np.array(posicion_comida))
        res = [distancia_a_comida] + list(posicion_pacman) + list(posicion_fantasma)
        return res
    else:
        return np.array(policy.mapeo_fn(str(state))).reshape(
            policy.height, policy.width, 1)
x = GameState()
layoutText = """
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% % % % % %%% % %%%% %%%% % %%% % % % % % % % % %% %% % % % % % % % % % % % % % % % % % % % %%% % % % %%% %%% % % % %%% % % % % % % % % % % % %%% % % % %% %% % % % %%% % % % % % % % % % % % % %%%% % % % % % % %%%% % % % % % % % % % % %%%% % %%%%% %%%%% % %%%% % % %%% % % %%% %
class PacmanTask(Task):
    def __init__(self, layout, agents, display, state_repr='stack',
                 muteAgents=False, catchExceptions=False):
        '''
        state_repr: state representation, possible values ['stack', 'k-frames', 'dict']
            'stack' - stack walls, food, ghost and pacman representations into a 4D tensor.
            'dict' - return the raw dict representation with
                keys=['walls', 'food', 'ghost', 'pacman'], values are matrix/tensor.
            'k-frames' - instead of directional descriptors for pacman and ghost,
                use static descriptors and capture the past k frames.
        '''
        # parse state representation.
        self.state_repr = state_repr
        self.layout = layout
        self.agents = agents
        self.display = display
        self.muteAgents = muteAgents
        self.catchExceptions = catchExceptions

        if self.state_repr.endswith('frames'):
            bar_pos = self.state_repr.rfind('frames')
            self.state_k = int(self.state_repr[:bar_pos - 1])
            self.state_history = []

        self.init_state = GameState()
        self.init_state.initialize(layout, len(agents))
        self.game_rule = ClassicGameRules(timeout=100)
        self.myagent = GameNoWaitAgent()
        self.init_game = Game([self.myagent] + agents[:layout.getNumGhosts()],
                              display, self.game_rule,
                              catchExceptions=catchExceptions)
        self.init_game.state = self.init_state

        # action mapping.
        self.all_actions = list(Actions._directions.keys())
        self.action_to_dir = {action_i: action
                              for (action_i, action) in enumerate(self.all_actions)}
        self.dir_to_action = {action: action_i
                              for (action_i, action) in enumerate(self.all_actions)}

        def start_game():
            self.game = self.init_game.deepCopy()
            self.game.init()

        self.start_game = start_game
        start_game()

    def deep_copy(self):
        agents = list(self.agents)
        task = PacmanTask(self.layout, agents, self.display, self.state_repr,
                          self.muteAgents, self.catchExceptions)
        task.game = self.game.deepCopy()
        task.myagent = self.myagent
        # TODO: agents are not deep copied.
        return task

    @property
    def curr_state_dict(self):
        return self.game.state.data.array()

    @property
    def curr_state(self):
        if self.state_repr == 'dict':
            return self.curr_state_dict
        elif self.state_repr == 'stack':
            state_dict = self.curr_state_dict
            if len(state_dict.get('ghosts')) == 0:
                ghost_stacked = np.zeros_like(state_dict['pacman'])
            else:
                ghost_stacked = np.sum(state_dict['ghosts'], axis=0)
            state = np.array(
                [state_dict['food'], state_dict['wall']] +
                [state_dict['pacman'][:, :, i] for i in range(4)] +
                [ghost_stacked[:, :, i] for i in range(4)]
            )
            return state
        elif hasattr(self, 'state_k'):
            state_history = self.state_history + [self.curr_state_dict]

            def stack_direction(state_agent):
                return np.sum(state_agent, axis=2)

            state = []
            k = 0
            for hist_dict in state_history[::-1]:
                state.extend(
                    [hist_dict['food'], hist_dict['wall'],
                     stack_direction(hist_dict['pacman'])] +
                    sum([[stack_direction(ghost) for ghost in hist_dict['ghosts']]], [])
                )
                k += 1
            state = np.array(state)
            # Integer division: frame_dim is used as an array index below.
            frame_dim = state.shape[0] // k
            # Pad with empty frames until the history is state_k frames long.
            for ki in range(k, self.state_k + 1):
                state = np.concatenate(
                    (state, np.zeros_like(state[:frame_dim, :, :])), axis=0)
            return state

    def is_end(self):
        return self.game.gameOver

    @property
    def num_actions(self):
        return len(self.all_actions)

    @property
    def valid_actions(self):
        dirs = self.game.state.getLegalPacmanActions()
        return [self.dir_to_action[dir] for dir in dirs]

    def step(self, action):
        if hasattr(self, 'state_k'):
            # if we use past frames, remember the current one.
            self.state_history.append(self.curr_state_dict)
            if len(self.state_history) > self.state_k:
                self.state_history = self.state_history[-self.state_k:]
        if action not in self.valid_actions:
            # TODO: hack.
            action = self.dir_to_action[Directions.STOP]
        # convert action to direction.
        direction = self.action_to_dir[action]
        old_score = self.game.state.data.score
        # run the game using the direction.
        self.myagent.next_action = direction
        self.game.run_one()
        new_score = self.game.state.data.score
        reward = new_score - old_score
        if self.is_end():
            self.game.finalize()
        return reward

    def reset(self):
        self.start_game()

    @property
    def state_shape(self):
        return self.curr_state.shape

    def __str__(self):
        return str(self.game.state)

    def __repr__(self):
        return str(self.game.state)
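# PacmanTask drives the game through a GameNoWaitAgent that is not defined in
# this section. A minimal sketch inferred from step(), which sets
# myagent.next_action before calling game.run_one() (the implementation is an
# assumption):
class GameNoWaitAgent(Agent):
    """A pass-through Pacman agent: the task stores the chosen direction in
    next_action, and getAction simply returns it without blocking."""

    def __init__(self):
        self.next_action = Directions.STOP

    def getAction(self, state):
        return self.next_action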
def generateGameState(args):
    layout = generateLayout(args)
    gameState = GameState()
    numGhostAgents = 1
    gameState.initialize(layout, numGhostAgents)
    return gameState
def generateAllStates(length, ghostNum=1):
    # length of all possible spaces. Do not set the ghost num.
    allStatesWithoutP = []
    for k in range(0, 4**length):
        layout = util.base10toN(k, 4, length)
        allStatesWithoutP.append(layout)

    # Keep only layouts with exactly ghostNum + 1 empty cells, which will
    # hold Pacman and the ghost(s).
    allValidStates = []
    for k in allStatesWithoutP:
        zerocount = 0
        for x in range(0, len(k)):
            if k[x] == "0":
                zerocount += 1
        if zerocount == (ghostNum + 1):
            allValidStates.append(k)

    allLayouts = []
    for k in allValidStates:
        # hardcoded for only ONE GHOST!!
        tempstring1 = ""
        tempstring2 = ""
        switcher = True
        for x in range(0, len(k)):
            if k[x] == "0":
                if switcher:
                    tempstring1 += "4"
                    tempstring2 += "5"
                else:
                    tempstring1 += "5"
                    tempstring2 += "4"
                switcher = False
            else:
                tempstring1 += k[x]
                tempstring2 += k[x]
        allLayouts.append(tempstring1)
        allLayouts.append(tempstring2)

    for k in range(0, len(allLayouts)):
        state = allLayouts[k]
        newstate = "%"
        for x in range(0, len(state)):
            if state[x] == "1":
                newstate += " "
            elif state[x] == "2":
                newstate += "."
            elif state[x] == "3":
                newstate += "o"
            elif state[x] == "4":
                newstate += "P"
            elif state[x] == "5":
                newstate += "G"
        newstate += "%"
        layouttext = []
        layouttext.append("%" * (length + 2))  # HARDCODE
        layouttext.append(newstate)
        layouttext.append("%" * (length + 2))  # HARDCODE
        allLayouts[k] = layouttext
        # print(layouttext)

    allStates = []
    for k in range(0, len(allLayouts)):
        layout = Layout(allLayouts[k])
        gameState = GameState()
        gameState.initialize(layout, 1)  # ghost hardcoded
        allStates.append(gameState)
    return allStates