def getFeatures(self, gameState, action):
    # print("start get features")
    successor = self.getSuccessor(gameState, action)
    features = {
        "food": 0,
        "foodCarry": 0,
        "distanceHome": 0,
        "capsule": 0,
        "ghostMin": 0,
        "ghostMax": 0,
        "ways": 0,
        "friend": 0,
        "b": 1
    }

    if self.getTeamateDistance(successor) <= 5:
        features["friend"] = (self.getTeamateDistance(gameState)
                              - self.getTeamateDistance(successor))

    if len(self.getCapsules(gameState)) - len(self.getCapsules(successor)) == 1:
        if (not self.getEnermy(gameState)[0][3]) or (not self.getEnermy(gameState)[1][3]):
            features["capsule"] = 2000

    if (self.getEnermy(gameState)[0][0]
            and self.getEnermy(gameState)[0][1] is not None
            and self.getEnermyDistanceToMe(gameState)[0] < 4
            and (not gameState.getAgentState(self.index).isPacman)
            and gameState.getAgentState(self.index).scaredTimer == 0
            and (not self.getEnermy(gameState)[0][3])
            and len(self.getCapsulesYouAreDefending(gameState)) == 0) \
            or (self.getEnermy(gameState)[0][0] and self.defence
                and self.getEnermy(gameState)[0][1] is not None
                and gameState.getAgentState(self.index).scaredTimer == 0
                and (not gameState.getAgentState(self.index).isPacman)):
        features["ghostMin"] = self.getEnermy(successor)[0][2] - self.getEnermy(gameState)[0][2]
        if features["ghostMin"] > 1:
            features["ghostMin"] = -1000
        # print("chaseA", action, features)
        return util.Counter(features)  # chase pacman

    if (self.getEnermy(gameState)[1][0]
            and self.getEnermy(gameState)[1][1] is not None
            and self.getEnermyDistanceToMe(gameState)[1] < 4
            and (not gameState.getAgentState(self.index).isPacman)
            and gameState.getAgentState(self.index).scaredTimer == 0
            and (not self.getEnermy(gameState)[0][3])
            and len(self.getCapsulesYouAreDefending(gameState)) == 0) \
            or (self.getEnermy(gameState)[1][0] and self.defence
                and self.getEnermy(gameState)[1][1] is not None
                and gameState.getAgentState(self.index).scaredTimer == 0
                and (not gameState.getAgentState(self.index).isPacman)):
        features["ghostMax"] = self.getEnermy(successor)[1][2] - self.getEnermy(gameState)[1][2]
        if features["ghostMax"] > 1:
            features["ghostMax"] = -1000
        # print("chaseB", action, features)
        return util.Counter(features)  # chase pacman

    if (self.getEnermyDistanceToMe(gameState)[0] < 4
            and gameState.getAgentState(self.index).isPacman
            and not self.getEnermy(gameState)[0][3]) \
            or (self.getEnermyDistanceToMe(gameState)[1] < 4
                and gameState.getAgentState(self.index).isPacman
                and not self.getEnermy(gameState)[1][3]) \
            or (self.myDistance[self.start] > 10000
                and min(self.getCapsuleDistance(gameState)) > 10000):
        if len(self.food) != 0:
            self.targetFood = random.choice(self.getFood(gameState).asList())
        if len(self.getCapsules(gameState)) == 0 or min(self.getCapsuleDistance(gameState)) > 10000:
            features["friend"] = (self.getTeamateDistance(gameState)
                                  - self.getTeamateDistance(successor))
            features["ways"] = (len(successor.getLegalActions(self.index))
                                - len(gameState.getLegalActions(self.index)))
            if self.getHomeDistance(gameState) < 10000:
                features["distanceHome"] = (self.getHomeDistance(gameState)
                                            - self.getHomeDistance(successor))
            else:
                print("no way home, try your skills, be Messi")
                features["distanceHome"] = self.getHome(gameState) - self.getHome(successor)
                features["ways"] = 0
            # print("escapeForHome", action, features)
            # print("distanceToHome", self.getHomeDistance(gameState))
            # print(gameState)
        else:
            if features["capsule"] != 2000:
                features["capsule"] = (min(self.getCapsuleDistance(gameState))
                                       - min(self.getCapsuleDistance(successor)))
            # print("escapeForCap", action, features)
        return util.Counter(features)
    if self.goBackScore(gameState, successor) or successor.data._win:
        features["foodCarry"] = abs(successor.getAgentState(self.index).numCarrying
                                    - gameState.getAgentState(self.index).numCarrying) * 300
        features["distanceHome"] = (self.getHomeDistance(gameState)
                                    - self.getHomeDistance(successor))
        # print("goBackscore", action, features)
        return util.Counter(features)

    if self.goBack(gameState, successor):
        e1, e2 = self.getEnermy(gameState)
        features["friend"] = (self.getTeamateDistance(gameState)
                              - self.getTeamateDistance(successor))
        features["distanceHome"] = (self.getHomeDistance(gameState)
                                    - self.getHomeDistance(successor))
        if self.saveFriend(gameState) and gameState.getAgentState((self.index + 2) % 4).isPacman:
            # print("Save your friend")
            features["capsule"] = 2 * features["capsule"]
        if not gameState.getAgentState(self.index).isPacman:
            features["distanceHome"] = 0
            features["b"] = 0
            features["friend"] = 0
            # print("finish and stay for defend")
        if gameState.getAgentState(self.index).scaredTimer > 0:
            if e1[0] and (e1[1] is not None):
                features["ghostMin"] = abs(e1[2] - 2)
            if e2[0] and (e2[1] is not None):
                features["ghostMin"] = abs(e2[2] - 2)
        # print("goBack", action, features)
        return util.Counter(features)

    if (successor.getAgentState(self.index).numCarrying
            - gameState.getAgentState(self.index).numCarrying) == 1:
        # eating food
        features["food"] = 1
        features["foodCarry"] = (successor.getAgentState(self.index).numCarrying
                                 - gameState.getAgentState(self.index).numCarrying)
        # print("eatingFood", action, features)
        return util.Counter(features)

    if self.goEat(gameState, successor):
        # print("food Safe")
        e1, e2 = self.getEnermy(successor)
        if self.getTargetFoodDistance(gameState) == 99999:
            features["food"] = (self.getMazeDistance(self.myPosition, self.targetFood)
                                - self.getMazeDistance(successor.getAgentPosition(self.index),
                                                       self.targetFood))
        else:
            features["food"] = (self.getTargetFoodDistance(gameState)
                                - self.getTargetFoodDistance(successor))
        if features["capsule"] != 2000:
            features["capsule"] = (min(self.getCapsuleDistance(gameState))
                                   - min(self.getCapsuleDistance(successor)))
            if features["capsule"] < -1:
                features["capsule"] = 1
        if (not gameState.getAgentState(self.index).isPacman) \
                and self.getEnermyDistanceToMe(gameState)[0] < 4 \
                and (not e1[0]) and (not e1[3]) and (e1[1] is not None):
            print("wondering")
            if len(self.food) != 0:
                self.targetFood = random.choice(self.food)
            features["food"] = e1[2] / 10
            features["capsule"] = 0
        if (not gameState.getAgentState(self.index).isPacman) \
                and self.getEnermyDistanceToMe(gameState)[1] < 4 \
                and e2[2] < e1[2] \
                and (not e2[0]) and (not e2[3]) and (e2[1] is not None):
            print("wondering")
            if len(self.food) != 0:
                self.targetFood = random.choice(self.food)
            features["food"] = e2[2] / 10
            features["capsule"] = 0
        features["foodCarry"] = (successor.getAgentState(self.index).numCarrying
                                 - gameState.getAgentState(self.index).numCarrying)
        if self.saveFriend(gameState) and min(self.getCapsuleDistance(gameState)) < 10000:
            # print("Save your friend")
            features["capsule"] = 2 * features["capsule"]
        # print("goEat", action, features)
        return util.Counter(features)

    print("what???")
    return util.Counter(features)
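# --- Illustrative sketch, not from this codebase: how a feature Counter like
# the one returned above is typically consumed. Agents of this style score an
# action by the dot product of features and a hand-tuned weight vector. Only
# the feature keys mirror the function above; the weight values and the
# evaluate_sketch helper are invented for the example.
def evaluate_sketch(features, weights):
    """Linear evaluation: sum over shared keys of feature value * weight."""
    return sum(value * weights.get(key, 0.0) for key, value in features.items())

example_features = {"food": 1, "foodCarry": 1, "distanceHome": 2, "capsule": 0,
                    "ghostMin": 0, "ghostMax": 0, "ways": 0, "friend": 1, "b": 1}
example_weights = {"food": 100.0, "foodCarry": 30.0, "distanceHome": 5.0,
                   "capsule": 10.0, "ghostMin": 50.0, "ghostMax": 50.0,
                   "ways": 2.0, "friend": -1.0, "b": 1.0}
print(evaluate_sketch(example_features, example_weights))  # 140.0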
def __init__(self, **args):
    "You can initialize Q-values here..."
    ReinforcementAgent.__init__(self, **args)

    "*** YOUR CODE HERE ***"
    self.values = util.Counter()  # A Counter is a dict with default 0
def __init__(self, extractor='IdentityExtractor', **args):
    self.featExtractor = util.lookup(extractor, globals())()
    PacmanQAgent.__init__(self, **args)
    self.weights = util.Counter()
def __init__(self, **args):
    "You can initialize Q-values here..."
    ReinforcementAgent.__init__(self, **args)

    "*** YOUR CODE HERE ***"
    self.qValues = util.Counter()
def __init__(self, extractor='IdentityExtractor', **args):
    self.featExtractor = util.lookup(extractor, globals())()
    PacmanQAgent.__init__(self, **args)
    # You might want to initialize weights here.
    self.weight = util.Counter()
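# --- A minimal sketch of the approximate Q-learning update that the two
# __init__ methods above set up (a feature extractor plus a weight Counter).
# This is the standard linear Q-learning rule, not code from this repo; the
# helper names, alpha/discount values, and the tiny feature dict are
# illustrative assumptions.
def q_value(weights, features):
    # Q(s, a) = sum_f w[f] * f(s, a)
    return sum(weights.get(f, 0.0) * v for f, v in features.items())

def q_update(weights, features, reward, max_next_q, alpha=0.5, discount=0.9):
    # difference = (r + gamma * max_a' Q(s', a')) - Q(s, a)
    difference = (reward + discount * max_next_q) - q_value(weights, features)
    for f, v in features.items():
        # w[f] <- w[f] + alpha * difference * f(s, a)
        weights[f] = weights.get(f, 0.0) + alpha * difference * v
    return weights

w = q_update({}, {"bias": 1.0, "eats-food": 1.0}, reward=10.0, max_next_q=0.0)
print(w)  # {'bias': 5.0, 'eats-food': 5.0}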
def getFeatures(self, gameState, action):
    """
    Get features used for state evaluation.
    """
    features = util.Counter()
    successor = self.getSuccessor(gameState, action)

    # Compute score from successor state
    features['successorScore'] = self.agent.getScore(successor)

    # Get current position of the agent
    CurrentPosition = successor.getAgentState(self.index).getPosition()

    # Compute the distance to the nearest boundary
    boundaryMin = 1000000
    for i in range(len(self.boundary)):
        disBoundary = self.agent.getMazeDistance(CurrentPosition, self.boundary[i])
        if disBoundary < boundaryMin:
            boundaryMin = disBoundary
    features['returned'] = boundaryMin

    features['carrying'] = successor.getAgentState(self.index).numCarrying

    # Compute distance to the nearest food
    foodList = self.agent.getFood(successor).asList()
    if len(foodList) > 0:
        minFoodDistance = 99999
        for food in foodList:
            distance = self.agent.getMazeDistance(CurrentPosition, food)
            if distance < minFoodDistance:
                minFoodDistance = distance
        features['distanceToFood'] = minFoodDistance

    # Compute distance to the nearest capsule
    capsuleList = self.agent.getCapsules(successor)
    if len(capsuleList) > 0:
        minCapsuleDistance = 99999
        for c in capsuleList:
            distance = self.agent.getMazeDistance(CurrentPosition, c)
            if distance < minCapsuleDistance:
                minCapsuleDistance = distance
        features['distanceToCapsule'] = minCapsuleDistance
    else:
        features['distanceToCapsule'] = 0

    # Compute distance to the closest ghost. A list comprehension is used
    # instead of filter() so len() also works under Python 3.
    opponentsState = []
    for i in self.agent.getOpponents(successor):
        opponentsState.append(successor.getAgentState(i))
    visible = [x for x in opponentsState
               if not x.isPacman and x.getPosition() is not None]
    if len(visible) > 0:
        positions = [agent.getPosition() for agent in visible]
        closest = min(positions,
                      key=lambda x: self.agent.getMazeDistance(CurrentPosition, x))
        closestDist = self.agent.getMazeDistance(CurrentPosition, closest)
        if closestDist <= 5:
            # print(CurrentPosition, closest, closestDist)
            features['GhostDistance'] = closestDist
    else:
        probDist = []
        for i in self.agent.getOpponents(successor):
            probDist.append(successor.getAgentDistances()[i])
        features['GhostDistance'] = min(probDist)

    # The attacker only tries to kill an enemy Pacman when it is in ghost
    # form and the distance between them is less than 4.
    enemiesPacMan = [successor.getAgentState(i)
                     for i in self.agent.getOpponents(successor)]
    Range = [x for x in enemiesPacMan
             if x.isPacman and x.getPosition() is not None]
    if len(Range) > 0:
        positions = [agent.getPosition() for agent in Range]
        closest = min(positions,
                      key=lambda x: self.agent.getMazeDistance(CurrentPosition, x))
        closestDist = self.agent.getMazeDistance(CurrentPosition, closest)
        if closestDist < 4:
            # print(CurrentPosition, closest, closestDist)
            features['distanceToEnemiesPacMan'] = closestDist
    else:
        features['distanceToEnemiesPacMan'] = 0

    return features
def initializeWeightsToZero(self):
    "Resets the weights of each label to zero vectors"
    self.weights = {}
    for label in self.legalLabels:
        self.weights[label] = util.Counter()  # this is the data-structure you should use
def __init__(self, mdp, discount=0.9, iterations=100):
    """
    Your value iteration agent should take an mdp on construction, run the
    indicated number of iterations and then act according to the resulting
    policy.

    Some useful mdp methods you will use:
        mdp.getStates()
        mdp.getPossibleActions(state)
        mdp.getTransitionStatesAndProbs(state, action)
        mdp.getReward(state, action, nextState)
        mdp.isTerminal(state)
    """
    super(PolicyIterationAgent, self).__init__()
    import mdp as mdp_module
    self.mdp: mdp_module.MarkovDecisionProcess = mdp
    self.discount = discount
    print("using discount {}".format(discount))
    self.iterations = iterations
    self.values = util.Counter()  # A Counter is a dict with default 0
    self.policy: Dict[Tuple[int, int], str] = {
        state: self.mdp.getPossibleActions(state)[0]
        if len(self.mdp.getPossibleActions(state)) > 0 else None
        for state in self.mdp.getStates()
    }
    delta = 0.01

    # Policy iteration: alternate policy evaluation and policy improvement.
    for i in range(self.iterations):
        # Policy evaluation: iterate until V converges under the current policy.
        while True:
            old_values = self.values.copy()
            for state in self.mdp.getStates():
                self.values[state] = sum(
                    [prob * (self.mdp.getReward(state, self.policy[state], next_state)
                             + self.discount * old_values[next_state])
                     for next_state, prob in self.mdp.getTransitionStatesAndProbs(
                         state, self.policy[state])]
                    if self.policy[state] is not None else [])
            # Break once the Euclidean distance between sweeps is small enough.
            if sum(x ** 2 for x in (self.values - old_values).values()) ** .5 < delta:
                break
        # Policy improvement: act greedily with respect to the evaluated values.
        old_policy = self.policy.copy()
        for state in self.mdp.getStates():
            self.policy[state] = None \
                if len(self.mdp.getPossibleActions(state)) <= 0 \
                else max(self.mdp.getPossibleActions(state),
                         key=lambda action: sum(
                             [prob * (self.mdp.getReward(state, action, next_state)
                                      + (self.discount * self.values[next_state]))
                              for next_state, prob in self.mdp.getTransitionStatesAndProbs(
                                  state, action)]
                             + [0]))
        if self.policy == old_policy:
            print(f"policy convergence after {i} iterations")
            break
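# --- Hedged illustration of the policy-evaluation step above on a toy
# two-state MDP (the states "A"/"B" and all transition/reward numbers are
# invented for the example). Repeated sweeps of
#     V(s) <- sum_s' P(s' | s, pi(s)) * (R(s, pi(s), s') + gamma * V(s'))
# converge to the value of the fixed policy, exactly as the inner while-loop
# above does before each improvement step.
gamma, delta = 0.9, 0.01
transitions = {("A", "go"): [("B", 1.0)], ("B", "go"): [("A", 1.0)]}
rewards = {("A", "go", "B"): 1.0, ("B", "go", "A"): 0.0}
policy = {"A": "go", "B": "go"}
V = {"A": 0.0, "B": 0.0}
while True:
    old = dict(V)
    for s in V:
        V[s] = sum(p * (rewards[(s, policy[s], s2)] + gamma * old[s2])
                   for s2, p in transitions[(s, policy[s])])
    if sum((V[s] - old[s]) ** 2 for s in V) ** 0.5 < delta:
        break
# Fixed point: V(A) = 1 + 0.9 * V(B), V(B) = 0.9 * V(A), so roughly
# {'A': 5.26, 'B': 4.74}.
print(V)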
def observe(self, gameState):
    distances = gameState.getAgentDistances()
    isRed = self.red
    actual_distances = {}
    for i in range(len(distances)):
        if not isRed and i in gameState.getRedTeamIndices():
            actual_distances[i] = distances[i]
        elif isRed and i in gameState.getBlueTeamIndices():
            actual_distances[i] = distances[i]

    pos = gameState.getAgentState(self.index)
    pos = pos.getPosition()

    # For each tracked opponent, build the single-observation distribution
    # P(position | noisy distance reading).
    new_distributions = {}
    for key in actual_distances:
        new_distributions[key] = util.Counter()
        for position in self.legalPositions:
            dist = distanceCalculator.manhattanDistance(position, pos)
            new_distributions[key][position] = gameState.getDistanceProb(
                dist, actual_distances[key])

    # Multiply into the running belief, or initialize it on the first call.
    if hasattr(self, 'distributions'):
        for key in actual_distances:
            for entry in new_distributions[key]:
                self.distributions[key][entry] *= new_distributions[key][entry]
    else:
        self.distributions = new_distributions

    # Diffuse each belief to neighboring legal positions (time elapse), then
    # renormalize.
    for key in actual_distances:
        new_d = util.Counter()
        for position in self.legalPositions:
            val = self.distributions[key][position]
            left = (position[0] - 1, position[1])
            right = (position[0] + 1, position[1])
            top = (position[0], position[1] - 1)
            bot = (position[0], position[1] + 1)
            new_d[position] += val
            if left in self.legalPositions:
                new_d[left] += val
            if right in self.legalPositions:
                new_d[right] += val
            if top in self.legalPositions:
                new_d[top] += val
            if bot in self.legalPositions:
                new_d[bot] += val
        new_d.normalize()
        self.distributions[key] = new_d

    # Distribution-printing routine for debugging (disabled, Python 2 syntax):
    """
    for key in self.distributions:
        best_positions = []
        best_prob = 0
        d = self.distributions[key]
        for entry in self.distributions[key]:
            if d[entry] > best_prob:
                best_prob = d[entry]
                best_positions = [entry]
            elif d[entry] == best_prob:
                best_positions.append(entry)
        predicted = random.choice(best_positions)
        print predicted
        arr = [[0 for x in range(31)] for y in range(15)]
        for element in self.distributions[key]:
            arr[element[1]][element[0]] = self.distributions[key][element]
        for r in range(15, 0, -1):
            for c in range(31):
                if (c, r) == predicted:
                    print '@',
                elif (c, r) in self.legalPositions:
                    print '-' if arr[r][c] else ' ',
                else:
                    print "#",
            print
    """

    # If an observation zeroed a distribution out entirely, fall back to the
    # fresh single-observation distribution.
    for key in self.distributions:
        allZero = True
        for entry in self.distributions[key]:
            if self.distributions[key][entry]:
                allZero = False
        if allZero:
            self.distributions = new_distributions
    return
def getOffenseFeatures(self, gameState, action):
    '''
    Retrieve the values for the offensive features.

    :param gameState: The current game state to evaluate
    :param action: The selected action to take
    :returns: The selected feature values
    '''
    # -----------------------------------------------------
    # initialize settings
    # -----------------------------------------------------
    features = util.Counter()
    successor = self.getSuccessor(gameState, action)
    state = successor.getAgentState(self.index)
    position = state.getPosition()
    features['successorScore'] = self.getScore(successor)
    features['stop'] = (action == Directions.STOP)

    # -----------------------------------------------------
    # Computes distance to defenders we can see
    # -----------------------------------------------------
    enemies = [successor.getAgentState(i) for i in self.getOpponents(successor)]
    invaders = [a for a in enemies if not a.isPacman and a.getPosition() is not None]
    if len(invaders) > 0 and state.isPacman:
        dist = min([self.getMazeDistance(position, a.getPosition()) for a in invaders])
        features['defenderDistance'] = -100 if dist < 2 else dist
    else:
        features['defenderDistance'] = 100  # we got a power pill

    # -----------------------------------------------------
    # Computes distance to power pellets
    # -----------------------------------------------------
    isScared = any(successor.getAgentState(i).scaredTimer > 0
                   for i in self.getOpponents(successor))
    powerList = self.getCapsules(successor)
    if (len(powerList) > 0) and not isScared:
        distances = (self.getMazeDistance(position, power) for power in powerList)
        features['distanceToPower'] = min(distances)
    else:
        features['distanceToPower'] = 0  # don't negative weight yet

    # -----------------------------------------------------
    # computes distance to team mate
    # -----------------------------------------------------
    partners = [successor.getAgentState(i) for i in self.getTeam(successor)]
    if len(partners) > 0:
        dist = max([self.getMazeDistance(position, a.getPosition()) for a in partners])
        features['partnerDistance'] = dist
    else:
        features['partnerDistance'] = 0

    # -----------------------------------------------------
    # Computes distance to invaders we can see
    # -----------------------------------------------------
    enemies = [successor.getAgentState(i) for i in self.getOpponents(successor)]
    invaders = [a for a in enemies if a.isPacman and a.getPosition() is not None]
    if len(invaders) > 0 and not state.isPacman:
        dist = min([self.getMazeDistance(position, a.getPosition()) for a in invaders])
        features['ghostDistance'] = dist
    else:
        features['ghostDistance'] = 0  # we are attacking

    # -----------------------------------------------------
    # Compute distance to the nearest food
    # -----------------------------------------------------
    foodList = self.getFood(successor).asList()
    if len(foodList) > 0:
        distances = (self.getMazeDistance(position, food) for food in foodList)
        features['distanceToFood'] = min(distances)

    return features
def getFeatures(self, gameState, action):
    # Start like getFeatures of OffensiveReflexAgent
    features = util.Counter()
    successor = self.getSuccessor(gameState, action)

    # Get other variables for later use
    food = self.getFood(gameState)
    capsules = gameState.getCapsules()
    foodList = food.asList()
    walls = gameState.getWalls()
    x, y = gameState.getAgentState(self.index).getPosition()
    vx, vy = Actions.directionToVector(action)
    newx = int(x + vx)
    newy = int(y + vy)

    # Get set of invaders and defenders
    enemies = [gameState.getAgentState(a) for a in self.getOpponents(gameState)]
    invaders = [a for a in enemies if not a.isPacman and a.getPosition() is not None]
    defenders = [a for a in enemies if a.isPacman and a.getPosition() is not None]

    # Check if pacman has stopped
    if action == Directions.STOP:
        features["stuck"] = 1.0

    # Get ghosts close by
    for ghost in invaders:
        ghostpos = ghost.getPosition()
        neighbors = Actions.getLegalNeighbors(ghostpos, walls)
        if (newx, newy) == ghostpos:
            if ghost.scaredTimer == 0:
                features["scaredGhosts"] = 0
                features["normalGhosts"] = 1
            else:
                features["eatFood"] += 2
                features["eatGhost"] += 1
        elif ((newx, newy) in neighbors) and (ghost.scaredTimer > 0):
            features["scaredGhosts"] += 1
        elif (successor.getAgentState(self.index).isPacman) and (ghost.scaredTimer > 0):
            features["scaredGhosts"] = 0
            features["normalGhosts"] += 1

    # How to act if scared or not scared
    if gameState.getAgentState(self.index).scaredTimer == 0:
        for ghost in defenders:
            ghostpos = ghost.getPosition()
            neighbors = Actions.getLegalNeighbors(ghostpos, walls)
            if (newx, newy) == ghostpos:
                features["eatInvader"] = 1
            elif (newx, newy) in neighbors:
                features["closeInvader"] += 1
    else:
        for ghost in enemies:
            if ghost.getPosition() is not None:
                ghostpos = ghost.getPosition()
                neighbors = Actions.getLegalNeighbors(ghostpos, walls)
                if (newx, newy) in neighbors:
                    features["closeInvader"] += -10
                    features["eatInvader"] = -10
                elif (newx, newy) == ghostpos:
                    features["eatInvader"] = -10

    # Get capsules when nearby
    for cx, cy in capsules:
        if newx == cx and newy == cy and successor.getAgentState(self.index).isPacman:
            features["eatCapsule"] = 1.0

    # When to eat
    if not features["normalGhosts"]:
        if food[newx][newy]:
            features["eatFood"] = 1.0
        if len(foodList) > 0:
            tempFood = []
            for food in foodList:
                food_x, food_y = food
                adjustedindex = self.index - self.index % 2
                check1 = food_y > (adjustedindex / 2) * walls.height / 3
                check2 = food_y < ((adjustedindex / 2) + 1) * walls.height / 3
                if check1 and check2:
                    tempFood.append(food)
            if len(tempFood) == 0:
                tempFood = foodList
            mazedist = [self.getMazeDistance((newx, newy), food) for food in tempFood]
            if min(mazedist) is not None:
                walldimensions = walls.width * walls.height
                features["nearbyFood"] = float(min(mazedist)) / walldimensions

    features.divideAll(10.0)
    return features
def registerInitialState(self, gameState):
    """
    This method handles the initial setup of the agent to populate useful
    fields (such as what team we're on).

    A distanceCalculator instance caches the maze distances between each pair
    of positions, so your agents can use:
        self.distancer.getDistance(p1, p2)

    IMPORTANT: This method may run for at most 15 seconds.
    """
    self.walls = gameState.getWalls().data
    self.initCapsules(gameState)
    self.goHome = False
    self.stage = 0
    self.startPostion = gameState.getInitialAgentPosition(self.index)
    self.width = gameState.getWalls().width
    self.height = gameState.getWalls().height
    self.foodNum = len(self.getFood(gameState).asList())
    self.bePersuitedTime = 0
    self.teamMap = {}
    self.teamMap[0] = 2
    self.teamMap[1] = 3
    self.teamMap[2] = 0
    self.teamMap[3] = 1

    '''
    Make sure you do not delete the following line. If you would like to use
    Manhattan distances instead of maze distances in order to save on
    initialization time, please take a look at
    CaptureAgent.registerInitialState in captureAgents.py.
    '''
    CaptureAgent.registerInitialState(self, gameState)

    self.weights = util.Counter()
    self.weights["can_be_captured"] = -2580.941410881
    self.weights["closest-food"] = -258.941410881
    self.weights["bias"] = -258.941410881
    self.weights["eats-food"] = 147.237783878
    self.weights["#-of-ghosts-1-step-away"] = -2580.941410881
    # self.weights["team_dis"] = -256.941410881
    self.weights["from_mid"] = -25.941410881
    self.verticleDirection = set([Directions.NORTH, Directions.SOUTH, Directions.STOP])
    self.northEntrance = self.getNorthEntrance()
    self.southEntrance = self.getSouthEntrance()
    # self.weights2 = util.Counter()
    # self.weights2["closest-food"] = -258.941410881
    # self.weights2["bias"] = -258.941410881

    '''
    Your initialization code goes here, if you need any.
    '''
    self.caveDis = {}
    self.caveSet = set()
    self.caveEntry = set()
    # self.caveExit = set()
    for i in range(len(self.walls)):
        for j in range(len(self.walls[0])):
            if not self.walls[i][j]:
                self.myHandle(i, j)
    for i in range(len(self.walls)):
        for j in range(len(self.walls[0])):
            if not self.walls[i][j]:
                self.myBFS2(i, j)
    for c in list(self.caveSet):
        if (c[0] - 1, c[1]) not in self.caveSet and not self.walls[c[0] - 1][c[1]]:
            self.caveEntry.add(c)
            continue
        if (c[0] + 1, c[1]) not in self.caveSet and not self.walls[c[0] + 1][c[1]]:
            self.caveEntry.add(c)
            continue
        if (c[0], c[1] - 1) not in self.caveSet and not self.walls[c[0]][c[1] - 1]:
            self.caveEntry.add(c)
            continue
        if (c[0], c[1] + 1) not in self.caveSet and not self.walls[c[0]][c[1] + 1]:
            self.caveEntry.add(c)
            continue
    for c in list(self.caveEntry):
        self.tempset = set()
        self.tempcount = 0
        self.myBFS(c[0], c[1], 1, c)
def getFeatures(self, state, action):
    # print self.caveEntry
    # print (23, 4) in self.caveSet
    # Extract the grid of food and wall locations and get the ghost locations
    food = self.getFood(state)
    x, y = state.getAgentPosition(self.index)
    dx, dy = Actions.directionToVector(action)
    next_x, next_y = int(x + dx), int(y + dy)
    walls = state.getWalls()
    ghostIndexList = self.getOpponents(state)
    ghostPositions = [state.getAgentPosition(g) for g in ghostIndexList]

    features = util.Counter()
    features["bias"] = 1.0

    # Compute the location of pacman after he takes the action
    if (next_x, next_y) in self.caveDis:
        caveDis, caveEntry = self.caveDis[next_x, next_y]
        for ghost in ghostIndexList:
            temp = state.getAgentPosition(ghost)
            if temp is not None:
                if state.getAgentState(ghost).scaredTimer < 5:
                    if self.getMazeDistance((next_x, next_y), temp) > 5:
                        continue
                    if caveDis >= self.getMazeDistance(temp, caveEntry) - 2:
                        features["can_be_captured"] = 1
                        break
    else:
        features["can_be_captured"] = 0

    # count the number of ghosts 1-step away
    features["#-of-ghosts-1-step-away"] = 0
    for ghost in ghostIndexList:
        if state.getAgentPosition(ghost) is not None:
            if state.getAgentState(ghost).scaredTimer < 3:
                if (next_x, next_y) in Actions.getLegalNeighbors(
                        state.getAgentPosition(ghost), walls):
                    if not self.isInMyArea((next_x, next_y)):
                        features["#-of-ghosts-1-step-away"] += 1
                    else:
                        if state.getAgentState(self.index).scaredTimer > 0:
                            features["#-of-ghosts-1-step-away"] += 1

    if not features["can_be_captured"] and food[next_x][next_y]:
        features["eats-food"] = 1.0
    if len(food.asList()) < 3:
        features["eats-food"] = 1.0

    if self.goHome:
        features["closest-food"] = self.getHomeDis((next_x, next_y), walls)
    elif state.data.timeleft < 80:
        self.goHome = True
        features["closest-food"] = self.getHomeDis((next_x, next_y), walls)
    elif self.bePersuitedTime > 20:
        # print (next_x, next_y), self.getHomeDis((next_x, next_y), walls), features
        features["closest-food"] = self.getHomeDis((next_x, next_y), walls)
    else:
        if self.index < 2:
            if self.isInMyArea((next_x, next_y)) and self.stage == 0:
                dist = self.getMazeDistance((next_x, next_y), self.northEntrance)
            else:
                dist = self.SouthClosestFood((next_x, next_y), food, walls)
            if dist is not None:
                # make the distance a number less than one otherwise the
                # update will diverge wildly
                features["closest-food"] = float(dist) / (walls.width * walls.height)
        else:
            # if self.isInMyArea((next_x, next_y)) and self.stage == 0:
            #     dist = self.getMazeDistance((next_x, next_y), self.southEntrance)
            # else:
            dist = self.NorthClosestFood((next_x, next_y), food, walls)
            if dist is not None:
                # make the distance a number less than one otherwise the
                # update will diverge wildly
                features["closest-food"] = float(dist) / (walls.width * walls.height)

    features.divideAll(10.0)
    return features
def enhancedPacmanFeatures(state, action):
    """
    For each state, this function is called with each legal action.
    It should return a counter with { <feature name> : <feature value>, ... }

    python dataClassifier.py -c perceptron -d pacman -f -g ContestAgent

    # Sets 1 for the position of pacman and 0 otherwise:
    for x in range(20):
        for y in range(20):
            if (x, y) == state.getPacmanPosition():
                features[(x, y)] = 1
            else:
                features[(x, y)] = 0
    """
    features = util.Counter()
    successor = state.generateSuccessor(0, action)

    agent_pos = successor.getPacmanPosition()
    ghosts = successor.getGhostPositions()
    ghost_state = successor.getGhostStates()
    capsules = successor.getCapsules()
    state_food = state.getFood()
    food = [(x, y) for x, row in enumerate(state_food)
            for y, food in enumerate(row) if food]

    nearest_ghosts = sorted([util.manhattanDistance(agent_pos, i) for i in ghosts])
    features["nearest_ghost"] = nearest_ghosts[0]
    # print(ghost_state)
    # if state.data.agentStates[nearest_ghosts[0]].scaredTimer > 0:
    #     features["ghost_scared"] = 1
    # else:
    # Note: ghost_state is a list and therefore unhashable, so it cannot be
    # part of a Counter key; a plain string key is used instead.
    features["ghost_scared"] = 0
    # for i in xrange(min(len(nearest_ghosts), 1)):
    #     features[("ghost", i)] = 5 / (0.1 + nearest_ghosts[i])

    nearest_caps = sorted([util.manhattanDistance(agent_pos, i) for i in capsules])
    for i in xrange(min(len(nearest_caps), 1)):
        features[("capsule", i)] = 15 / (1 + nearest_caps[i])

    nearest_food = sorted([util.manhattanDistance(agent_pos, i) for i in food])
    for i, weight in zip(xrange(min(len(nearest_food), 5)), [1.3, 0.8] + [0.9] * 3):
        features[("food", i)] = weight * nearest_food[i]

    # features["capsule count"] = len(capsules) * 10
    features["iswin"] = state.isWin()
    features["islose"] = state.isLose()
    features["score"] = state.getScore()  # * 10
    # features["pacman"] = agent_pos  # implementing this does not work!
    return features
def getFeatures(self, state, agent_id):
    f = util.Counter()
    return f
def observe(self, observation, gameState):
    """
    Updates beliefs based on the distance observation and Pacman's position.

    When we enter this function pacman's distribution over possible locations
    of the ghost are stored in self.beliefs. For any position p:
        self.beliefs[p] = Pr(X_t = p | e_{t-1}, e_{t-2}, ..., e_1)
    That is, pacman's distribution has already been updated by all prior
    observations.

    This function should update self.beliefs[p] so that
        self.beliefs[p] = Pr(X_t = p | e_t, e_{t-1}, e_{t-2}, ..., e_1)
    That is, it should update pacman's distribution over the ghost's
    locations to account for the passed observation.

    noisyDistance (= the next observation e_t) is the estimated Manhattan
    distance to the ghost you are tracking.

    emissionModel = busters.getObservationDistribution(noisyDistance) stores
    the probability of having observed noisyDistance given any true distance
    you supply. That is:
        emissionModel[trueDistance] = Pr(noisyDistance | trueDistance)

    Since our observations have to do with manhattanDistance with no
    indication of direction, we take
        Pr(noisyDistance | X_t = p) =
            Pr(noisyDistance | manhattanDistance(p, pacmanPosition))
    That is, the probability of observing noisyDistance given that the ghost
    is in position p equals the probability of having observed noisyDistance
    given the trueDistance between p and pacman's current position.

    self.legalPositions is a list of the possible ghost positions (only
    positions in self.legalPositions need to have their probability updated).

    A correct implementation will handle the following special case:
      * When a ghost is captured by Pacman, all beliefs should be updated so
        that pacman believes the ghost to be in its prison cell with
        probability 1; this position is self.getJailPosition().
        You can check if a ghost has been captured by Pacman by checking if
        it has a noisyDistance of None (a noisy distance of None will be
        returned if, and only if, the ghost is captured; note 0 != None).
    """
    noisyDistance = observation
    emissionModel = busters.getObservationDistribution(noisyDistance)
    pacmanPosition = gameState.getPacmanPosition()

    "*** YOUR CODE HERE ***"
    # Weight each legal position's prior belief by the probability of the
    # observation, handling the "jail" edge case where the ghost is eaten
    # and noisyDistance is None.
    allPossible = util.Counter()
    for p in self.legalPositions:
        if noisyDistance is not None:
            distance = util.manhattanDistance(p, pacmanPosition)
            allPossible[p] = emissionModel[distance] * self.beliefs[p]
        else:
            if p == self.getJailPosition():
                allPossible[p] = 1.0
    "*** END YOUR CODE HERE ***"

    allPossible.normalize()
    self.beliefs = allPossible
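# --- Self-contained sketch of the observation update implemented above,
# using plain dicts in place of util.Counter. The three positions, the
# emission table, and Pacman's position are made-up numbers for illustration.
beliefs = {(1, 1): 0.5, (2, 1): 0.3, (3, 1): 0.2}   # prior P(X_t = p | e_1..t-1)
emission = {1: 0.6, 2: 0.3, 3: 0.1}                 # P(noisyDistance | trueDistance)
pacman = (0, 1)

def manhattan(a, b):
    return abs(a[0] - b[0]) + abs(a[1] - b[1])

# Bayes update: posterior proportional to emission probability times prior.
posterior = {p: emission[manhattan(p, pacman)] * prior
             for p, prior in beliefs.items()}
total = sum(posterior.values())
posterior = {p: w / total for p, w in posterior.items()}
print(posterior)  # {(1, 1): ~0.73, (2, 1): ~0.22, (3, 1): ~0.05}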
def getWeights(self, state, agent_id):
    w = util.Counter()
    return w
def elapseTime(self, gameState):
    """
    Update self.beliefs in response to a time step passing from the current
    state.

    When we enter this function pacman's distribution over possible locations
    of the ghost are stored in self.beliefs. For any position p:
        self.beliefs[p] = Pr(X_{t-1} = p | e_{t-1}, e_{t-2}, ..., e_1)
    That is, pacman has a distribution over the previous time step having
    taken into account all previous observations.

    This function should update self.beliefs so that
        self.beliefs[p] = Pr(X_t = p | e_{t-1}, e_{t-2}, ..., e_1)
    That is, it should update pacman's distribution over the ghost's
    locations to account for progress in time.

    The transition model Pr(X_t | X_{t-1}) may depend on Pacman's current
    position (e.g., for DirectionalGhost). However, this is not a problem, as
    Pacman's current position is known.

    In order to obtain the distribution over new positions for the ghost,
    given its previous position (oldPos) as well as Pacman's current
    position, use this line of code:
        newPosDist = self.getPositionDistribution(self.setGhostPosition(gameState, oldPos))

    newPosDist is a util.Counter object, where for each position p in
    self.legalPositions:
        newPosDist[p] = Pr(ghost is at position p at time t + 1 |
                           ghost is at position oldPos at time t)

    You may also find it useful to loop over key, value pairs in newPosDist,
    like:
        for newPos, prob in newPosDist.items():
            ...

    HINT: obtaining newPosDist is relatively expensive. If you look carefully
    at the HMM "progress in time" equation you will see that you can organize
    the computation so that you use newPosDist[p] for all values of p (by,
    e.g., the for loop above) before moving on to the next newPosDist
    (generated by another oldPos).

    *** GORY DETAIL AHEAD ***

    As an implementation detail (with which you need not concern yourself),
    the line of code at the top of this comment block for obtaining
    newPosDist makes use of two helper methods provided in InferenceModule
    above:

    1) self.setGhostPosition(gameState, ghostPosition)
       This method alters the gameState by placing the ghost we're tracking
       in a particular position. This altered gameState can be used to query
       what the ghost would do in this position.

    2) self.getPositionDistribution(gameState)
       This method uses the ghost agent to determine what positions the ghost
       will move to from the provided gameState. The ghost must be placed in
       the gameState with a call to self.setGhostPosition above.

    It is worthwhile, however, to understand why these two helper methods are
    used and how they combine to give us a belief distribution over new
    positions after a time update from a particular position.
    """
    "*** YOUR CODE HERE ***"
    # Sum over previous positions: the belief that the ghost reaches newPos
    # is the prior belief of each old position times the transition probability.
    allPossible = util.Counter()
    for p in self.legalPositions:
        newPosDist = self.getPositionDistribution(self.setGhostPosition(gameState, p))
        for newPos, prob in newPosDist.items():
            allPossible[newPos] += prob * self.beliefs[p]
    self.beliefs = allPossible
    "*** END YOUR CODE HERE ***"
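# --- Matching sketch of the time-elapse (prediction) step, again with plain
# dicts; the two positions and the hand-written transition model are invented
# for the example.
beliefs = {(1, 1): 0.7, (2, 1): 0.3}        # P(X_{t-1} = p | e_1..t-1)
transition = {                              # P(X_t = p' | X_{t-1} = p)
    (1, 1): {(1, 1): 0.5, (2, 1): 0.5},
    (2, 1): {(1, 1): 0.2, (2, 1): 0.8},
}
new_beliefs = {}
for old_pos, prior in beliefs.items():
    for new_pos, prob in transition[old_pos].items():
        new_beliefs[new_pos] = new_beliefs.get(new_pos, 0.0) + prior * prob
print(new_beliefs)  # {(1, 1): 0.41, (2, 1): 0.59}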
def getFeatures(self, state, action):
    feats = util.Counter()
    feats[(state, action)] = 1.0
    return feats
def observeState(self, gameState):
    """
    Resamples the set of particles using the likelihood of the noisy
    observations.

    To loop over the ghosts, use:
        for i in range(self.numGhosts):
            ...

    A correct implementation will handle two special cases:
    1) When all particles get weight 0 due to the observation, a new set of
       particles needs to be generated from the initial prior distribution by
       calling initializeParticles.
    2) Otherwise, after all new particles have been generated by resampling,
       you must check if any ghosts have been captured by Pacman
       (noisyDistances[i] will be None if ghost i has been captured). For
       each captured ghost, you need to change the i'th component of every
       particle (remember that the particles contain a position for every
       ghost, so you need to change the component associated with the i'th
       ghost). In particular, if ghost i has been captured then the i'th
       component of every particle must be changed so the i'th ghost is in
       its prison cell (position self.getJailPosition(i)).

    Note that more than one ghost might be captured; you need to ensure that
    every particle puts every captured ghost in its prison cell.

    self.getParticleWithGhostInJail is a helper method to help you edit a
    specific particle. Since we store particles as tuples, they must be
    converted to a list, edited, and then converted back to a tuple. This is
    a common operation when placing a ghost in jail. Note that this function
    creates a new particle that has to replace the old particle in your list
    of particles.

    HINT 1: The weight of every particle is the product of the probabilities
    associated with each ghost's noisyDistance observation.
    HINT 2: When computing the weight of a particle by looking at each
    ghost's noisyDistance observation, make sure you check if the ghost has
    been captured. Captured ghosts are ignored in the weight computation (the
    particle's component for the captured ghost is updated to the precise
    position later), so this corresponds to multiplying the weight by
    probability 1.
    """
    pacmanPosition = gameState.getPacmanPosition()
    noisyDistances = gameState.getNoisyGhostDistances()
    if len(noisyDistances) < self.numGhosts:
        return
    emissionModels = [busters.getObservationDistribution(dist)
                      for dist in noisyDistances]

    "*** YOUR CODE HERE ***"
    # Weight each particle by the product of the emission probabilities for
    # every (uncaptured) ghost, then resample from the weighted distribution.
    newBelief = util.Counter()
    for particle in self.particles:
        weight = 1.0
        for index in range(self.numGhosts):
            if noisyDistances[index] is None:
                particle = self.getParticleWithGhostInJail(particle, index)
            else:
                distance = util.manhattanDistance(particle[index], pacmanPosition)
                weight *= emissionModels[index][distance]
        newBelief[particle] += weight

    if newBelief.totalCount() == 0:
        self.initializeParticles()
    else:
        self.particles = []
        for i in range(self.numParticles):
            self.particles.append(util.sample(newBelief))
    "*** END YOUR CODE HERE ***"
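# --- Hedged sketch of the weight-and-resample cycle above in plain Python 3
# (single ghost, so each particle is one position). The particle positions,
# the emission table, and the fallback prior are invented for the example.
import random

particles = [(1, 1), (1, 1), (2, 1), (3, 1)]
emission = {1: 0.6, 2: 0.3, 3: 0.1}     # P(noisyDistance | trueDistance)
pacman = (0, 1)

# Weight: accumulate emission probability per distinct particle value.
weights = {}
for p in particles:
    dist = abs(p[0] - pacman[0]) + abs(p[1] - pacman[1])
    weights[p] = weights.get(p, 0.0) + emission[dist]

if sum(weights.values()) == 0:
    # Special case 1: all weights zero, re-initialize from the prior.
    particles = [(1, 1), (2, 1), (3, 1), (1, 1)]
else:
    # Resample with probability proportional to weight.
    keys = list(weights)
    particles = random.choices(keys, weights=[weights[k] for k in keys],
                               k=len(particles))
print(particles)  # mostly (1, 1): it carries weight 1.2 of the total 1.6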
def getFeatures(self, gameState, action):
    features = util.Counter()
    # successor states for all agents
    successor = self.getSuccessor(gameState, action)
    # your successor state and position
    myState = successor.getAgentState(self.index)
    myPos = myState.getPosition()

    # Computes whether we're on defense (1) or offense (0)
    features['onDefense'] = 1
    if myState.isPacman:
        features['onDefense'] = 0

    # Computes distance to invaders we can see
    enemies = [successor.getAgentState(i) for i in self.getOpponents(successor)]
    # find invaders on the other team
    invaders = [a for a in enemies if a.isPacman and a.getPosition() is not None]
    features['numInvaders'] = len(invaders)
    allEnemies = [a for a in enemies if a.getPosition() is not None]
    # print "allEnemies ", allEnemies
    if allEnemies:
        # print "I SEE U ", allEnemies
        pass
    # print "invaders ", len(invaders)

    dists = []
    # lastEatenFood = None
    # if there are invaders
    if len(invaders) > 0:
        # get the maze distance to each one
        # print self.scaredTimer
        dists = [self.getMazeDistance(myPos, a.getPosition()) for a in invaders]
        closestPac = min(dists)
        if myState.scaredTimer:
            # print myState.scaredTimer
            # print gameState
            pass
        if myState.scaredTimer != 0 and myState.scaredTimer <= 25 and closestPac < 2:
            # features['invaderDistance'] = 10
            pass
        else:
            features['invaderDistance'] = min(dists)
            self.lastEatenFood = None
            # print "features['invaderDistance'] chase ", features['invaderDistance']
    # if there aren't any invaders
    else:
        # check yo food buddy
        oldFoodList = self.oldFood.asList()
        newFoodList = self.getFoodYouAreDefending(gameState).asList()
        # print "old", oldFoodList
        # print "new", newFoodList
        # if any piece has been eaten
        if oldFoodList != newFoodList:
            eatenFoodList = list(set(oldFoodList) - set(newFoodList))
            self.lastEatenFood = eatenFoodList[0]
        self.oldFood = self.getFoodYouAreDefending(gameState)
        # move to the last eaten food
        if self.lastEatenFood:
            # print "TO FOOD", self.lastEatenFood
            distanceToFood = self.getMazeDistance(myPos, self.lastEatenFood)
            features['invaderDistance'] = distanceToFood
            # remove the last eaten food after you've been to it (and bug fixing)
            if (myPos == self.lastEatenFood
                    or self.lastEatenFood[0] >= gameState.data.layout.width / 2 - 2
                    or gameState.data.layout.walls[self.lastEatenFood[0]][self.lastEatenFood[1]]):
                self.lastEatenFood = None
        # patrol behavior
        else:
            # print "PATROL", self.patrolPoints
            p = self.patrolPoints[self.i]
            if myPos == p:
                self.i += 1
                if self.i >= len(self.patrolPoints):
                    self.i = 0
            distanceToPoint = self.getMazeDistance(myPos, p)
            features['invaderDistance'] = distanceToPoint

    if action == Directions.STOP:
        features['stop'] = 1
    rev = Directions.REVERSE[gameState.getAgentState(self.index).configuration.direction]
    if action == rev:
        features['reverse'] = 1

    return features
def trainAndTune(self, trainingData, trainingLabels, validationData, validationLabels, kgrid):
    """
    Trains the classifier by collecting counts over the training data, and
    stores the Laplace smoothed estimates so that they can be used to
    classify. Evaluate each value of k in kgrid to choose the smoothing
    parameter that gives the best accuracy on the held-out validationData.

    trainingData and validationData are lists of feature Counters. The
    corresponding label lists contain the correct label for each datum.

    To get the list of all possible features or labels, use self.features
    and self.legalLabels.
    """
    "*** YOUR CODE HERE ***"
    # util.raiseNotDefined()
    print "Begin define train and tune..."
    # print self.features
    # print self.legalLabels

    # Collect counts of each (feature value, feature, label) triple.
    c = util.Counter()
    k_res = util.Counter()
    for index in range(len(trainingData)):
        datum = trainingData[index]
        label = trainingLabels[index]
        for key in datum.sortedKeys():
            if datum[key] == 1:
                c[(1, key, label)] += 1
            # elif datum[key] == 2:
            #     c[(2, key, label)] += 1
            else:
                c[(0, key, label)] += 1

    # Conditional probabilities with Laplace smoothing, for each candidate k
    for k in kgrid:
        print "Set k = ", k
        for feature in self.features:
            for label in self.legalLabels:
                # S = c[(1, feature, label)] + k + c[(0, feature, label)] + k + c[(2, feature, label)] + k
                # self.P[(2, feature, label)] = (c[(2, feature, label)] + k) / (S * 1.0)
                # self.P[(1, feature, label)] = (c[(1, feature, label)] + k) / (S * 1.0)
                # self.P[(0, feature, label)] = (c[(0, feature, label)] + k) / (S * 1.0)
                S = c[(1, feature, label)] + k + c[(0, feature, label)] + k
                self.P[(1, feature, label)] = (c[(1, feature, label)] + k) / (S * 1.0)
                self.P[(0, feature, label)] = (c[(0, feature, label)] + k) / (S * 1.0)
        # Calculate the accuracy on the validation set
        guesses = self.classify(validationData)
        correct = [guesses[i] == validationLabels[i]
                   for i in range(len(validationLabels))].count(True)
        k_res[k] = 100.0 * correct / len(validationLabels)
        print "Accuracy = ", k_res[k], "%"

    # Evaluate and choose the optimum k (sortedKeys() sorts by descending value)
    print k_res
    k = k_res.sortedKeys()[0]
    print "Set k = ", k

    # Reassign conditional probabilities using the chosen k
    for feature in self.features:
        for label in self.legalLabels:
            # S = c[(1, feature, label)] + k + c[(0, feature, label)] + k + c[(2, feature, label)] + k
            # self.P[(2, feature, label)] = (c[(2, feature, label)] + k) / S
            # self.P[(1, feature, label)] = (c[(1, feature, label)] + k) / S
            # self.P[(0, feature, label)] = (c[(0, feature, label)] + k) / S
            S = c[(1, feature, label)] + k + c[(0, feature, label)] + k
            self.P[(1, feature, label)] = (c[(1, feature, label)] + k) / (S * 1.0)
            self.P[(0, feature, label)] = (c[(0, feature, label)] + k) / (S * 1.0)
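# --- Worked example of the Laplace-smoothed estimate computed above (the
# counts are made up): with k = 1, c(F=1, y) = 3, and c(F=0, y) = 7,
#     P(F=1 | y) = (3 + 1) / (3 + 1 + 7 + 1) = 4 / 12 = 1/3
# Smoothing keeps unseen feature values from getting probability 0.
k = 1.0
c_on, c_off = 3, 7
p_on = (c_on + k) / (c_on + k + c_off + k)
print(p_on)  # 0.3333...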
""" Your value iteration agent should take an mdp on construction, run the indicated number of iterations and then act according to the resulting policy. Some useful mdp methods you will use: mdp.getStates() mdp.getPossibleActions(state) mdp.getTransitionStatesAndProbs(state, action) mdp.getReward(state, action, nextState) mdp.isTerminal(state) """ self.mdp = mdp self.discount = discount self.iterations = iterations self.values = util.Counter() # A Counter is a dict with default 0 # Write value iteration code here for i in range(iterations): storeValues = util.Counter() states = mdp.getStates() for s in states: actions = mdp.getPossibleActions(s) if(len(actions) == 0): continue qVals = [self.getQValue(s, a) for a in actions] storeValues[s] = max(qVals) self.values = storeValues def getValue(self, state): """
def elapseTime(self, gameState):
    """
    Update self.beliefs in response to a time step passing from the current
    state.

    The transition model is not entirely stationary: it may depend on
    Pacman's current position (e.g., for DirectionalGhost). However, this is
    not a problem, as Pacman's current position is known.

    In order to obtain the distribution over new positions for the ghost,
    given its previous position (oldPos) as well as Pacman's current
    position, use this line of code:
        newPosDist = self.getPositionDistribution(self.setGhostPosition(gameState, oldPos))
    Note that you may need to replace "oldPos" with the correct name of the
    variable that you have used to refer to the previous ghost position for
    which you are computing this distribution. You will need to compute
    multiple position distributions for a single update.

    newPosDist is a util.Counter object, where for each position p in
    self.legalPositions:
        newPosDist[p] = Pr(ghost is at position p at time t + 1 |
                           ghost is at position oldPos at time t)
    (and also given Pacman's current position).

    You may also find it useful to loop over key, value pairs in newPosDist,
    like:
        for newPos, prob in newPosDist.items():
            ...

    *** GORY DETAIL AHEAD ***

    As an implementation detail (with which you need not concern yourself),
    the line of code at the top of this comment block for obtaining
    newPosDist makes use of two helper methods provided in InferenceModule
    above:

    1) self.setGhostPosition(gameState, ghostPosition)
       This method alters the gameState by placing the ghost we're tracking
       in a particular position. This altered gameState can be used to query
       what the ghost would do in this position.

    2) self.getPositionDistribution(gameState)
       This method uses the ghost agent to determine what positions the ghost
       will move to from the provided gameState. The ghost must be placed in
       the gameState with a call to self.setGhostPosition above.

    It is worthwhile, however, to understand why these two helper methods are
    used and how they combine to give us a belief distribution over new
    positions after a time update from a particular position.
    """
    "*** YOUR CODE HERE ***"
    next_state_beliefs = util.Counter()
    for current_position, current_probability in self.beliefs.items():
        next_state_distribution = self.getPositionDistribution(
            self.setGhostPosition(gameState, current_position))
        for next_position, next_probability in next_state_distribution.items():
            next_state_beliefs[next_position] += current_probability * next_probability
    next_state_beliefs.normalize()
    self.beliefs = next_state_beliefs
def initializeUniformly(self, gameState):
    "Begin with a uniform distribution over ghost positions."
    self.beliefs = util.Counter()
    for p in self.legalPositions:
        self.beliefs[p] = 1.0
    self.beliefs.normalize()
def runValueIteration(self):
    for _ in range(self.iterations):
        nextValues = util.Counter()
        for state in self.mdp.getStates():
            nextValues[state] = self.getMaxQ(state)
        self.values = nextValues
def getDistribution(self, state):
    dist = util.Counter()
    for a in state.getLegalActions(self.index):
        dist[a] = 1.0
    dist.normalize()
    return dist
def __init__(self, legalLabels, maxIterations):
    PerceptronClassifier.__init__(self, legalLabels, maxIterations)
    self.weights = util.Counter()
def __init__(self, **args):
    "You can initialize Q-values here..."
    ReinforcementAgent.__init__(self, **args)
    self.q_values = util.Counter()
def getDistribution(self, state):
    dist = util.Counter()
    dist[Directions.STOP] = 1.0
    return dist