def getAction(self, state):
    # One decision step: sense the world, build/update the internal map,
    # filter dangerous moves, then pick, record and return a direction.
    walls = api.walls(state)
    # The last corner holds the largest (x, y) coordinates of the layout.
    width, height = api.corners(state)[-1]
    legal = api.legalActions(state)
    me = api.whereAmI(state)
    food = api.food(state)
    ghosts = api.ghosts(state)
    capsules = api.capsules(state)
    direction = Directions.STOP
    x, y = me
    # Build the map once, on the first call (coordinates are inclusive, so +1).
    if not hasattr(self, 'map'):
        self.createMap(walls, width + 1, height + 1)
    self.checkForCapsules(capsules, legal, ghosts)
    # solveLoop presumably prunes moves that would cause oscillation -- TODO confirm.
    legal = self.solveLoop(ghosts, legal)
    if len(ghosts):
        self.memorizeGhosts(ghosts)
    # NOTE(review): counter semantics not visible here; assumed to gate
    # ghost-avoidance (e.g. a post-capsule grace period) -- confirm.
    if self.counter < 0:
        for ghost in ghosts:
            legal = self.checkForGhosts(ghost, me, legal)
    direction = self.pickMove(me, legal, width + 1, height + 1, food)
    self.updatePosition(me, 1, self.map)
    self.printMap(self.map)
    # Remember the chosen move for the next decision step.
    self.last = direction
    return direction
def registerInitialState(self, state):
    """Record board geometry and the starting food count before play begins."""
    self.initial_num_food = len(api.food(state))
    self.corners = api.corners(state)
    # Dimensions are one more than the largest coordinate among the corners.
    self.width = max(c[0] for c in self.corners) + 1
    self.height = max(c[1] for c in self.corners) + 1
    self.walls = api.walls(state)
def getAction(self, state):
    # Debug/demo action chooser: prints utility information for inspection,
    # then returns a random legal (non-STOP) move.
    print "-" * 30  # divider
    ghosts = api.ghosts(state)  # get state of ghosts
    legal = state.getLegalPacmanActions()  # Again, get a list of pacman's legal actions
    last = state.getPacmanState().configuration.direction  # store last move
    pacman = api.whereAmI(state)  # retrieve location of pacman
    food = api.food(state)  # retrieve location of food
    walls = api.walls(state)
    # how to call getfoodvalmap method.
    # In reality, the reward should be the final value-iteration of the grid.
    foodVal = self.getValueMap(state, 10)
    print foodVal
    # example on how to use getPacMEU function
    currentUtil = self.getPacMEU(pacman[0], pacman[1], foodVal, legal)
    print "Utility values: "
    print currentUtil
    print max(currentUtil.values())
    # example on how to use getMEU function
    foodUtil = self.getMEU((18, 3), foodVal, walls)
    print "max utility for (18, 3) is: "
    print foodUtil
    if Directions.STOP in legal:
        legal.remove(Directions.STOP)
    # Random choice between the legal options.
    return api.makeMove(random.choice(legal), legal)
    # NOTE(review): stray triple-quote below likely opens a block comment
    # that fences off dead code continuing past this chunk -- confirm.
    """
def ghost_update(self, state):
    """
    Update ghosts to the map.

    Ghosts that will remain scared for more than 2 more ticks are ignored
    (2 is a safety margin); every other ghost, and each of its non-wall
    neighbours, is stamped with the ghost reward so Pacman keeps clear.

    Improvement: api.walls(state) was previously re-queried inside the
    neighbour loop for every ghost; it is now fetched once and reused.
    """
    walls = api.walls(state)
    ghosts = api.ghostStatesWithTimes(state)
    for ghost in ghosts:
        # if ghosts are scared and the remaining scared time is longer
        # than 2 (set 2 for safety), ignore the ghost
        if ghost[1] > 2:
            continue
        ghost_x = int(ghost[0][0])
        ghost_y = int(ghost[0][1])
        # set ghost reward to the ghost's own cell
        self.map.set_value(ghost_x, ghost_y, self.ghost_reward)
        # for safety, also assign ghost_reward to cells next to ghosts
        for neighbor in self.four_neighbors(ghost_x, ghost_y):
            if neighbor not in walls:  # skip wall cells
                self.map.set_value(neighbor[0], neighbor[1], self.ghost_reward)
def wall_update(self, state):
    """Mark every wall cell on the map with the '*' symbol."""
    for wx, wy in api.walls(state):
        self.map.set_value(wx, wy, "*")
def generateRewardGrid(self, state):
    """Build a [row][col]-indexed reward grid for value iteration.

    Ordinary squares carry a small negative reward (an incentive to take
    the shortest route), food squares are worth 100, squares near ghosts
    are flood-filled with negative reward, and walls are zeroed.
    """
    livingPenalty = -5  # negative incentive for non-terminal states
    (w, h) = api.corners(state)[3]
    grid = [[livingPenalty for _ in range(w + 1)] for _ in range(h + 1)]
    ghosts = api.ghosts(state)
    foods = api.food(state)
    walls = api.walls(state)
    for (fx, fy) in foods:
        grid[fy][fx] = 100
    # Fear radius depends on remaining food: pacman feels no fear when
    # almost winning.
    radius = 5 if len(foods) > 3 else 2
    for (gx, gy) in ghosts:
        self.floodFill(grid, int(gx), int(gy), radius)
    for (wx, wy) in walls:
        grid[wy][wx] = 0
    return grid
def stateMapping(self, state):
    """
    create a dictionary that maps all walkable grid squares to all of the
    states it's possible to reach from them based on the set of available
    actions at that square
    """
    walls = set(api.walls(state))
    # One entry per rewarded (walkable) square; values filled in below.
    stateDict = dict.fromkeys(self.rewardDict.keys())
    # loop through all squares in the map, for each one assign the possible
    # states that could result from carrying out the actions 'North',
    # 'South', 'East' or 'West' at that point under the stochastic
    # transition model. Assign possible state values so that the intended
    # result of each action is first.
    # NOTE(review): assumes self.neighbours(i) returns neighbours in the
    # index order [East, South, West, North] -- confirm against the
    # neighbours() implementation.
    for i in stateDict.keys():
        tmp = self.neighbours(i)
        stateDict[i] = {
            'North': [tmp[3], tmp[0], tmp[2]],
            'South': [tmp[1], tmp[0], tmp[2]],
            'East': [tmp[0], tmp[3], tmp[1]],
            'West': [tmp[2], tmp[3], tmp[1]],
        }
        # if any of the possible states for a grid square represent walls,
        # overwrite them with the initial grid square value instead, i.e.
        # if trying to move into a wall remain in position.
        for a, b in stateDict[i].items():
            for s in b:
                if s in walls:
                    b[b.index(s)] = i
    self.stateDict = stateDict
def valueIterationSmall(self, state, reward, gamma, V1):
    # Similar to valueIteration function
    # does not calculate buffers around ghosts (cause it would be too small)
    # meant for maps smaller than 10 x 10
    # Mutates V1 (the utility dict) in place; nothing is returned.
    corners = api.corners(state)
    walls = api.walls(state)
    food = api.food(state)
    ghosts = api.ghosts(state)
    capsules = api.capsules(state)
    maxWidth = self.getLayoutWidth(corners) - 1
    maxHeight = self.getLayoutHeight(corners) - 1
    if not (0 < gamma <= 1):
        raise ValueError("MDP must have a gamma between 0 and 1.")
    # Implement Bellman equation with a fixed 100-sweep iteration
    # (the original comment said 10, but the loop below runs 100 times).
    loops = 100
    while loops > 0:
        V = V1.copy()  # This will store the old values
        for i in range(maxWidth):
            for j in range(maxHeight):
                # Exclude food, ghosts and capsules because in this case
                # they are the terminal states.
                if (i, j) not in walls and (i, j) not in food and (
                        i, j) not in ghosts and (i, j) not in capsules:
                    V1[(i, j)] = reward + gamma * self.getTransition(i, j, V)
        loops -= 1
def valueIteration(self, state, reward, gamma, valueMap):
    # Runs Bellman value iteration over the whole layout and returns the
    # resulting utility map.  Cells currently holding the value 5 are
    # skipped (presumably terminal food cells -- TODO confirm the caller
    # seeds food with 5).
    self.reward = reward
    self.gamma = gamma
    self.V1 = valueMap
    corners = api.corners(state)
    walls = api.walls(state)
    maxWidth = self.getLayoutWidth(corners) - 1
    maxHeight = self.getLayoutHeight(corners) - 1
    if not (0 < self.gamma <= 1):
        raise ValueError("MDP must have a gamma between 0 and 1.")
    # Implement Bellman equation with a fixed 50-sweep iteration
    # (the original comment said 15, but the loop below runs 50 times).
    loops = 50
    while loops > 0:
        V = self.V1.copy()  # This will store the old values
        for i in range(maxWidth):
            for j in range(maxHeight):
                # Exclude any food because in this case it is the terminal state
                if (i, j) not in walls and self.V1[(i, j)] != 5:
                    self.V1[(i, j)] = self.reward + self.gamma * self.getTransition(i, j, V)
        loops -= 1
    return self.V1
def initialize(self, state):
    """Build the internal map grid on the first call, marking walls ("W"),
    food and capsules ("F"), and Pacman's own square ("0").

    Bug fix: the grid is now (width + 1) x (height + 1).  The corner
    coordinates are the largest *valid* indices, so the previous
    range(width) / range(height) grid was one row and one column short and
    self.map[wall[0]][wall[1]] could raise IndexError for far-edge cells.
    Also removed the unused `legal` query and used identity comparison
    with None.
    """
    foods = api.food(state)
    corners = api.corners(state)
    capsules = api.capsules(state)
    walls = api.walls(state)
    pacman = api.whereAmI(state)
    x, y = pacman
    if self.map is None:
        # The largest corner coordinates are the maximum valid x/y indices.
        width = 0
        height = 0
        for corner in corners:
            if corner[0] > width:
                width = corner[0]
            if corner[1] > height:
                height = corner[1]
        # "?" marks unknown squares; +1 because coordinates are inclusive.
        self.map = [["?" for y in range(height + 1)] for x in range(width + 1)]
        for wall in walls:
            self.map[wall[0]][wall[1]] = "W"
        for food in foods:
            self.map[food[0]][food[1]] = "F"
        # Capsules are deliberately treated like food ("F") by this agent.
        for capsule in capsules:
            self.map[capsule[0]][capsule[1]] = "F"
        self.map[x][y] = "0"
        self.init = True
def getAction(self, state):
    """
    The function to work out next intended action carried out.

    Parameters:
    None

    Returns:
    Directions: Intended action that Pacman will carry out.
    """
    current_pos = api.whereAmI(state)
    corners = api.corners(state)
    food = api.food(state)
    ghosts = api.ghosts(state)
    # NOTE(review): only the first ghost's scared timer is consulted.
    ghost_scared_time = api.ghostStatesWithTimes(state)[0][1]
    walls = api.walls(state)
    legal = api.legalActions(state)
    capsules = api.capsules(state)
    # Cells whose reward must never be overwritten by the ghost-halo pass.
    protected_coords = walls + ghosts + [current_pos]
    width = max(corners)[0] + 1
    height = max(corners, key=itemgetter(1))[1] + 1
    # Rewards: -0.04 living cost, 1 food, 2 capsule, 'x' wall, -3 ghost.
    board = self.create_board(width, height, -0.04)
    board.set_position_values(food, 1)
    board.set_position_values(walls, 'x')
    board.set_position_values(capsules, 2)
    # Ghosts only carry a penalty when their scared timer is nearly out.
    if ghost_scared_time < 5:
        board.set_position_values(ghosts, -3)
    print "GHOST LIST: ", ghosts
    for x, y in ghosts:
        # set the surrounding area around the ghost to half the reward of
        # the ghost; avoids changing the reward of the ghost itself, the
        # pacman and the walls
        x_coordinates = [x - 1, x, x + 1]
        y_coordinates = [y - 1, y, y + 1]
        for x_coord in x_coordinates:
            for y_coord in y_coordinates:
                if (x_coord, y_coord) not in protected_coords:
                    # Board rows are indexed top-down, hence convert_y.
                    converted_y = board.convert_y(y_coord)
                    board[converted_y, x_coord] = board[board.convert_y(y), x] / 2
    board = self.value_iteration(state, board)
    # Expected utility of each action from Pacman's current cell
    # (y is flipped into row index for the board).
    expected_utility = self.calculate_expected_utility(
        state, board, abs(current_pos[1] - (height - 1)), current_pos[0])
    # Pick the legal action with the maximum expected utility.
    return max([(utility, action) for utility, action in expected_utility
                if action in legal])[1]
def final(self, state):
    """Reset the agent's per-game bookkeeping once a game finishes."""
    walls = api.walls(state)
    w, h = api.corners(state)[-1]
    self.last = None
    self.createMap(walls, w + 1, h + 1)
    # Clear the positions remembered during the game.
    self.pos = (0, 0)
    self.capsule = (0, 0)
    self.ghosts = [(0, 0)]
    food = api.food(state)  # queried for parity; value unused here
def registerInitialState(self, state): print "Running registerInitialState!" # Make a map of the right size self.makeMap(state) self.addWallsToMap(state) self.updateFoodInMap(state) self.map.display() self.updateUtilities(api.walls(state), api.food(state), api.ghosts(state), 1000) self.counter = 0
def getAction(self, state): # Demonstrates the information that Pacman can access about the state # of the game. # What are the current moves available legal = api.legalActions(state) print "Legal moves: ", legal # Where is Pacman? pacman = api.whereAmI(state) print "Pacman position: ", pacman # Where are the ghosts? print "Ghost positions:" theGhosts = api.ghosts(state) for i in range(len(theGhosts)): print theGhosts[i] print "timer" moreGhosts = api.ghostStatesWithTimes(state) for i in range(len(moreGhosts)): print moreGhosts[i] # How far away are the ghosts? print "Distance to ghosts:" for i in range(len(theGhosts)): print util.manhattanDistance(pacman, theGhosts[i]) # Where are the capsules? print "Capsule locations:" print api.capsules(state) # Where is the food? print "Food locations: " print api.food(state) # Where are the walls? print "Wall locations: " print api.walls(state) # getAction has to return a move. Here we pass "STOP" to the # API to ask Pacman to stay where they are. return api.makeMove(Directions.STOP, legal)
def addWallsFoodToMap(self, state):
    """Stamp walls ('%') and food (value 1) onto both internal maps."""
    for wx, wy in api.walls(state):
        self.map1.setValue(wx, wy, '%')
        self.map2.setValue(wx, wy, '%')
    for fx, fy in api.food(state):
        self.map1.setValue(fx, fy, 1)
        self.map2.setValue(fx, fy, 1)
def registerInitialState(self, state): print "Running registerInitialState!" # Make a map of the right size self.makeMap(state) self.addWallsToMap(state) self.updateFoodInMap(state) self.map.display() #Run the value iteration 1000 times at the start as it will stabilise the values for later on self.updateUtilities(api.walls(state), api.food(state), api.ghosts(state), 1000, state) self.counter = 0
def getAction(self, state):
    """Breadth-first search from Pacman toward the nearest food or capsule.

    Seeds a queue with the cell reached by each legal first move, tagged
    with that move, and expands breadth-first; the first move whose frontier
    reaches food or a capsule is returned.

    Bug fixes:
    - `position in (food or capsules)` referenced an undefined name
      (`capsules`; the local is `capsule`) and, due to `or` short-circuiting,
      only ever tested membership in `food`.  Both lists are now tested.
    - Added a visited set so the search terminates even when no food or
      capsule is reachable (previously the queue grew without bound).
    - Added a random-legal-move fallback instead of falling off the end
      and returning None.
    """
    pacman = api.whereAmI(state)
    x, y = pacman
    food = api.food(state)
    capsules = api.capsules(state)
    # Get the actions we can try, and remove "STOP" if that is one of them.
    legal = state.getLegalPacmanActions()
    walls = api.walls(state)
    q = Queue.Queue()
    if Directions.STOP in legal:
        legal.remove(Directions.STOP)
    # Seed the frontier: one entry per legal first move.
    if Directions.WEST in legal:
        q.put(((x - 1, y), Directions.WEST))
    if Directions.EAST in legal:
        q.put(((x + 1, y), Directions.EAST))
    if Directions.NORTH in legal:
        q.put(((x, y + 1), Directions.NORTH))
    if Directions.SOUTH in legal:
        q.put(((x, y - 1), Directions.SOUTH))
    visited = set([pacman])
    while not q.empty():
        position, firstMove = q.get()
        if position in visited:
            continue
        visited.add(position)
        if position in food or position in capsules:
            # The first move that leads toward the closest target wins.
            return api.makeMove(firstMove, legal)
        px, py = position
        for neighbour in ((px - 1, py), (px + 1, py), (px, py + 1), (px, py - 1)):
            if neighbour not in walls and neighbour not in visited:
                q.put((neighbour, firstMove))
    # No food or capsule reachable: fall back to any legal move.
    return api.makeMove(random.choice(legal), legal)
def updateMap(self, state):
    """Refresh the map dictionary, marking every wall cell with 'X', and
    return it.

    Improvements: the unused queries (ghosts, ghost states, capsules, food)
    are no longer fetched, and wall membership is tested against a set for
    O(1) lookups instead of scanning a list once per map cell.
    """
    walls = set(api.walls(state))
    for node in self.mapDictionary:
        if node in walls:
            self.mapDictionary[node] = 'X'
    return self.mapDictionary
def possibleMoves(self, state, coord):
    ## returns list of (direction, coord) pairs that are in legal moves)
    # Improvement: api.walls(state) was queried once per direction; it is
    # now fetched a single time and reused for all four checks.
    walls = api.walls(state)
    x, y = coord
    possibleMoves = []
    # north
    if (x, y + 1) not in walls:
        possibleMoves.append((Directions.NORTH, (x, y + 1)))
    # east
    if (x + 1, y) not in walls:
        possibleMoves.append((Directions.EAST, (x + 1, y)))
    # south
    if (x, y - 1) not in walls:
        possibleMoves.append((Directions.SOUTH, (x, y - 1)))
    # west
    if (x - 1, y) not in walls:
        possibleMoves.append((Directions.WEST, (x - 1, y)))
    # possible moves is (dir, coord) pair
    return possibleMoves
def initialize(self, state):
    """(Re)build the reward and utility grids for the current layout.

    Rewards: baseReward everywhere, foodReward on food, capsuleReward on
    capsules, and the string "W" on walls (in both grids).  Utilities start
    at random values in [0, 1).

    Improvements: identity comparison with None (`is None`) and removal of
    the unused pacman-position locals.  Note the None-guard is trivially
    true because both grids are reset at the top of this method.
    """
    # Reset both grids so the guard below always rebuilds them.
    self.reward = None
    self.utility = None
    foods = api.food(state)
    corners = api.corners(state)
    capsules = api.capsules(state)
    walls = api.walls(state)
    if self.reward is None and self.utility is None:
        # The layout size is the largest coordinate found among the corners.
        width = 0
        height = 0
        for corner in corners:
            if corner[0] > width:
                width = corner[0]
            if corner[1] > height:
                height = corner[1]
        # Base rewards everywhere; +1 because corner coords are inclusive.
        self.reward = [[self.baseReward for y in range(height + 1)]
                       for x in range(width + 1)]
        # Utilities start as random noise in [0, 1).
        self.utility = [[random() for y in range(height + 1)]
                        for x in range(width + 1)]
        # Known food locations get the food reward.
        for food in foods:
            self.reward[food[0]][food[1]] = self.foodReward
        # Capsules get the capsule reward.
        for capsule in capsules:
            self.reward[capsule[0]][capsule[1]] = self.capsuleReward
        # Walls are marked "W" in both grids so iteration can skip them.
        for wall in walls:
            self.reward[wall[0]][wall[1]] = "W"
            self.utility[wall[0]][wall[1]] = "W"
    # Mark the map as initialized.
    self.init = True
def getSuccessor(self, state, x, y):
    """Return the coordinates adjacent to (x, y) that are not walls."""
    walls = api.walls(state)
    successors = []
    compass = [api.Directions.NORTH, api.Directions.SOUTH,
               api.Directions.EAST, api.Directions.WEST]
    for action in compass:
        # NOTE: self.x / self.y are (re)assigned as a side effect, exactly
        # as in the original implementation.
        self.x = x
        self.y = y
        dx, dy = self.directiontoVector(action)
        candidate = (int(self.x + dx), int(self.y + dy))
        if candidate not in walls:
            successors.append(candidate)
    return successors
def updateMap(self, state):
    """Create or refresh the world map.

    Each cell is a 4-item list: [symbol, reward, utility, policy direction].
    On the first call the full grid is allocated and stamped; on later calls
    the grid is cleared and symbols/rewards are re-stamped from the current
    game state.

    NOTE(review): the first-build branch calls setMap(x, y, ...) while the
    refresh branch calls setThing(y, x, ...) -- confirm the two helpers
    agree on row/column argument order.
    """
    corners = api.corners(state)
    ghosts = api.ghosts(state)
    me = api.whereAmI(state)
    walls = api.walls(state)
    capsules = api.capsules(state)
    foods = api.food(state)
    # Width and Height of Map (corner coords are the largest valid indices)
    width, height = corners[3][0] + 1, corners[3][1] + 1
    # Generate empty world Map
    if (self.worldMap is None):
        self.worldMap = [[[' ', self.emptyReward, 0, Directions.STOP]
                          for x in range(width)] for y in range(height)]
        self.setMap(me[0], me[1], ['M', self.meReward, 0, Directions.STOP])
        for food in foods:
            self.setMap(food[0], food[1], ['F', self.foodReward, 0, Directions.STOP])
        for capsule in capsules:
            self.setMap(capsule[0], capsule[1], ['C', self.capsuleReward, 0, Directions.STOP])
        for wall in walls:
            self.setMap(wall[0], wall[1], ['W', self.wallReward, 0, Directions.STOP])
        for ghost in ghosts:
            self.setMap(ghost[0], ghost[1], ['G', self.ghostReward, 0, Directions.STOP])
    else:
        # Refresh: wipe dynamic entries, then re-stamp symbols and rewards.
        self.clearMapKeepState(state)
        self.setThing(me[1], me[0], 'M')
        for food in foods:
            self.setThing(food[1], food[0], 'F')
        for capsule in capsules:
            self.setThing(capsule[1], capsule[0], 'C')
        for wall in walls:
            self.setThing(wall[1], wall[0], 'W')
        for ghost in ghosts:
            self.setThing(ghost[1], ghost[0], 'G')
        self.setReward(me[0], me[1], self.meReward)
        for food in foods:
            self.setReward(food[0], food[1], self.foodReward)
        for capsule in capsules:
            self.setReward(capsule[0], capsule[1], self.capsuleReward)
        for wall in walls:
            self.setReward(wall[0], wall[1], self.wallReward)
        for ghost in ghosts:
            self.setReward(ghost[0], ghost[1], self.ghostReward)
def bestMove(self, state, gridVals):
    """
    Based on Pacman's current position, return the neighbouring
    co-ordinate with the maximum expected utility in gridVals.
    """
    walls = set(api.walls(state))
    here = api.whereAmI(state)
    # Non-wall neighbours of Pacman's position.
    candidates = [n for n in self.neighbours(here) if n not in walls]
    # max() returns the first candidate with the highest utility, matching
    # the index-of-max behaviour of the previous implementation.
    return max(candidates, key=lambda n: gridVals[n])
def registerInitialState(self, state): print "Initialising Map..." corners = api.corners(state) # The furthest corner from (0, 0) gives the width and height of the map (given it starts at (0, 0)) (width, height) = sorted(corners, key=lambda x: util.manhattanDistance( (0, 0), x), reverse=True)[0] self.map.initialise(height, width, api.walls(state)) # Set base values for rewards (set food but not ghosts) self.map.initialiseRewards(api.food(state)) # Print map when debugging if DEBUG: self.map.display()
def possibleMoves(self, state, coord):
    ## Returns the list of possible moves from the specified coordinate
    # and the resulting coordinate that move would result in
    #
    # Return type is list of (direction, coordinate) pairs
    # eg. [(Directions.NORTH, (3,4)), (Directions.EAST, (4,3))]
    #
    # Improvement: api.walls(state) was queried once per direction; it is
    # now fetched a single time and reused for all four checks.
    walls = api.walls(state)
    x, y = coord
    possibleMoves = []
    # north
    if (x, y + 1) not in walls:
        possibleMoves.append((Directions.NORTH, (x, y + 1)))
    # east
    if (x + 1, y) not in walls:
        possibleMoves.append((Directions.EAST, (x + 1, y)))
    # south
    if (x, y - 1) not in walls:
        possibleMoves.append((Directions.SOUTH, (x, y - 1)))
    # west
    if (x - 1, y) not in walls:
        possibleMoves.append((Directions.WEST, (x - 1, y)))
    return possibleMoves
def value_iteration(self, state, board):
    """
    The function to carry out value iteration.

    Parameters:
    board (Board): Chosen board to use.

    Returns:
    Board: Board with updated utility values stored in each cell of board.
    """
    board_copy = copy.deepcopy(board)
    gamma = 0.9  # discount factor
    iterations = 14  # max number of iterations
    threshold = 0.1  # convergence cutoff on the total per-sweep change
    # Positions where value stored should not be altered.
    protected_pos = api.ghosts(state) + api.walls(state)
    while iterations > 0:
        U = copy.deepcopy(board_copy)  # snapshot of the previous sweep
        # total differences between previous board and new board which has
        # been made at the end of the iteration
        total_difference = 0
        for row in range(self.height):
            for col in range(self.width):
                value = board_copy[row, col]
                # Check to make sure this position is not where a wall or a
                # ghost is (board rows are top-down, hence convert_y).
                if (col, board.convert_y(row)) not in protected_pos:
                    # just take the utility from the list returned by
                    # calculate_expected_utility
                    expected_utility = [
                        utility[0]
                        for utility in self.calculate_expected_utility(
                            state, U, row, col)
                    ]
                    max_expected_utility = max(expected_utility)
                    # Bellman's equation: original reward plus discounted
                    # best expected utility.
                    board_copy[row, col] = board[row, col] + gamma * \
                        max_expected_utility
        # calculate differences for each position using the old board (U)
        # and new board (board_copy)
        for row in range(self.height):
            for col in range(self.width):
                if (col, board.convert_y(row)) not in protected_pos:
                    value = board_copy[row, col]
                    total_difference += abs(round(value - U[row, col], 4))
        # Stop early once a sweep changes the board by less than threshold.
        if total_difference <= threshold:
            break
        iterations -= 1
    return board_copy
def buildMap(self, state, directions):
    # Build the initial reward map: every in-bounds cell gets a small
    # negative living reward, walls get a large negative reward.  Each map
    # value is a (reward, utility) pair.
    gameMap = {}
    corners = api.corners(state)
    # NOTE(review): the y-range starts at corners[2][0], which is an x
    # coordinate; this is only correct when that value happens to be 0 --
    # confirm the corner ordering returned by api.corners.
    for x in range(corners[0][0], corners[1][0] + 1):
        for y in range(corners[2][0], corners[3][1] + 1):
            coord = (x, y)
            gameMap[coord] = (-0.04, 0)
    walls = api.walls(state)
    for wall in walls:
        gameMap[wall] = (-1000, 0)
    # The layout counts as "large" unless both dimensions are under 10.
    largeOrSmall = True
    if corners[1][0] < 10 and corners[3][1] < 10:
        largeOrSmall = False
    return self.updateMap(state, gameMap, directions, largeOrSmall)
def initialize(self, state):
    """(Re)build the reward and utility grids for the current layout.

    Rewards: -1 everywhere, 10 on food, 5 on capsules, and the string "W"
    on walls (in both grids).  Utilities start as random values in [0, 1).

    Improvements: identity comparison with None (`is None`) and removal of
    the unused pacman-position locals.  Note the None-guard is trivially
    true because both grids are reset at the top of this method.
    """
    # Reset both grids so the guard below always rebuilds them.
    self.reward = None
    self.utility = None
    foods = api.food(state)
    corners = api.corners(state)
    capsules = api.capsules(state)
    walls = api.walls(state)
    if self.reward is None and self.utility is None:
        # The layout size is the largest coordinate found among the corners.
        width = 0
        height = 0
        for corner in corners:
            if corner[0] > width:
                width = corner[0]
            if corner[1] > height:
                height = corner[1]
        # Living penalty of -1 everywhere; +1 because coords are inclusive.
        self.reward = [[-1 for y in range(height + 1)]
                       for x in range(width + 1)]
        # Utilities start as random noise in [0, 1).
        self.utility = [[random() for y in range(height + 1)]
                        for x in range(width + 1)]
        # Food squares are worth 10.
        for food in foods:
            self.reward[food[0]][food[1]] = 10
        # Capsule squares are worth 5.
        for capsule in capsules:
            self.reward[capsule[0]][capsule[1]] = 5
        # Walls are marked "W" in both grids so iteration can skip them.
        for wall in walls:
            self.reward[wall[0]][wall[1]] = "W"
            self.utility[wall[0]][wall[1]] = "W"
    # Mark the map as initialized.
    self.init = True
def initialize(self, state):
    """Build Pacman's internal map on the first run.

    Grid symbols: "?" unknown, "F" food, "C" capsule, "W" wall, "P" pacman.
    Also resets the stored path.
    """
    self.path = []
    self.map = None
    foods = api.food(state)
    corners = api.corners(state)
    capsules = api.capsules(state)
    walls = api.walls(state)
    px, py = api.whereAmI(state)
    if self.map == None:
        # The layout extends to the largest corner coordinates (inclusive).
        width = max(c[0] for c in corners)
        height = max(c[1] for c in corners)
        # Unknown squares start as "?".
        self.map = [["?" for _ in range(height + 1)] for _ in range(width + 1)]
        for fx, fy in foods:
            self.map[fx][fy] = "F"
        for cx, cy in capsules:
            self.map[cx][cy] = "C"
        for wx, wy in walls:
            self.map[wx][wy] = "W"
        # Pacman knows its own square.
        self.map[px][py] = "P"
    self.init = True
def bellmanUpdate(self, state):
    # Performs Bellman value-iteration sweeps over the entire map until two
    # consecutive sweeps produce identical maps.  Wall cells are stored as
    # None and skipped; each other cell's utility becomes
    # 0.9 * (best expected move utility) + reward(cell).
    ghostList = api.ghosts(state)
    foodList = api.food(state)
    wallList = api.walls(state)
    # NOTE(review): only the first ghost's scared timer is consulted.
    scaredTime = api.ghostStatesWithTimes(state)[0][1]
    capsuleList = api.capsules(state)
    width = self.map.width
    height = self.map.height
    done = False
    while not done:
        # loops over map preforming bellmans until previous map from the
        # last iteration is equal to the map at the end of the new iteration
        oldMap = deepcopy(self.map)
        for x in range(0, width):
            for y in range(0, height):
                if oldMap[x][y] != None:
                    bestUtil = -1000
                    # Neighbours in the order North, East, South, West
                    # (offsets +y, +x, -y, -x); None entries are walls.
                    moves = [
                        oldMap[x][y + 1], oldMap[x + 1][y], oldMap[x][y - 1],
                        oldMap[x - 1][y]
                    ]  # list of all possible moves from the current cell
                    for i in range(len(moves)):
                        # finds the best util possible based on all legal
                        # moves to uses in value iteration
                        if moves[i] != None:
                            # 0.8 weight on the intended move plus 0.1 on
                            # each perpendicular move (i-1 and (i+1)%4 wrap
                            # around the N/E/S/W list); if a perpendicular
                            # cell is a wall, Pacman stays put instead.
                            tutil = moves[i] * 0.8
                            if moves[i - 1] != None:
                                tutil += moves[i - 1] * 0.1
                            else:
                                tutil += oldMap[x][y] * 0.1
                            if moves[(i + 1) % 4] != None:
                                tutil += moves[(i + 1) % 4] * 0.1
                            else:
                                tutil += oldMap[x][y] * 0.1
                            if tutil > bestUtil:
                                bestUtil = deepcopy(tutil)
                    # bellmans equation using rewards functon
                    self.map[x][y] = (bestUtil * 0.9) + self.reward(
                        x, y, state, ghostList, foodList, capsuleList,
                        scaredTime, wallList)
        # checks to see whether old map is the same as new map
        done = self.checkSame(oldMap, self.map)