class TDL_solution:
    """Temporal-difference style value learning on a 5x5 ``GridWorld``.

    The class owns a ``GridWorld`` instance (``self.game``) and updates its
    ``valueGrid`` and ``policyGrid`` in place.  Squares are addressed as
    ``(i, j)`` and used as ``grid[i][j]``.
    """

    def __init__(self):
        """Create the game and set the step size and discount factor."""
        self.game = GridWorld((5, 5))
        self.squareCountGrid = self.game.createSquareCount()
        self.alpha = 0.1  # step size of every value update in this class
        self.gamma = 0.9  # discount applied to the bootstrapped value

    def _chooseMove(self, i, j, randomMove, verbose=False):
        """Pick the move to play from square ``(i, j)``.

        The policy move is kept unless ``randomMove < np.random.rand()``;
        in that case one of the *other* possible moves is drawn uniformly,
        falling back to the policy move when there is no alternative.

        Args:
            i, j: Indices of the current square.
            randomMove: Threshold compared against a uniform sample; larger
                values mean the policy move is followed more often.
            verbose: When true, print the square, the possible moves and
                the policy move before exploring.

        Returns:
            The move to pass to ``self.game.move``.
        """
        move = self.game.policyGrid[i][j]
        if randomMove < np.random.rand():
            possible = self.game.possibleMoves((i, j))
            if verbose:
                print(
                    str(i) + " " + str(j) + " " + str(possible) + " "
                    + str(move))
            # Work on a copy so the list owned by the game is never mutated,
            # and tolerate a policy move that is not among the possible ones.
            alternatives = list(possible)
            if move in alternatives:
                alternatives.remove(move)
            if alternatives:
                idx = np.random.randint(0, len(alternatives))
                move = alternatives[idx]
        return move

    def playTDLGame(self, startSquare, randomMove):
        """Play one episode and store the discounted return of each square.

        The result is written to ``self.squares_and_values`` as a list of
        ``(square, G)`` pairs ordered from the last visited square back to
        ``startSquare``.  The discount used here is ``self.game.gamma``
        (not ``self.gamma``).

        Args:
            startSquare: Square the episode starts from.
            randomMove: Exploration threshold, see ``_chooseMove``.
        """
        self.game.currentSquare = startSquare
        keepPlaying = not self.game.gameOver()
        squares_and_returns = [(self.game.currentSquare, 0)]
        while keepPlaying:
            i = self.game.currentSquare[0]
            j = self.game.currentSquare[1]
            self.game.move(self._chooseMove(i, j, randomMove))

            i = self.game.currentSquare[0]
            j = self.game.currentSquare[1]
            theReturn = self.game.returnGrid[i][j]
            squares_and_returns.append((self.game.currentSquare, theReturn))
            keepPlaying = not self.game.gameOver()

        # Walk the episode backwards, accumulating the discounted return.
        G = 0
        self.squares_and_values = []
        for square, theReturn in reversed(squares_and_returns):
            self.squares_and_values.append((square, G))
            G = theReturn + self.game.gamma * G

    def playSarsa(self, startSquare, randomMove):
        """Play one episode, updating ``valueGrid`` after every move.

        After each move the value of the square just left is moved towards
        ``return + gamma * value`` of the square just entered, with step
        size ``self.alpha``.

        Args:
            startSquare: Square the episode starts from.
            randomMove: Exploration threshold, see ``_chooseMove``.
        """
        self.game.currentSquare = startSquare
        keepPlaying = not self.game.gameOver()
        while keepPlaying:
            i1 = self.game.currentSquare[0]
            j1 = self.game.currentSquare[1]
            self.game.move(self._chooseMove(i1, j1, randomMove, verbose=True))

            i2 = self.game.currentSquare[0]
            j2 = self.game.currentSquare[1]
            theReturn = self.game.returnGrid[i2][j2]
            target = theReturn + self.gamma * self.game.valueGrid[i2][j2]
            self.game.valueGrid[i1][j1] = self.game.valueGrid[i1][j1] + (
                self.alpha * (target - self.game.valueGrid[i1][j1]))
            keepPlaying = not self.game.gameOver()

    def playQLearning(self, startSquare, randomMove):
        """Play one episode with a Q-learning flavoured value update.

        NOTE(review): ``i3, j3`` are read from ``currentSquare`` *before*
        the move, so they always equal ``i1, j1`` and the bootstrap term is
        the value of the square being left.  The original comment ("we use
        the best move even if random runs over it") suggests the square
        reached by the policy move was intended -- confirm before changing;
        the behaviour is preserved here.

        Args:
            startSquare: Square the episode starts from.
            randomMove: Exploration threshold, see ``_chooseMove``.
        """
        self.game.currentSquare = startSquare
        keepPlaying = not self.game.gameOver()
        while keepPlaying:
            i1 = self.game.currentSquare[0]
            j1 = self.game.currentSquare[1]
            # Captured before moving, hence identical to (i1, j1).
            i3 = self.game.currentSquare[0]
            j3 = self.game.currentSquare[1]
            self.game.move(self._chooseMove(i1, j1, randomMove, verbose=True))

            i2 = self.game.currentSquare[0]
            j2 = self.game.currentSquare[1]
            theReturn = self.game.returnGrid[i2][j2]
            target = theReturn + self.gamma * self.game.valueGrid[i3][j3]
            self.game.valueGrid[i1][j1] = self.game.valueGrid[i1][j1] + (
                self.alpha * (target - self.game.valueGrid[i1][j1]))
            keepPlaying = not self.game.gameOver()

    def updateValueGrid(self):
        """Update ``valueGrid`` from the episode stored by ``playTDLGame``.

        Each entry of ``self.squares_and_values`` is updated using the
        value stored with, and the grid value of, the entry that follows it
        in the list.

        NOTE(review): the list is ordered last-square-first, so the
        "following" entry is the square visited *earlier* in the episode --
        confirm this is the intended pairing.
        """
        pairs = zip(self.squares_and_values, self.squares_and_values[1:])
        for (square, _), (nextSquare, value) in pairs:
            i1 = square[0]
            j1 = square[1]
            i2 = nextSquare[0]
            j2 = nextSquare[1]
            target = value + self.gamma * self.game.valueGrid[i2][j2]
            self.game.valueGrid[i1][j1] = self.game.valueGrid[i1][j1] + (
                self.alpha * (target - self.game.valueGrid[i1][j1]))

    def updatePolicyGrid(self):
        """Replace every move in ``policyGrid`` by ``game.bestMove()``.

        Only squares whose policy entry is one of the move codes 0-3 are
        touched.  ``game.currentSquare`` is overwritten while scanning.

        Returns:
            True if at least one policy entry changed, otherwise False.
        """
        rows = self.game.size[0]
        cols = self.game.size[1]
        change = False
        for i in range(rows):
            for j in range(cols):
                if self.game.policyGrid[i][j] in [0, 1, 2, 3]:
                    # bestMove() evaluates the game's current square.
                    self.game.currentSquare = (i, j)
                    oldMove = self.game.policyGrid[i][j]
                    self.game.policyGrid[i][j] = self.game.bestMove()
                    if oldMove != self.game.policyGrid[i][j]:
                        change = True
        return change

    def printGrids(self):
        """Print the policy, return and value grids of the game."""
        self.game.printPolicyGrid()
        self.game.printReturnGrid()
        self.game.printValueGrid()
class MC_solution:
    """Monte Carlo value estimation and policy improvement on a ``GridWorld``.

    The class owns a 5x5 ``GridWorld`` (``self.game``) and a per-square
    visit counter (``self.squareCountGrid``); both the game's ``valueGrid``
    and ``policyGrid`` are updated in place.
    """

    def __init__(self):
        """Create the game and its visit-count grid."""
        self.game = GridWorld((5, 5))
        self.squareCountGrid = self.game.createSquareCount()

    def _chooseMove(self, i, j, randomMove):
        """Pick the move to play from square ``(i, j)``.

        The policy move is kept unless ``randomMove < np.random.rand()``;
        in that case one of the *other* possible moves is drawn uniformly,
        falling back to the policy move when there is no alternative.
        """
        move = self.game.policyGrid[i][j]
        if randomMove < np.random.rand():
            # Copy so the list owned by the game is never mutated, and
            # tolerate a policy move that is not among the possible ones.
            alternatives = list(self.game.possibleMoves((i, j)))
            if move in alternatives:
                alternatives.remove(move)
            if alternatives:
                idx = np.random.randint(0, len(alternatives))
                move = alternatives[idx]
        return move

    def playMCGame(self, startSquare, randomMove):
        """Play one episode and store the discounted return of each square.

        The result is written to ``self.squares_and_values`` as a list of
        ``(square, G)`` pairs ordered from the last visited square back to
        ``startSquare``; ``G`` is discounted with ``self.game.gamma``.

        Args:
            startSquare: Square the episode starts from.
            randomMove: Exploration threshold, see ``_chooseMove``.
        """
        self.game.currentSquare = startSquare
        keepPlaying = not self.game.gameOver()
        squares_and_returns = [(self.game.currentSquare, 0)]
        while keepPlaying:
            i = self.game.currentSquare[0]
            j = self.game.currentSquare[1]
            self.game.move(self._chooseMove(i, j, randomMove))

            i = self.game.currentSquare[0]
            j = self.game.currentSquare[1]
            theReturn = self.game.returnGrid[i][j]
            squares_and_returns.append((self.game.currentSquare, theReturn))
            keepPlaying = not self.game.gameOver()

        # Walk the episode backwards, accumulating the discounted return.
        G = 0
        self.squares_and_values = []
        for square, theReturn in reversed(squares_and_returns):
            self.squares_and_values.append((square, G))
            G = theReturn + self.game.gamma * G

    def updateValueGrid(self):
        """Fold the stored episode into ``valueGrid`` as a running mean.

        Only the first occurrence of each square in
        ``self.squares_and_values`` is used.  Because that list is ordered
        last-square-first, this is the latest visit within the episode.
        """
        visitedSquares = set()
        for square, G in self.squares_and_values:
            if square not in visitedSquares:
                visitedSquares.add(square)
                i = square[0]
                j = square[1]
                self.squareCountGrid[i][j] += 1
                # Incremental mean: V += (G - V) / n
                self.game.valueGrid[i][j] = self.game.valueGrid[i][j] + (
                    G - self.game.valueGrid[i][j]) / self.squareCountGrid[i][j]

    def updatePolicyGrid(self):
        """Replace every move in ``policyGrid`` by ``game.bestMove()``.

        Only squares whose policy entry is one of the move codes 0-3 are
        touched.  ``game.currentSquare`` is overwritten while scanning.

        Returns:
            True if at least one policy entry changed, otherwise False.
        """
        rows = self.game.size[0]
        cols = self.game.size[1]
        change = False
        for i in range(rows):
            for j in range(cols):
                if self.game.policyGrid[i][j] in [0, 1, 2, 3]:
                    # bestMove() evaluates the game's current square.
                    self.game.currentSquare = (i, j)
                    oldMove = self.game.policyGrid[i][j]
                    self.game.policyGrid[i][j] = self.game.bestMove()
                    if oldMove != self.game.policyGrid[i][j]:
                        change = True
        return change

    def printGrids(self):
        """Print the policy, return and value grids and the visit counts."""
        self.game.printPolicyGrid()
        self.game.printReturnGrid()
        self.game.printValueGrid()
        print(self.squareCountGrid)
class MazeRunner:
    """Pygame front end for playing a ``GridWorld`` maze with the keyboard.

    The pygame module is injected through the constructor and reached via
    ``self.pygame`` everywhere.  Grid squares are ``(row, col)``; a square
    is drawn at ``x = START_X + col * 32`` and ``y = START_Y + row * 32``.
    """

    def __init__(self, pygameIn):
        """Initialise pygame, open the window and load the sprites.

        Args:
            pygameIn: The ``pygame`` module (or a compatible object).
        """
        self.pygame = pygameIn
        self.pygame.init()

        # Move codes handed to GridWorld.move().
        self.UP = 0
        self.RIGHT = 1
        self.DOWN = 2
        self.LEFT = 3

        # Maze area is MAX_X x MAX_Y; the info panel adds INFO_X on the right.
        self.MAX_Y = 20 * 32
        self.MAX_X = 16 * 32
        self.INFO_X = 6 * 32
        self.INFO_Y = self.MAX_Y

        self.stepSize = 32
        self.leftWall = 0
        self.upperWall = 0

        logo = self.pygame.image.load("unicorn32.bmp")
        self.pygame.display.set_icon(logo)
        self.pygame.display.set_caption("Maze Runner")
        self.screen = self.pygame.display.set_mode(
            (self.MAX_X + self.INFO_X, self.MAX_Y))
        self.score = 0

        self.BLACK = (0, 0, 0)
        self.WHITE = (255, 255, 255)

        # Menu entry name -> index of the entry (index 0 is the title line).
        self.menuDict = {'Play': 1, 'DP': 2, 'MC': 3, 'Exit': 4}
        self.mazeDict = {'small': 1, 'medium': 2, 'large': 3, 'Exit': 4}

        # Set once the window has been closed, so that the menu loops
        # unwind instead of drawing on a display that no longer exists.
        self._quitRequested = False
        self.loadImages()

    def loadImages(self):
        """Load all 32x32 sprites through the injected pygame module."""
        load = self.pygame.image.load
        self.unicornImage = load("unicorn32.bmp")
        self.rainbowImage = load("rainbow32.bmp")
        self.wallImage = load("brick32.bmp")
        self.hellImage = load("hell32.bmp")
        self.appleImage = load("apple32.bmp")
        self.bombImage = load("bomb32.bmp")

    def drawBorder(self):
        """Draw the white frame around the maze and flip the display.

        Horizontal extents use ``MAZE_X`` and vertical extents ``MAZE_Y``.
        """
        FRAME = 8
        color = (255, 255, 255)
        left = self.START_X - FRAME
        right = self.START_X + self.MAZE_X + FRAME
        top = self.START_Y - FRAME
        bottom = self.START_Y + self.MAZE_Y + FRAME

        # The top and left lines start 2px early so the corner is filled.
        segments = (
            ((left - 2, top), (right, top)),    # top
            ((left, top - 2), (left, bottom)),  # left
            ((right, top), (right, bottom)),    # right
            ((left, bottom), (right, bottom)),  # bottom
        )
        for start, end in segments:
            self.pygame.draw.line(self.screen, color, start, end, FRAME)
        self.pygame.display.flip()

    def placeTokens(self):
        """Draw walls, exits, apples, bombs and the player, then flip.

        A square is drawn as a wall when its policy entry is -1, as a good
        or bad exit when the entry is 9 (by the sign of its return), and as
        an apple or bomb when its return is above or below
        ``GridWorldGame.returnGridValue``.
        """
        rows = self.GridWorldGame.size[0]
        cols = self.GridWorldGame.size[1]
        returnValue = self.GridWorldGame.returnGridValue
        for i in range(rows):
            for j in range(cols):
                policy = self.GridWorldGame.policyGrid[i][j]
                squareReturn = self.GridWorldGame.returnGrid[i][j]
                pos = (self.START_X + j * 32, self.START_Y + i * 32)
                if policy == -1:
                    self.screen.blit(self.wallImage, pos)
                if policy == 9 and squareReturn > 0:
                    self.screen.blit(self.rainbowImage, pos)
                if policy == 9 and squareReturn < 0:
                    self.screen.blit(self.hellImage, pos)
                # NOTE(review): the exclusion list is [1, 9]; since walls
                # are -1 this may have been meant as [-1, 9] -- confirm.
                if policy not in [1, 9] and squareReturn > returnValue:
                    self.screen.blit(self.appleImage, pos)
                if policy not in [1, 9] and squareReturn < returnValue:
                    self.screen.blit(self.bombImage, pos)
        self.screen.blit(self.unicornImage, self.smileyPos)
        self.pygame.display.flip()

    def run(self):
        """Run the main menu until the user exits, then shut pygame down."""
        self.mainMenu()
        self.pygame.quit()

    def play(self):
        """Let the user steer with the arrow keys until the game is over.

        Returns early when the window is closed (see ``quitting``).
        """
        arrows = (
            (self.pygame.K_LEFT, self.LEFT),
            (self.pygame.K_UP, self.UP),
            (self.pygame.K_RIGHT, self.RIGHT),
            (self.pygame.K_DOWN, self.DOWN),
        )
        while True:
            self.pygame.time.delay(100)
            self.pygame.event.pump()
            key = self.pygame.key.get_pressed()
            for keyCode, direction in arrows:
                if key[keyCode]:
                    self.move(direction)
                    self.printScore()
            if self.GridWorldGame.gameOver():
                return
            if self.quitting():
                return

    def mazeMenu(self):
        """Show the maze selection menu.

        Returns:
            The result of ``chooseMaze``: True when a maze was chosen,
            False on "Back" or when the window was closed.
        """
        self.clearScreen()
        self.menuItems = 0
        self.menuItemsPos = []
        self.menuItemIdx = 1
        for caption in ('Choose Maze', 'Small Maze', 'Medium', 'Large',
                        'Back'):
            self.printText(caption)
        self.pygame.display.flip()
        self.pygame.time.delay(100)
        return self.chooseMaze()

    def _stepCursor(self, key, idx, cursor):
        """Move the menu cursor according to the UP/DOWN keys.

        Erases the sprite at the position the cursor had when this call
        started and draws it at the new entry.  Entry 0 (the title) can
        never be selected.

        Returns:
            The new ``(idx, cursor)`` pair.
        """
        oldArea = (cursor[0], cursor[1], 32, 32)
        if key[self.pygame.K_UP] and idx > 1:
            self.pygame.draw.rect(self.screen, self.BLACK, oldArea)
            idx -= 1
            cursor = self.menuItemsPos[idx]
            self.screen.blit(self.unicornImage, cursor)
        if key[self.pygame.K_DOWN] and idx < (self.menuItems - 1):
            self.pygame.draw.rect(self.screen, self.BLACK, oldArea)
            idx += 1
            cursor = self.menuItemsPos[idx]
            self.screen.blit(self.unicornImage, cursor)
        return idx, cursor

    def chooseMaze(self):
        """Run the maze menu loop.

        Returns:
            True when "Small Maze" or "Medium" is confirmed, False on
            "Back" or when the window was closed.
        """
        idx = 1
        cursor = self.menuItemsPos[idx]
        self.screen.blit(self.unicornImage, cursor)
        while True:
            self.pygame.time.delay(100)
            key = self.pygame.key.get_pressed()
            self.pygame.event.pump()
            idx, cursor = self._stepCursor(key, idx, cursor)
            if key[self.pygame.K_RETURN]:
                if idx == self.mazeDict['small']:
                    self.createSmallMaze()
                    return True
                if idx == self.mazeDict['medium']:
                    # NOTE(review): no medium maze is built here, so the
                    # caller plays whatever maze was created before (or
                    # fails if none was) -- confirm the intended behaviour.
                    return True
                if idx == self.mazeDict['Exit']:
                    return False
            self.pygame.display.flip()
            if self.quitting():
                return False

    def createSmallMaze(self):
        """Create a 5x5 ``GridWorld`` and centre it in the maze area."""
        self.GridWorldGame = GridWorld((5, 5))
        rows = self.GridWorldGame.size[0]
        cols = self.GridWorldGame.size[1]
        self.MAZE_X = cols * 32
        self.MAZE_Y = rows * 32
        FRAME = 8
        self.START_X = (self.MAX_X - cols * 32) / 2 + FRAME
        self.START_Y = (self.MAX_Y - rows * 32) / 2 + FRAME
        self.smileyPos = (self.START_X, self.START_Y)

    def dynamicProgramming(self):
        """Placeholder for the dynamic-programming menu entry."""
        pass

    def setupGame(self):
        """Draw a fresh game screen and reset score and player position."""
        self.clearScreen()
        self.drawBorder()
        self.placeTokens()
        self.setupRightArea()
        self.GridWorldGame.returnCount = 0
        self.GridWorldGame.currentSquare = (0, 0)
        self.printScore()

    def setupRightArea(self):
        """Draw the info panel: separator, "Score" heading and legend.

        The legend reuses the sprites loaded by ``loadImages``.
        """
        FRAME = 4
        white = (255, 255, 255)
        black = (0, 0, 0)
        self.pygame.draw.line(self.screen, white, (self.MAX_X, 0),
                              (self.MAX_X, self.MAX_Y), FRAME)

        fontScore = self.pygame.freetype.Font('freesansbold.ttf', 32)
        (textScore, textposScore) = fontScore.render("Score", white, black)
        textposScore = [self.MAX_X + 64, 64, 0, 0]
        self.screen.blit(textScore, textposScore)

        adjustY = 12  # lifts each sprite so it lines up with its label
        x1 = self.MAX_X + 8
        legend = (
            ("Collect: ", self.appleImage, 192),
            ("Avoid: ", self.bombImage, 256),
            ("Good Exit: ", self.rainbowImage, 320),
            ("Bad Exit: ", self.hellImage, 384),
            ("Just a wall: ", self.wallImage, 448),
        )
        for label, image, y1 in legend:
            lengthOfText = self.printTextRightArea(24, label, x1, y1)
            self.screen.blit(image, (x1 + lengthOfText, y1 - adjustY))
        self.pygame.display.flip()

    def printTextRightArea(self, fontSize, text, x, y):
        """Render ``text`` in white on black at ``(x, y)``.

        Returns:
            ``rect[2] - rect[0]`` of the rectangle returned by the font's
            ``render`` call, used by the caller as the text length.
        """
        fontToken = self.pygame.freetype.Font('freesansbold.ttf', fontSize)
        (textCollect, textposCollect) = fontToken.render(
            text, self.WHITE, self.BLACK)
        lengthOfCollect = textposCollect[2] - textposCollect[0]
        self.screen.blit(textCollect, [x, y, 0, 0])
        return lengthOfCollect

    def printScore(self):
        """Redraw ``GridWorldGame.returnCount`` in the info panel."""
        score = str(self.GridWorldGame.returnCount)
        x1 = self.MAX_X + self.INFO_X / 2
        y1 = 3 * 32
        # Blank the previous score before drawing the new one.
        self.pygame.draw.rect(self.screen, self.BLACK, (x1, y1, 32, 32))
        fontScore = self.pygame.freetype.Font('freesansbold.ttf', 32)
        white = (255, 255, 255)
        black = (0, 0, 0)
        (textScore, textposScore) = fontScore.render(score, white, black)
        self.screen.blit(textScore, [x1, y1, 0, 0])
        self.pygame.display.flip()

    def move(self, direction):
        """Move in the game and redraw the player sprite.

        Args:
            direction: One of ``self.UP``, ``self.RIGHT``, ``self.DOWN``,
                ``self.LEFT``; forwarded to ``GridWorldGame.move``.
        """
        oldSquare = self.GridWorldGame.currentSquare
        x1 = oldSquare[1] * 32 + self.START_X
        y1 = oldSquare[0] * 32 + self.START_Y
        self.GridWorldGame.move(direction)
        newSquare = self.GridWorldGame.currentSquare
        x2 = newSquare[1] * 32 + self.START_X
        y2 = newSquare[0] * 32 + self.START_Y
        self.pygame.draw.rect(self.screen, self.BLACK, (x1, y1, 32, 32))
        self.screen.blit(self.unicornImage, (x2, y2))
        self.pygame.display.flip()

    def mainMenu(self):
        """Redraw and run the main menu until ``choseFromMenu`` says stop."""
        running = True
        while running:
            self.clearScreen()
            self.menuItems = 0
            self.menuItemsPos = []
            self.menuItemIdx = 1
            for caption in ('Main menu', 'Play game', 'Dynamic Programming',
                            'Monte Carlo', 'Exit'):
                self.printText(caption)
            self.pygame.display.flip()
            self.pygame.time.delay(100)
            running = self.choseFromMenu()

    def printText(self, text):
        """Append one horizontally centred line to the current menu.

        The first line of a menu is drawn as a 48pt title, the rest as 24pt
        entries spaced 48px apart.  The cursor position for the line (32px
        left of the text) is appended to ``self.menuItemsPos``.
        """
        if self.menuItems == 0:
            fontSize = 48
            startY = 32
        else:
            fontSize = 24
            startY = 32 + self.menuItems * 48
        fontMenu = self.pygame.freetype.Font('freesansbold.ttf', fontSize)
        white = (255, 255, 255)
        black = (0, 0, 0)
        (textMenu, textposMenu) = fontMenu.render(text, white, black)
        lengthOfText = textposMenu[2] - textposMenu[0]
        textposMenu[0] = (self.MAX_X + self.INFO_X - lengthOfText) / 2
        textposMenu[1] = startY
        textposMenu[2] = textposMenu[2] + lengthOfText
        textposMenu[3] = textposMenu[1] + 32
        self.menuItemsPos.append((textposMenu[0] - 32, startY))
        self.screen.blit(textMenu, textposMenu)
        self.menuItems += 1

    def choseFromMenu(self):
        """Run the main menu loop until an entry is confirmed.

        Returns:
            True when the main menu should be shown again, False when the
            application should end ("Exit" chosen or the window closed).
        """
        idx = 1
        cursor = self.menuItemsPos[idx]
        self.screen.blit(self.unicornImage, cursor)
        while True:
            self.pygame.time.delay(100)
            key = self.pygame.key.get_pressed()
            self.pygame.event.pump()
            idx, cursor = self._stepCursor(key, idx, cursor)
            if key[self.pygame.K_RETURN]:
                if idx == self.menuDict['Play']:
                    if self.mazeMenu():
                        self.setupGame()
                        self.play()
                        # Skip the end screen if the window was closed
                        # while playing: the display is gone by then.
                        if not self._quitRequested:
                            self.endScreen()
                    return not self._quitRequested
                if idx == self.menuDict['DP']:
                    self.dynamicProgramming()
                    return True
                if idx == self.menuDict['Exit']:
                    return False
            self.pygame.display.flip()
            if self.quitting():
                return False

    def quitting(self):
        """Drain the event queue and report whether the window was closed.

        On a QUIT event pygame is shut down and the quit flag is set.

        Returns:
            True if a QUIT event was found, otherwise False.
        """
        for event in self.pygame.event.get():
            if event.type == self.pygame.QUIT:
                self._quitRequested = True
                self.pygame.quit()
                return True
        return False

    def clearScreen(self):
        """Paint the whole window black (without flipping the display)."""
        self.pygame.draw.rect(
            self.screen, self.BLACK,
            (0, 0, self.MAX_X + self.INFO_X, self.MAX_Y))

    def _blitCentered(self, font, text, y):
        """Render ``text`` with ``font`` horizontally centred at height ``y``."""
        white = (255, 255, 255)
        black = (0, 0, 0)
        (textScore, textposScore) = font.render(text, white, black)
        lengthOfText = textposScore[2] - textposScore[0]
        textposScore[0] = (self.MAX_X + self.INFO_X - lengthOfText) / 2
        textposScore[1] = y
        textposScore[2] = 0
        textposScore[3] = 0
        self.screen.blit(textScore, textposScore)

    def endScreen(self):
        """Show the final score and wait for Q or for the window to close."""
        self.clearScreen()
        fontScore = self.pygame.freetype.Font('freesansbold.ttf', 32)
        self._blitCentered(fontScore, "your score is", 64)
        self._blitCentered(fontScore, str(self.GridWorldGame.returnCount),
                           128)
        self._blitCentered(fontScore, "Press Q to get back to menu", 192)
        self.pygame.display.flip()

        while True:
            self.pygame.time.delay(100)
            self.pygame.event.pump()
            key = self.pygame.key.get_pressed()
            if key[self.pygame.K_q]:
                return
            if self.quitting():
                return
class MC_Aprox_Solution:
    """Monte Carlo learning with a linear value-function approximation.

    The value of a square is estimated as ``theta . s2x(square)``; the
    estimates are written into the ``valueGrid`` of a 5x5 ``GridWorld``.
    """

    def __init__(self):
        """Create the game and randomly initialise the four weights."""
        self.game = GridWorld((5, 5))
        self.learning_rate = 0.001
        self.theta = np.random.randn(4) / 2

    def s2x(self, square):
        """Map a square to its 4-element feature vector.

        The features are ``[i - 1, j - 1.5, i * j - 3, 1]`` for
        ``square == (i, j)``; the trailing 1 acts as a bias term.
        """
        return np.array(
            [square[0] - 1, square[1] - 1.5, square[0] * square[1] - 3, 1])

    def _chooseMove(self, i, j, randomMove):
        """Pick the move to play from square ``(i, j)``.

        The policy move is kept unless ``randomMove < np.random.rand()``;
        in that case one of the *other* possible moves is drawn uniformly,
        falling back to the policy move when there is no alternative.
        """
        move = self.game.policyGrid[i][j]
        if randomMove < np.random.rand():
            # Copy so the list owned by the game is never mutated, and
            # tolerate a policy move that is not among the possible ones.
            alternatives = list(self.game.possibleMoves((i, j)))
            if move in alternatives:
                alternatives.remove(move)
            if alternatives:
                idx = np.random.randint(0, len(alternatives))
                move = alternatives[idx]
        return move

    def playMCGame(self, startSquare, randomMove):
        """Play one episode and store the discounted return of each square.

        On success ``self.squares_and_values`` holds ``(square, G)`` pairs
        ordered from the last visited square back to ``startSquare``, with
        ``G`` discounted by ``self.game.gamma``.

        Args:
            startSquare: Square the episode starts from.
            randomMove: Exploration threshold, see ``_chooseMove``.

        Returns:
            True when the episode reached game over; False when it was
            abandoned after 2000 moves, in which case
            ``self.squares_and_values`` is left untouched.
        """
        self.game.currentSquare = startSquare
        keepPlaying = not self.game.gameOver()
        squares_and_returns = [(self.game.currentSquare, 0)]
        counter = 0
        while keepPlaying:
            counter += 1
            if counter > 2000:
                # Give up on episodes that never reach game over.
                return False
            i = self.game.currentSquare[0]
            j = self.game.currentSquare[1]
            self.game.move(self._chooseMove(i, j, randomMove))

            i = self.game.currentSquare[0]
            j = self.game.currentSquare[1]
            theReturn = self.game.returnGrid[i][j]
            squares_and_returns.append((self.game.currentSquare, theReturn))
            keepPlaying = not self.game.gameOver()

        # Walk the episode backwards, accumulating the discounted return.
        G = 0
        self.squares_and_values = []
        for square, theReturn in reversed(squares_and_returns):
            self.squares_and_values.append((square, G))
            G = theReturn + self.game.gamma * G
        return True

    def updateValueGrid(self, t):
        """Update ``theta`` from the stored episode and refresh ``valueGrid``.

        For the first occurrence of each square in
        ``self.squares_and_values`` the weights take one gradient step
        towards the observed return ``G``.  Afterwards every square whose
        policy entry is a move code 0-3 gets its value recomputed from the
        new weights.

        Args:
            t: Non-negative iteration counter; the step size is
                ``learning_rate / (t + 1)``.
        """
        visitedSquares = set()
        alpha = self.learning_rate / (t + 1)
        for square, G in self.squares_and_values:
            if square not in visitedSquares:
                visitedSquares.add(square)
                x = self.s2x(square)
                V_hat = self.theta.dot(x)
                self.theta += alpha * (G - V_hat) * x

        rows = self.game.size[0]
        cols = self.game.size[1]
        for i in range(rows):
            for j in range(cols):
                if self.game.policyGrid[i][j] in [0, 1, 2, 3]:
                    self.game.valueGrid[i][j] = self.theta.dot(
                        self.s2x((i, j)))

    def updatePolicyGrid(self):
        """Replace every move in ``policyGrid`` by ``game.bestMove()``.

        Only squares whose policy entry is one of the move codes 0-3 are
        touched.  ``game.currentSquare`` is overwritten while scanning.

        Returns:
            True if at least one policy entry changed, otherwise False.
        """
        rows = self.game.size[0]
        cols = self.game.size[1]
        change = False
        for i in range(rows):
            for j in range(cols):
                if self.game.policyGrid[i][j] in [0, 1, 2, 3]:
                    # bestMove() evaluates the game's current square.
                    self.game.currentSquare = (i, j)
                    oldMove = self.game.policyGrid[i][j]
                    self.game.policyGrid[i][j] = self.game.bestMove()
                    if oldMove != self.game.policyGrid[i][j]:
                        change = True
        return change

    def printGrids(self):
        """Print the policy, return and value grids of the game."""
        self.game.printPolicyGrid()
        self.game.printReturnGrid()
        self.game.printValueGrid()