Python GridWorld.move примеры использования

Язык программирования: Python

Пространство имен/Пакет: GridWorld

Класс/Тип: GridWorld

Метод/Функция: move

Примеров на hotexamples.com: 4

Python GridWorld.move - 4 примера найдено. Это лучшие примеры Python кода для GridWorld.GridWorld.move, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать / Скрыть

GridWorld(30)

move(4)

bestMove(4)

reset(4)

printValueGrid(4)

printReturnGrid(4)

printPolicyGrid(4)

possibleMoves(4)

gameOver(3)

scan_grid_and_generate_graph(2)

save_graph(2)

render(2)

print_graph(2)

setDiscountFactor(2)

after(2)

createSquareCount(2)

create_grid_ui(2)

getActions(1)

printQValues(1)

move_on_given_route_aco(1)

move_on_given_route_genetic(1)

players(1)

get(1)

printPolicy(1)

generateWalls(1)

dfs_route(1)

generateGoals(1)

move_on_given_route(1)

printValues(1)

print_map(1)

randomAction(1)

cells(1)

addRandomObstacles(1)

move_on_given_route_a_star(1)

getAccessibleCells(1)

getAgentPose(1)

get_next_state(1)

getLength(1)

getPoseFromAction(1)

getPredFromPose(1)

getState(1)

getStateSpace(1)

getTotalSteps(1)

get_grid_world(1)

get_number_of_actions(1)

make_grid_world(1)

get_number_of_states(1)

get_rewards(1)

addAgent(1)

gridDefine(1)

Пример #1

Показать файл

Файл: TDL.py Проект: eriktoger/Reinforcment-Learning

class TDL_solution:
    """Temporal-difference style learners driving a ``GridWorld`` game.

    The game object is expected to expose ``currentSquare``, ``policyGrid``,
    ``returnGrid``, ``valueGrid``, ``size``, ``gamma`` and the methods
    ``gameOver()``, ``possibleMoves(square)``, ``move(move)`` and
    ``bestMove()`` (all of them are used below).
    """

    def __init__(self):
        self.game = GridWorld((5, 5))
        self.squareCountGrid = self.game.createSquareCount()
        self.alpha = 0.1  # learning rate of the TD updates
        self.gamma = 0.9  # discount used by the Sarsa / Q-learning updates
        # Filled by playTDLGame(); initialised here so updateValueGrid() is
        # a harmless no-op when it is called before the first episode.
        self.squares_and_values = []

    def _choose_move(self, square, randomMove):
        """Return the move to play from ``square``.

        The policy move is kept unless ``randomMove < np.random.rand()``; in
        that case a uniformly random *other* legal move is chosen when one
        exists.  The list returned by ``possibleMoves`` is never mutated and
        a policy move that is not part of it no longer raises ``ValueError``.
        """
        i = square[0]
        j = square[1]
        move = self.game.policyGrid[i][j]

        if randomMove < np.random.rand():
            alternatives = [
                m for m in self.game.possibleMoves((i, j)) if m != move
            ]
            if alternatives:
                move = alternatives[np.random.randint(0, len(alternatives))]
        return move

    def playTDLGame(self, startSquare, randomMove):
        """Play one episode and store ``(square, G)`` pairs.

        ``self.squares_and_values`` ends up ordered from the last visited
        square back to ``startSquare``; ``G`` is the discounted sum of the
        returns collected *after* leaving that square (discounted with
        ``self.game.gamma``).
        """
        self.game.currentSquare = startSquare
        squares_and_returns = [(self.game.currentSquare, 0)]

        while not self.game.gameOver():
            self.game.move(
                self._choose_move(self.game.currentSquare, randomMove))
            i = self.game.currentSquare[0]
            j = self.game.currentSquare[1]
            squares_and_returns.append(
                (self.game.currentSquare, self.game.returnGrid[i][j]))

        G = 0
        self.squares_and_values = []
        for square, theReturn in reversed(squares_and_returns):
            self.squares_and_values.append((square, G))
            G = theReturn + self.game.gamma * G

    def playSarsa(self, startSquare, randomMove):
        """Play one episode, applying a TD(0) update after every move."""
        self.game.currentSquare = startSquare
        values = self.game.valueGrid

        while not self.game.gameOver():
            i1 = self.game.currentSquare[0]
            j1 = self.game.currentSquare[1]

            self.game.move(self._choose_move((i1, j1), randomMove))

            i2 = self.game.currentSquare[0]
            j2 = self.game.currentSquare[1]
            theReturn = self.game.returnGrid[i2][j2]
            values[i1][j1] = values[i1][j1] + self.alpha * (
                theReturn + self.gamma * values[i2][j2] - values[i1][j1])

    def playQLearning(self, startSquare, randomMove):
        """Play one episode with the Q-learning flavoured update.

        NOTE(review): the bootstrap square is the square the agent is
        *leaving* (the original code read it before the move), so the target
        is ``return + gamma * V(s)`` rather than the value of a successor.
        This looks unintended but the intended successor cannot be derived
        from this file, so the behaviour is preserved -- please confirm.
        """
        self.game.currentSquare = startSquare
        values = self.game.valueGrid

        while not self.game.gameOver():
            i1 = self.game.currentSquare[0]
            j1 = self.game.currentSquare[1]
            # bootstrap square, captured before the move (see NOTE above)
            i3, j3 = i1, j1

            self.game.move(self._choose_move((i1, j1), randomMove))

            i2 = self.game.currentSquare[0]
            j2 = self.game.currentSquare[1]
            theReturn = self.game.returnGrid[i2][j2]
            values[i1][j1] = values[i1][j1] + self.alpha * (
                theReturn + self.gamma * values[i3][j3] - values[i1][j1])

    def updateValueGrid(self):
        """Apply one TD update per consecutive pair of recorded squares.

        For each pair the first element's square is updated, using the second
        element's stored value and the second element's current grid value.
        """
        values = self.game.valueGrid
        pairs = zip(self.squares_and_values, self.squares_and_values[1:])
        for (square, _), (nextSquare, value) in pairs:
            i1, j1 = square[0], square[1]
            i2, j2 = nextSquare[0], nextSquare[1]
            values[i1][j1] = values[i1][j1] + self.alpha * (
                value + self.gamma * values[i2][j2] - values[i1][j1])

    def updatePolicyGrid(self):
        """Replace every move entry (0..3) by ``game.bestMove()``.

        Returns:
            True if at least one entry of the policy grid changed.
        """
        rows = self.game.size[0]
        cols = self.game.size[1]
        change = False
        for i in range(rows):
            for j in range(cols):
                oldMove = self.game.policyGrid[i][j]
                # other values mark squares that carry no move
                if oldMove not in (0, 1, 2, 3):
                    continue
                # bestMove() evaluates the game's current square
                self.game.currentSquare = (i, j)
                self.game.policyGrid[i][j] = self.game.bestMove()
                if oldMove != self.game.policyGrid[i][j]:
                    change = True
        return change

    def printGrids(self):
        """Print the policy, return and value grids of the game."""
        self.game.printPolicyGrid()
        self.game.printReturnGrid()
        self.game.printValueGrid()

Пример #2

Показать файл

class MC_solution:
    """Tabular Monte Carlo learner driving a ``GridWorld`` game.

    The game object is expected to expose ``currentSquare``, ``policyGrid``,
    ``returnGrid``, ``valueGrid``, ``size``, ``gamma`` and the methods
    ``gameOver()``, ``possibleMoves(square)``, ``move(move)`` and
    ``bestMove()`` (all of them are used below).
    """

    def __init__(self):
        self.game = GridWorld((5, 5))
        # per-square visit counter used for the running average
        self.squareCountGrid = self.game.createSquareCount()
        # Filled by playMCGame(); initialised here so updateValueGrid() is a
        # harmless no-op when it is called before the first episode.
        self.squares_and_values = []

    def _choose_move(self, square, randomMove):
        """Return the move to play from ``square``.

        The policy move is kept unless ``randomMove < np.random.rand()``; in
        that case a uniformly random *other* legal move is chosen when one
        exists.  The list returned by ``possibleMoves`` is never mutated and
        a policy move that is not part of it no longer raises ``ValueError``.
        """
        i = square[0]
        j = square[1]
        move = self.game.policyGrid[i][j]

        if randomMove < np.random.rand():
            alternatives = [
                m for m in self.game.possibleMoves((i, j)) if m != move
            ]
            if alternatives:
                move = alternatives[np.random.randint(0, len(alternatives))]
        return move

    def playMCGame(self, startSquare, randomMove):
        """Play one episode and store ``(square, G)`` pairs.

        ``self.squares_and_values`` ends up ordered from the last visited
        square back to ``startSquare``; ``G`` is the discounted sum of the
        returns collected *after* leaving that square.
        """
        self.game.currentSquare = startSquare
        squares_and_returns = [(self.game.currentSquare, 0)]

        while not self.game.gameOver():
            self.game.move(
                self._choose_move(self.game.currentSquare, randomMove))
            i = self.game.currentSquare[0]
            j = self.game.currentSquare[1]
            squares_and_returns.append(
                (self.game.currentSquare, self.game.returnGrid[i][j]))

        G = 0
        self.squares_and_values = []
        for square, theReturn in reversed(squares_and_returns):
            self.squares_and_values.append((square, G))
            G = theReturn + self.game.gamma * G

    def updateValueGrid(self):
        """Fold the last episode into the value grid as a running average.

        Each square is updated once per episode, with the first ``G`` found
        for it in ``self.squares_and_values``.  Because that list runs from
        the end of the episode backwards, this is the square's *latest*
        visit in game time.
        """
        visitedSquares = set()

        for square, G in self.squares_and_values:
            if square in visitedSquares:
                continue
            visitedSquares.add(square)
            i = square[0]
            j = square[1]
            self.squareCountGrid[i][j] += 1
            self.game.valueGrid[i][j] = self.game.valueGrid[i][j] + (
                G - self.game.valueGrid[i][j]) / self.squareCountGrid[i][j]

    def updatePolicyGrid(self):
        """Replace every move entry (0..3) by ``game.bestMove()``.

        Returns:
            True if at least one entry of the policy grid changed.
        """
        rows = self.game.size[0]
        cols = self.game.size[1]
        change = False
        for i in range(rows):
            for j in range(cols):
                oldMove = self.game.policyGrid[i][j]
                # other values mark squares that carry no move
                if oldMove not in (0, 1, 2, 3):
                    continue
                # bestMove() evaluates the game's current square
                self.game.currentSquare = (i, j)
                self.game.policyGrid[i][j] = self.game.bestMove()
                if oldMove != self.game.policyGrid[i][j]:
                    change = True
        return change

    def printGrids(self):
        """Print the policy, return and value grids plus the visit counts."""
        self.game.printPolicyGrid()
        self.game.printReturnGrid()
        self.game.printValueGrid()
        print(self.squareCountGrid)

Пример #3

Показать файл

Файл: MazeRunner.py Проект: eriktoger/Reinforcment-Learning

class MazeRunner:
    """Pygame front end for playing a ``GridWorld`` maze with the arrow keys.

    The pygame module is injected through the constructor and every pygame
    call goes through ``self.pygame``.  Grid squares are ``(row, col)``
    tuples; every tile is 32x32 pixels.

    NOTE(review): ``self.pygame.freetype`` is used for all text, which
    presumably requires the caller to have imported ``pygame.freetype``.
    """

    def __init__(self, pygameIn):
        """Initialise pygame, open the window and load the images.

        Args:
            pygameIn: the imported ``pygame`` module.
        """
        self.pygame = pygameIn
        self.pygame.init()

        # direction codes handed to GridWorld.move()
        self.UP = 0
        self.RIGHT = 1
        self.DOWN = 2
        self.LEFT = 3

        # maze area is MAX_X x MAX_Y, the info panel to its right is INFO_X
        self.MAX_Y = 20 * 32
        self.MAX_X = 16 * 32
        self.INFO_X = 6 * 32
        self.INFO_Y = self.MAX_Y

        self.stepSize = 32
        self.leftWall = 0
        self.upperWall = 0

        logo = self.pygame.image.load("unicorn32.bmp")
        self.pygame.display.set_icon(logo)
        self.pygame.display.set_caption("Maze Runner")
        self.screen = self.pygame.display.set_mode(
            (self.MAX_X + self.INFO_X, self.MAX_Y))

        self.score = 0

        self.BLACK = (0, 0, 0)
        self.WHITE = (255, 255, 255)

        # menu entry index -> action (index 0 is the menu title)
        self.menuDict = {'Play': 1, 'DP': 2, 'MC': 3, 'Exit': 4}
        self.mazeDict = {'small': 1, 'medium': 2, 'large': 3, 'Exit': 4}

        # set by quitting() once the window was closed and pygame shut down
        self._windowClosed = False

        self.loadImages()

    def loadImages(self):
        """Load all tile images through the injected pygame module."""
        load = self.pygame.image.load
        self.unicornImage = load("unicorn32.bmp")
        self.rainbowImage = load("rainbow32.bmp")
        self.wallImage = load("brick32.bmp")
        self.hellImage = load("hell32.bmp")
        self.appleImage = load("apple32.bmp")
        self.bombImage = load("bomb32.bmp")

    def drawBorder(self):
        """Draw the white frame around the maze area and flip the display."""
        FRAME = 8
        color = (255, 255, 255)

        left = self.START_X - FRAME
        top = self.START_Y - FRAME
        right = self.START_X + self.MAZE_X + FRAME
        bottom = self.START_Y + self.MAZE_Y + FRAME

        # the top and left edges start 2px early so the corner is closed
        edges = (
            ((left - 2, top), (right, top)),  # top
            ((left, top - 2), (left, bottom)),  # left
            ((right, top), (right, bottom)),  # right
            ((left, bottom), (right, bottom)),  # bottom
        )
        for start, end in edges:
            self.pygame.draw.line(self.screen, color, start, end, FRAME)
        self.pygame.display.flip()

    def placeTokens(self):
        """Blit walls, exits, apples and bombs, then the player image.

        Policy value -1 marks a wall and 9 marks an exit (good if its return
        is positive, bad if negative).  Every other square shows an apple or
        a bomb when its return is above or below ``returnGridValue``
        (presumably the default return of a plain square).
        """
        game = self.GridWorldGame
        rows = game.size[0]
        cols = game.size[1]
        returnValue = game.returnGridValue

        for i in range(rows):
            for j in range(cols):
                marker = game.policyGrid[i][j]
                theReturn = game.returnGrid[i][j]

                image = None
                if marker == -1:
                    image = self.wallImage
                elif marker == 9:
                    if theReturn > 0:
                        image = self.rainbowImage
                    elif theReturn < 0:
                        image = self.hellImage
                elif theReturn > returnValue:
                    image = self.appleImage
                elif theReturn < returnValue:
                    image = self.bombImage

                if image is not None:
                    x = self.START_X + j * 32
                    y = self.START_Y + i * 32
                    self.screen.blit(image, (x, y))

        self.screen.blit(self.unicornImage, self.smileyPos)
        self.pygame.display.flip()

    def run(self):
        """Run the main menu until the user exits, then shut pygame down."""
        self.mainMenu()
        # pygame.quit() may already have run in quitting(); calling it again
        # is harmless
        self.pygame.quit()

    def play(self):
        """Poll the arrow keys every 100 ms until game over or window close."""
        keyMoves = (
            (self.pygame.K_LEFT, self.LEFT),
            (self.pygame.K_UP, self.UP),
            (self.pygame.K_RIGHT, self.RIGHT),
            (self.pygame.K_DOWN, self.DOWN),
        )

        while True:
            self.pygame.time.delay(100)
            self.pygame.event.pump()
            key = self.pygame.key.get_pressed()

            for keyCode, direction in keyMoves:
                if key[keyCode]:
                    self.move(direction)
                    self.printScore()

            if self.GridWorldGame.gameOver():
                return

            # window closed: pygame is shut down, stop touching it
            if self.quitting():
                return

    def mazeMenu(self):
        """Show the maze selection menu; return ``chooseMaze()``'s result."""
        self.clearScreen()

        self.menuItems = 0
        self.menuItemsPos = []
        self.menuItemIdx = 1
        for label in ('Choose Maze', 'Small Maze', 'Medium', 'Large', 'Back'):
            self.printText(label)

        self.pygame.display.flip()

        self.pygame.time.delay(100)
        return self.chooseMaze()

    def _placeCursor(self, oldIdx, newIdx):
        """Erase the cursor at entry ``oldIdx``, draw it at ``newIdx``."""
        x, y = self.menuItemsPos[oldIdx]
        self.pygame.draw.rect(self.screen, self.BLACK, (x, y, 32, 32))
        self.screen.blit(self.unicornImage, self.menuItemsPos[newIdx])
        return newIdx

    def _stepCursor(self, idx, key):
        """Move the cursor one entry per held arrow key; return the index.

        Entry 0 is the menu title and is never selectable.
        """
        if key[self.pygame.K_UP] and idx > 1:
            idx = self._placeCursor(idx, idx - 1)
        if key[self.pygame.K_DOWN] and idx < (self.menuItems - 1):
            idx = self._placeCursor(idx, idx + 1)
        return idx

    def chooseMaze(self):
        """Let the user pick a maze.

        Returns:
            True when a maze was chosen, False for 'Back' or when the window
            was closed.
        """
        idx = 1
        self.screen.blit(self.unicornImage, self.menuItemsPos[idx])

        while True:
            self.pygame.time.delay(100)

            key = self.pygame.key.get_pressed()
            self.pygame.event.pump()

            idx = self._stepCursor(idx, key)

            if key[self.pygame.K_RETURN]:
                if idx == self.mazeDict['small']:
                    self.createSmallMaze()
                    return True
                if idx == self.mazeDict['medium']:
                    # NOTE(review): no medium maze is created here, so the
                    # caller reuses whatever maze exists (or fails if none
                    # does); 'large' is not handled at all.
                    return True
                if idx == self.mazeDict['Exit']:
                    return False

            self.pygame.display.flip()
            if self.quitting():
                return False

    def createSmallMaze(self):
        """Create a 5x5 ``GridWorld`` and centre it in the maze area."""
        # should be GridWorldSmall()
        self.GridWorldGame = GridWorld((5, 5))
        rows = self.GridWorldGame.size[0]
        cols = self.GridWorldGame.size[1]
        self.MAZE_X = cols * 32
        self.MAZE_Y = rows * 32
        FRAME = 8
        # integer division keeps all pixel coordinates integral
        self.START_X = (self.MAX_X - self.MAZE_X) // 2 + FRAME
        self.START_Y = (self.MAX_Y - self.MAZE_Y) // 2 + FRAME
        self.smileyPos = (self.START_X, self.START_Y)

    def dynamicProgramming(self):
        """Placeholder for the 'Dynamic Programming' menu entry."""
        pass

    def setupGame(self):
        """Draw the maze and info panel and reset score and position."""
        self.clearScreen()
        self.drawBorder()
        self.placeTokens()
        self.setupRightArea()
        self.GridWorldGame.returnCount = 0
        self.GridWorldGame.currentSquare = (0, 0)
        self.printScore()

    def setupRightArea(self):
        """Draw the info panel: divider, 'Score' header and tile legend."""
        FRAME = 4
        white = (255, 255, 255)
        black = (0, 0, 0)

        # vertical divider between maze area and info panel
        self.pygame.draw.line(self.screen, white, (self.MAX_X, 0),
                              (self.MAX_X, self.MAX_Y), FRAME)

        fontScore = self.pygame.freetype.Font('freesansbold.ttf', 32)
        (textScore, _) = fontScore.render("Score", white, black)
        self.screen.blit(textScore, [self.MAX_X + 64, 64, 0, 0])

        # images come from loadImages() instead of being reloaded from disk
        legend = (
            (192, "Collect: ", self.appleImage),
            (256, "Avoid: ", self.bombImage),
            (320, "Good Exit: ", self.rainbowImage),
            (384, "Bad Exit: ", self.hellImage),
            (448, "Just a wall: ", self.wallImage),
        )
        adjustY = 12
        x1 = self.MAX_X + 8
        for y1, label, image in legend:
            lengthOfText = self.printTextRightArea(24, label, x1, y1)
            self.screen.blit(image, (x1 + lengthOfText, y1 - adjustY))

        self.pygame.display.flip()

    def printTextRightArea(self, fontSize, text, x, y):
        """Render ``text`` white-on-black at ``(x, y)``.

        Returns:
            ``rect[2] - rect[0]`` of the rectangle returned by ``render``,
            which callers use as the horizontal offset for what follows.
        """
        fontToken = self.pygame.freetype.Font('freesansbold.ttf', fontSize)
        (textCollect,
         textposCollect) = fontToken.render(text, self.WHITE, self.BLACK)
        lengthOfCollect = textposCollect[2] - textposCollect[0]
        self.screen.blit(textCollect, [x, y, 0, 0])

        return lengthOfCollect

    def printScore(self):
        """Redraw the current ``returnCount`` in the info panel."""
        score = str(self.GridWorldGame.returnCount)
        fontSize = 32

        x1 = self.MAX_X + self.INFO_X // 2
        y1 = 3 * 32
        # erase up to the right window edge so a longer previous score
        # leaves no stale digits behind
        self.pygame.draw.rect(self.screen, self.BLACK,
                              (x1, y1, self.INFO_X // 2, 32))

        fontScore = self.pygame.freetype.Font('freesansbold.ttf', fontSize)
        (textScore, _) = fontScore.render(score, self.WHITE, self.BLACK)
        self.screen.blit(textScore, [x1, y1, 0, 0])

        self.pygame.display.flip()

    def move(self, direction):
        """Move the player in the game and redraw it at its new square."""
        oldSquare = self.GridWorldGame.currentSquare
        x1 = oldSquare[1] * 32 + self.START_X
        y1 = oldSquare[0] * 32 + self.START_Y

        self.GridWorldGame.move(direction)

        newSquare = self.GridWorldGame.currentSquare
        x2 = newSquare[1] * 32 + self.START_X
        y2 = newSquare[0] * 32 + self.START_Y

        # blank the old tile first; the two squares may be the same one
        self.pygame.draw.rect(self.screen, self.BLACK, (x1, y1, 32, 32))
        self.screen.blit(self.unicornImage, (x2, y2))
        self.pygame.display.flip()

    def mainMenu(self):
        """Show the main menu until ``choseFromMenu()`` returns False."""
        #https://www.programcreek.com/python/example/93421/pygame.freetype
        running = True
        while running:

            self.clearScreen()

            self.menuItems = 0
            self.menuItemsPos = []
            self.menuItemIdx = 1

            for label in ('Main menu', 'Play game', 'Dynamic Programming',
                          'Monte Carlo', 'Exit'):
                self.printText(label)

            self.pygame.display.flip()

            self.pygame.time.delay(100)
            running = self.choseFromMenu()

    def printText(self, text):
        """Append one horizontally centred line to the current menu.

        The first line (``menuItems == 0``) is the title in a larger font.
        The cursor position for the line (32px left of the text) is recorded
        in ``menuItemsPos``.
        """
        if self.menuItems == 0:
            fontSize = 48
            startY = 32
        else:
            fontSize = 24
            startY = 32 + self.menuItems * 48

        fontMenu = self.pygame.freetype.Font('freesansbold.ttf', fontSize)
        (textMenu, textposMenu) = fontMenu.render(text, self.WHITE,
                                                  self.BLACK)
        lengthOfText = textposMenu[2] - textposMenu[0]
        textposMenu[0] = (self.MAX_X + self.INFO_X - lengthOfText) // 2
        textposMenu[1] = startY
        textposMenu[2] = textposMenu[2] + lengthOfText
        textposMenu[3] = textposMenu[1] + 32

        self.menuItemsPos.append((textposMenu[0] - 32, startY))

        self.screen.blit(textMenu, textposMenu)

        self.menuItems += 1

    def choseFromMenu(self):
        """Handle main menu input.

        Returns:
            True to show the main menu again, False on 'Exit' or when the
            window was closed.
        """
        idx = 1
        self.screen.blit(self.unicornImage, self.menuItemsPos[idx])

        while True:
            self.pygame.time.delay(100)

            key = self.pygame.key.get_pressed()
            self.pygame.event.pump()

            idx = self._stepCursor(idx, key)

            if key[self.pygame.K_RETURN]:
                if idx == self.menuDict['Play']:
                    if self.mazeMenu():
                        self.setupGame()
                        self.play()
                        if not self._windowClosed:
                            self.endScreen()
                    # after a window close pygame is shut down, so the menu
                    # must not be drawn again
                    return not self._windowClosed
                if idx == self.menuDict['DP']:
                    self.dynamicProgramming()
                    return True
                if idx == self.menuDict['Exit']:
                    return False

            self.pygame.display.flip()
            if self.quitting():
                return False

    def quitting(self):
        """Shut pygame down if a QUIT event is pending.

        Returns:
            True if the window was closed (pygame has been quit).
        """
        for event in self.pygame.event.get():
            # only do something if the event is of type QUIT
            if event.type == self.pygame.QUIT:
                self._windowClosed = True
                self.pygame.quit()
                return True
        return False

    def clearScreen(self):
        """Fill the whole window with black."""
        self.pygame.draw.rect(self.screen, self.BLACK,
                              (0, 0, self.MAX_X + self.INFO_X, self.MAX_Y))

    def _blitCentered(self, font, text, y):
        """Render ``text`` horizontally centred in the window at height y."""
        (surface, rect) = font.render(text, self.WHITE, self.BLACK)
        lengthOfText = rect[2] - rect[0]
        rect[0] = (self.MAX_X + self.INFO_X - lengthOfText) // 2
        rect[1] = y
        rect[2] = 0
        rect[3] = 0
        self.screen.blit(surface, rect)

    def endScreen(self):
        """Show the final score until Q is pressed or the window is closed."""
        self.clearScreen()
        fontScore = self.pygame.freetype.Font('freesansbold.ttf', 32)

        self._blitCentered(fontScore, "your score is", 64)
        self._blitCentered(fontScore, str(self.GridWorldGame.returnCount),
                           128)
        self._blitCentered(fontScore, "Press Q to get back to menu", 192)

        self.pygame.display.flip()

        while True:
            self.pygame.time.delay(100)
            self.pygame.event.pump()
            key = self.pygame.key.get_pressed()
            if key[self.pygame.K_q]:
                break
            # closing the window must also end the wait loop
            if self.quitting():
                break

Пример #4

Показать файл

Файл: MC_Aprox.py Проект: eriktoger/Reinforcment-Learning

class MC_Aprox_Solution:
    """Monte Carlo learner with a linear value-function approximation.

    The value of a square is ``theta . s2x(square)``.  The game object is
    expected to expose ``currentSquare``, ``policyGrid``, ``returnGrid``,
    ``valueGrid``, ``size``, ``gamma`` and the methods ``gameOver()``,
    ``possibleMoves(square)``, ``move(move)`` and ``bestMove()``.
    """

    def __init__(self):
        self.game = GridWorld((5, 5))
        self.learning_rate = 0.001
        # one weight per feature produced by s2x()
        self.theta = np.random.randn(4) / 2
        # Filled by playMCGame(); initialised here so updateValueGrid() can
        # be called before the first completed episode.
        self.squares_and_values = []

    def s2x(self, square):
        """Map a ``(row, col)`` square to its 4-element feature vector."""
        return np.array(
            [square[0] - 1, square[1] - 1.5, square[0] * square[1] - 3, 1])

    def _choose_move(self, square, randomMove):
        """Return the move to play from ``square``.

        The policy move is kept unless ``randomMove < np.random.rand()``; in
        that case a uniformly random *other* legal move is chosen when one
        exists.  The list returned by ``possibleMoves`` is never mutated and
        a policy move that is not part of it no longer raises ``ValueError``.
        """
        i = square[0]
        j = square[1]
        move = self.game.policyGrid[i][j]

        if randomMove < np.random.rand():
            alternatives = [
                m for m in self.game.possibleMoves((i, j)) if m != move
            ]
            if alternatives:
                move = alternatives[np.random.randint(0, len(alternatives))]
        return move

    def playMCGame(self, startSquare, randomMove):
        """Play one episode and store ``(square, G)`` pairs.

        Returns:
            True when the episode finished; False when it was aborted after
            2000 moves (``self.squares_and_values`` is then left unchanged).
        """
        self.game.currentSquare = startSquare
        squares_and_returns = [(self.game.currentSquare, 0)]
        counter = 0

        while not self.game.gameOver():
            # guard against policies that never reach a terminal square
            counter += 1
            if counter > 2000:
                return False

            self.game.move(
                self._choose_move(self.game.currentSquare, randomMove))
            i = self.game.currentSquare[0]
            j = self.game.currentSquare[1]
            squares_and_returns.append(
                (self.game.currentSquare, self.game.returnGrid[i][j]))

        G = 0
        self.squares_and_values = []
        for square, theReturn in reversed(squares_and_returns):
            self.squares_and_values.append((square, G))
            G = theReturn + self.game.gamma * G

        return True

    def updateValueGrid(self, t):
        """Do one gradient step per distinct square, then refresh the grid.

        Args:
            t: episode index; the step size is ``learning_rate / (t + 1)``.
        """
        visitedSquares = set()

        alpha = self.learning_rate / (t + 1)
        for square, G in self.squares_and_values:
            if square in visitedSquares:
                continue
            visitedSquares.add(square)

            x = self.s2x(square)
            V_hat = self.theta.dot(x)
            self.theta += alpha * (G - V_hat) * x

        # write the approximated values back for every square holding a move
        rows = self.game.size[0]
        cols = self.game.size[1]
        for i in range(rows):
            for j in range(cols):
                if self.game.policyGrid[i][j] in (0, 1, 2, 3):
                    self.game.valueGrid[i][j] = self.theta.dot(
                        self.s2x((i, j)))

    def updatePolicyGrid(self):
        """Replace every move entry (0..3) by ``game.bestMove()``.

        Returns:
            True if at least one entry of the policy grid changed.
        """
        rows = self.game.size[0]
        cols = self.game.size[1]
        change = False
        for i in range(rows):
            for j in range(cols):
                oldMove = self.game.policyGrid[i][j]
                # other values mark squares that carry no move
                if oldMove not in (0, 1, 2, 3):
                    continue
                # bestMove() evaluates the game's current square
                self.game.currentSquare = (i, j)
                self.game.policyGrid[i][j] = self.game.bestMove()
                if oldMove != self.game.policyGrid[i][j]:
                    change = True
        return change

    def printGrids(self):
        """Print the policy, return and value grids of the game."""
        self.game.printPolicyGrid()
        self.game.printReturnGrid()
        self.game.printValueGrid()