Exemplo n.º 1
0
 def main(self):
     """Build and train the Q-network, then play a demonstration game."""
     self.gameMap = self.game.getMap()
     # 2 inputs, 5 hidden neurons, 4 outputs — presumably one output per
     # move direction; confirm against the game's action set.
     network = Net(2, 5, 4)
     network.setEpoch(1)
     self.net = network
     self.train()
     self.playGame()
Exemplo n.º 2
0
 def run(self):
     """Prepare and display the data, train a 2-6-2 network on it, and
     plot the network output before and after training.
     """
     self.prepareData()
     self.showData()
     network = Net(2, 6, 2)
     # Show the untrained network first for comparison.
     self.showNetData(network, 'dumb')
     network.Train(self.input, self.output)
     self.showNetData(network, 'trained')
     plt.show()
Exemplo n.º 3
0
Arquivo: tq1.py Projeto: altek42/inz
    def learn(self, epoch, neuronsCount):
        """Train a fresh network on data derived from the Q table.

        epoch -- number of training epochs for the network.
        neuronsCount -- hidden-layer size.
        Returns the trained Net.
        """
        numActions = self.game.getNumActions()
        currentState = self.game.getState()
        network = Net(len(currentState), neuronsCount, numActions)
        network.setEpoch(epoch)

        inputs, outputs = self.transformQToLearningData()
        network.Train(inputs, outputs)
        return network
Exemplo n.º 4
0
	def qlearningNeuralNetwork(self, learningIterations, neuronsCount, gamma, epsilon = 0.1, net=None):
		"""Train a Q-network by playing *learningIterations* game episodes.

		learningIterations -- number of episodes to play.
		neuronsCount -- hidden-layer size used when a fresh net is created.
		gamma -- discount factor for the future-reward term.
		epsilon -- exploration rate: probability of a random action.
		net -- optionally continue training an existing network.

		Returns the trained network; saves a snapshot whenever a new best
		score is reached.
		"""
		actionCount = self.game.getNumActions()
		state = self.game.getState()

		# Create a fresh network unless the caller supplied one.
		# Fixed: compare against the None singleton with `is`, not `==`.
		if net is None:
			net = Net(len(state), neuronsCount, actionCount)
			net.setEpoch(1)

		bestScore = 0

		for i in range(learningIterations):
			print('Game Epoch:', i+1, '/', learningIterations, end='\r')
			self.__lastHoles = 0
			self.__gameRunnig = True
			self.game.reset()

			state = self.game.getState()
			while self.__gameRunnig:
				Q = net.Sim(state)
				action = Q.index(max(Q))
				reward = self.getReward(state)

				# Epsilon-greedy exploration.
				if random() < epsilon:
					action = randint(0, actionCount-1)
				self.game.move(action)
				nextState = self.game.getState()
				maxNextQ = max(net.Sim(nextState))
				# Bellman update on the chosen action, then one training step.
				Q[action] = reward + gamma * maxNextQ
				net.Train([state], [Q])
				state = nextState
			score = self.game.getScore()
			if score > bestScore:
				bestScore = score
				net.save('tra_'+str(bestScore))
		print()
		print('Best score:', bestScore)
		return net
Exemplo n.º 5
0
Arquivo: try1.py Projeto: altek42/inz
	def qlearningNeuralNetwork(self, lake, learningIterations, neuronsCount, eta, gamma, epsilon = 0.1):
		"""Train a Q-network on the frozen-lake grid *lake*.

		lake -- 2-D grid describing the lake layout.
		learningIterations -- number of training episodes.
		neuronsCount -- hidden-layer size of the network.
		eta -- unused here; kept for interface compatibility with callers.
		gamma -- discount factor for the future-reward term.
		epsilon -- exploration rate: probability of a random action.

		Returns the trained network.
		"""
		statesCount = len(lake) * len(lake[0])
		actionCount = 4

		net = Net(2, neuronsCount, actionCount)
		net.setEpoch(1)

		for i in range(learningIterations):
			print('Game Epoch:', i+1, '/', learningIterations, end='\r')
			# Start each episode from a random cell; skip starts that are
			# already penalized (negative reward).
			state = randint(0, statesCount-1)
			if self.reward(lake,state) < 0:
				continue

			# Cap each episode at 40 steps.
			for j in range(40):
				Q = net.Sim(self.stateToNetInput(lake,state))
				action = Q.index(max(Q))

				# Epsilon-greedy exploration.
				if random() < epsilon:
					action = randint(0, actionCount-1)

				newState = self.nextState(lake, state, action)
				rew = self.reward(lake, newState)

				# An invalid transition (-1) contributes no future value.
				if newState == -1:
					maxNewQ = 0
				else:
					maxNewQ = max(net.Sim(self.stateToNetInput(lake,newState)))
				Q[action] = rew + gamma * maxNewQ
				net.Train([self.stateToNetInput(lake,state)], [Q])

				state = newState

				if state == -1 or self.isFinalState(lake, state):
					break

		print()
		return net
Exemplo n.º 6
0
class qNetAlgorithm(object):
    """Q-learning on a fixed 4x4 FrozenLake grid.

    A small neural network approximates the Q-function: input is the
    (x, y) player position, output is one Q-value per move direction.
    """

    # Number of training episodes played by train().
    EPOCH = 50000
    # Side length of the square lake grid.
    GRID_SIZE = 4

    def __init__(self):
        # Fixed layout: goal in the bottom-right corner plus four
        # hand-placed holes.
        conf = Config()
        conf.setSize(self.GRID_SIZE, self.GRID_SIZE)
        conf.setEnd(self.GRID_SIZE - 1, self.GRID_SIZE - 1)
        conf.addHole(1, 1)
        conf.addHole(1, 3)
        conf.addHole(0, 3)
        conf.addHole(3, 2)

        game = FrozenLake(conf)
        game.print()
        # gameOver() clears self.gameRuning, ending the current episode.
        game.onGameOver(self.gameOver)

        self.config = conf
        self.game = game
        self.gamma = 0.8
        self.gameRuning = True

    def main(self):
        """Train the Q-network, then play one demonstration game."""
        self.gameMap = self.game.getMap()
        # 2 inputs (x, y), 5 hidden neurons, 4 outputs (one per direction).
        self.net = Net(2, 5, 4)
        self.net.setEpoch(1)
        # self.net.setLearningMethod(LearningMethod.MOMENTUM)
        self.train()
        self.playGame()

    def playGame(self):
        """Play one game from (0, 0), always taking the greedy action.

        Gives up after 21 moves to avoid looping forever.
        """
        self.createNewGame(0, 0)
        i = 0
        while self.gameRuning:
            print('\n---\n Move:', i)
            i += 1
            self.game.print()
            # (maxQ, index) = self.getMaxQ()
            q = self.getQ()
            index = q.index(max(q))
            # Build a one-hot action vector for move()/printDirection().
            a = [0, 0, 0, 0]
            a[index] = 1
            self.printDirection(a)
            print("Q:", q)
            self.move(a)
            if i > 21:
                return
        i += 1
        print(' Epoch:', i)
        self.game.print()

    def printDirection(self, move):
        """Print the direction encoded by the one-hot *move* vector."""
        if move[0] == 1:
            print('Direction up')
        if move[1] == 1:
            print('Direction down')
        if move[2] == 1:
            print('Direction left')
        if move[3] == 1:
            print('Direction right')

    def train(self):
        """Run EPOCH Q-learning episodes from random start positions."""
        for i in range(self.EPOCH):
            print('Game Epoch:', i + 1, '/', self.EPOCH, end='\r')
            pos = self.getRandomPosition()
            (posX, posY) = pos
            self.createNewGame(posX, posY)
            while self.gameRuning:
                state = self.getState()
                # Epsilon-greedy with a fixed 10% exploration rate.
                if random() < 0.1:
                    action = self.getRandMove()
                else:
                    action = self.getBestMove()

                # NOTE(review): q is read BEFORE the move, so maxNextQ
                # below is the max Q of the CURRENT state, not of the
                # post-move state — confirm this is intentional.
                q = self.getQ()
                isMove = self.move(action)
                if isMove:
                    maxNextQ = max(q)
                    reward = self.getReward()
                else:
                    # Illegal move: punish it and end the episode below.
                    maxNextQ = 0
                    reward = -1
                rew = reward + self.gamma * maxNextQ
                q[self.actionToIndex(action)] = rew

                self.net.Train([state], [q])

                if not isMove:
                    break

        print()

    def actionToIndex(self, action):
        """Index of the set bit in a one-hot action vector."""
        return action.index(1)

    def qCrossReward(self):
        """Probe all four moves from the saved position and return the
        discounted reward estimate for each.

        Not called from any code visible in this file.
        """
        qTab = []
        for i in range(4):
            self.game.savePosition()
            a = [0, 0, 0, 0]
            a[i] = 1
            isMove = self.move(a, True)
            (q, index) = self.getMaxQ()
            if isMove:
                reward = self.getReward()
            else:
                reward = -50
            maxQ = reward + self.gamma * q
            qTab.append(maxQ)
            self.game.loadPosition()
        return qTab

    def getRandMove(self):
        """Return a uniformly random one-hot action."""
        r = randint(0, 3)
        a = [0, 0, 0, 0]
        a[r] = 1
        return a

    def getBestMove(self):
        """Return the greedy (highest-Q) one-hot action."""
        a = [0, 0, 0, 0]
        (q, i) = self.getMaxQ()
        a[i] = 1
        return a

    def getMaxQ(self):
        """Return (best Q-value, its index) for the current state."""
        state = self.getState()
        q = self.net.Sim(state)
        index = q.index(max(q))
        return (q[index], index)

    def getQ(self):
        """Return the network's Q-vector for the current state."""
        state = self.getState()
        return self.net.Sim(state)

    def getReward(self):
        """Reward for the player's current position."""
        (x, y) = self.game.getPosition()
        return self.getRewardForPosition(x, y)

    def getRewardForPosition(self, x, y):
        """Reward for cell (x, y): -1 off-grid, -1 hole, 1 goal, else 0.

        Map value 1 appears to mark a hole and 2 the goal (cf. addHole /
        setEnd in __init__) — TODO confirm against FrozenLake.getMap().
        """
        if not self.checkPosition(x, y):
            return -1
        v = self.gameMap[x][y]
        if v == 1:
            return -1
        if v == 2:
            return 1
        return 0

    def checkPosition(self, x, y):
        """True when (x, y) lies inside the GRID_SIZE x GRID_SIZE grid."""
        if x < 0:
            return False
        if y < 0:
            return False
        if x >= self.GRID_SIZE:
            return False
        if y >= self.GRID_SIZE:
            return False
        return True

    def move(self, action, isTest=False):
        """Execute the one-hot *action*; returns the game's move result.

        Returns None if *action* has no bit set.
        """
        if action[0] == 1:
            return self.game.moveUp(isTest)
        if action[1] == 1:
            return self.game.moveDown(isTest)
        if action[2] == 1:
            return self.game.moveLeft(isTest)
        if action[3] == 1:
            return self.game.moveRight(isTest)

    def getState(self):
        """Network input for the current state: the (x, y) position."""
        (xPos, yPos) = self.game.getPosition()
        en = []
        en.append(xPos)
        en.append(yPos)
        return en

    def getRandomPosition(self):
        """Return a random (x, y) grid position.

        NOTE(review): x is drawn from the row length and y from the row
        count — swapped relative to gameMap[x][y] indexing elsewhere;
        harmless on a square grid, but confirm.
        """
        x = randint(0, len(self.gameMap[0]) - 1)
        y = randint(0, len(self.gameMap) - 1)
        return (x, y)

    def createNewGame(self, startX, startY):
        """Reset the game and place the player at (startX, startY)."""
        self.game.resetGame()
        self.game.setPosition(startX, startY)
        self.gameRuning = True

    def gameOver(self, res):
        """FrozenLake game-over callback: stop the current episode loop."""
        self.gameRuning = False
Exemplo n.º 7
0
class qNetAlgorithm(object):
    """Q-learning on a 10x10 FrozenLake with randomly placed holes.

    Unlike the per-action-output variant, the network here scores one
    (state, action) pair at a time: input is the (x, y) state followed by
    a 4-element one-hot action, output is a single Q-value.
    """

    # Number of training episodes played by train().
    EPOCH = 5000

    def __init__(self):
        # 10x10 grid, goal at (9, 9), eleven holes at random cells
        # (duplicates and overlap with start/goal are possible).
        conf = Config()
        conf.setSize(10, 10)
        conf.setEnd(9, 9)
        conf.addHole(randint(0, 9), randint(0, 9))
        conf.addHole(randint(0, 9), randint(0, 9))
        conf.addHole(randint(0, 9), randint(0, 9))
        conf.addHole(randint(0, 9), randint(0, 9))
        conf.addHole(randint(0, 9), randint(0, 9))
        conf.addHole(randint(0, 9), randint(0, 9))
        conf.addHole(randint(0, 9), randint(0, 9))
        conf.addHole(randint(0, 9), randint(0, 9))
        conf.addHole(randint(0, 9), randint(0, 9))
        conf.addHole(randint(0, 9), randint(0, 9))
        conf.addHole(randint(0, 9), randint(0, 9))

        game = FrozenLake(conf)
        game.print()
        # gameOver() clears self.gameRuning, ending the current episode.
        game.onGameOver(self.gameOver)

        self.config = conf
        self.game = game
        self.gamma = 0.8
        self.gameRuning = True

    def main(self):
        """Train the (state, action) value network, then play one game."""
        self.gameMap = self.game.getMap()
        # 6 inputs = 2 state coords + 4-element one-hot action; 1 Q output.
        self.net = Net(6, 30, 1)
        self.net.setEpoch(1)
        self.train()
        self.playGame()

    def playGame(self):
        """Play one greedy game from (0, 0); give up after 21 moves."""
        self.createNewGame(0, 0)
        i = 0
        while self.gameRuning:
            print(' Move:', i)
            i += 1
            self.game.print()
            (maxQ, action) = self.getMaxQ()
            self.move(action)
            if i > 21:
                return
        i += 1
        print(' Epoch:', i)
        self.game.print()

    def train(self):
        """Run EPOCH episodes, learning from uniformly random moves."""
        for i in range(self.EPOCH):
            print('Game Epoch:', i + 1, '/', self.EPOCH, end='\r')
            pos = self.getRandomPosition()
            (posX, posY) = pos
            self.createNewGame(posX, posY)
            while self.gameRuning:
                state = self.getState()
                actions = self.getActions()
                # Pure exploration: pick a random legal move.
                nextMove = randint(0, len(actions) - 1)
                nextMove = actions[nextMove]
                self.move(nextMove)
                # Max Q over the actions available AFTER the move.
                (maxQ, a) = self.getMaxQ()
                # Training input: pre-move state + the action taken.
                sa = state[:]
                sa.extend(nextMove)
                newQ = self.getReward() + self.gamma * maxQ
                self.net.Train([sa], [[newQ]])
        print()

    def getReward(self):
        """Reward at the current position: -200 hole, 200 goal, else 0.

        Map value 1 appears to mark a hole and 2 the goal (cf. addHole /
        setEnd in __init__) — TODO confirm against FrozenLake.getMap().
        """
        (x, y) = self.game.getPosition()
        v = self.gameMap[x][y]
        if v == 1:
            return -200
        if v == 2:
            return 200
        return 0

    def getMaxQ(self):
        """Return (best Q-value, best action) over the legal actions.

        net.Sim returns a list, so `tab` holds lists and max() compares
        them element-wise; the [0] at the end unwraps the scalar Q-value.
        """
        state = self.getState()
        tab = []
        actions = self.getActions()
        for action in actions:
            cp = state[:]
            cp.extend(action)
            q = self.net.Sim(cp)
            tab.append(q)
        index = tab.index(max(tab))
        return (max(tab)[0], actions[index])

    def move(self, action):
        """Execute the one-hot *action* (up/down/left/right)."""
        if action[0] == 1:
            self.game.moveUp()
            return
        if action[1] == 1:
            self.game.moveDown()
            return
        if action[2] == 1:
            self.game.moveLeft()
            return
        if action[3] == 1:
            self.game.moveRight()
            return

    # up
    # down
    # left
    # right
    def getActions(self):
        """Return the one-hot moves that keep the player inside the grid."""
        (xPos, yPos) = self.game.getPosition()
        ac = []
        if xPos > 0:
            ac.append([1, 0, 0, 0])
        if xPos < len(self.gameMap) - 1:
            ac.append([0, 1, 0, 0])
        if yPos > 0:
            ac.append([0, 0, 1, 0])
        if yPos < len(self.gameMap[0]) - 1:
            ac.append([0, 0, 0, 1])
        return ac

    def defineCell(self, x, y):
        """Classify a cell: 0 off-grid, -1 hole, 1 otherwise.

        Only referenced from commented-out code in getState().
        """
        if x < 0 or y < 0 or x > len(self.gameMap) - 1 or y > len(
                self.gameMap[0]) - 1:
            return 0
        if self.gameMap[x][y] == 1:
            return -1
        return 1

    def getState(self):
        """Network state input: the current (x, y) position.

        The commented-out variants below encoded neighbouring cells or
        the whole map instead.
        """
        (xPos, yPos) = self.game.getPosition()
        # (xEndPos, yEndPos) = self.game.getEndPosition()
        en = []
        # en.append(self.defineCell(xPos-1,yPos)) #up
        # en.append(self.defineCell(xPos+1,yPos)) #down
        # en.append(self.defineCell(xPos,yPos-1)) #left
        # en.append(self.defineCell(xPos,yPos+1)) #right

        # for row in self.gameMap:
        # 	for cell in row:
        # 		if cell == 1:
        # 			en.append(-1)
        # 		else:
        # 			en.append(1)

        # en.append(xEndPos - xPos)
        # en.append(yEndPos - yPos)
        en.append(xPos)
        en.append(yPos)
        return en

    def getRandomPosition(self):
        """Return a random (x, y) grid position.

        NOTE(review): x is drawn from the row length and y from the row
        count — swapped relative to gameMap[x][y] indexing elsewhere;
        harmless on a square grid, but confirm.
        """
        x = randint(0, len(self.gameMap[0]) - 1)
        y = randint(0, len(self.gameMap) - 1)
        return (x, y)

    def createNewGame(self, startX, startY):
        """Reset the game and place the player at (startX, startY)."""
        self.game.resetGame()
        self.game.setPosition(startX, startY)
        self.gameRuning = True

    def gameOver(self, res):
        """FrozenLake game-over callback: stop the current episode loop."""
        self.gameRuning = False
Exemplo n.º 8
0
 def main(self):
     """Build and train the state-action value network, then play a game."""
     self.gameMap = self.game.getMap()
     # 6 inputs, 30 hidden neurons, 1 output — presumably a single Q-value
     # for a combined state+action input; confirm against the trainer.
     network = Net(6, 30, 1)
     network.setEpoch(1)
     self.net = network
     self.train()
     self.playGame()