def main(self):
    """Entry point: cache the game map, build and train the Q-network, then play."""
    self.gameMap = self.game.getMap()
    # Net(2, 5, 4): 2 inputs -> 5 hidden neurons -> 4 outputs.
    network = Net(2, 5, 4)
    network.setEpoch(1)
    self.net = network
    # network.setLearningMethod(LearningMethod.MOMENTUM)
    self.train()
    self.playGame()
def run(self):
    """Prepare and display the dataset, show the untrained net, train it,
    show it again, and finally display the plot window."""
    self.prepareData()
    self.showData()
    model = Net(2, 6, 2)
    self.showNetData(model, 'dumb')
    model.Train(self.input, self.output)
    self.showNetData(model, 'trained')
    plt.show()
def learn(self, epoch, neuronsCount):
    """Build a network sized to the game's state/action spaces and fit it
    to the training pairs derived from the Q data.

    Returns the trained network.
    """
    numActions = self.game.getNumActions()
    currentState = self.game.getState()
    network = Net(len(currentState), neuronsCount, numActions)
    network.setEpoch(epoch)
    inputs, targets = self.transformQToLearningData()
    network.Train(inputs, targets)
    return network
def qlearningNeuralNetwork(self, learningIterations, neuronsCount, gamma, epsilon=0.1, net=None):
    """Train a Q-network by repeatedly playing the game (epsilon-greedy).

    Args:
        learningIterations: number of full games to play.
        neuronsCount: hidden-layer size used when a fresh net is created.
        gamma: discount factor for future rewards.
        epsilon: probability of taking a random exploratory action.
        net: optional existing network to continue training; a new one is
            built (and its epoch set to 1) when omitted.

    Returns:
        The trained network. The best-scoring net seen so far is also
        saved to disk as 'tra_<score>'.
    """
    actionCount = self.game.getNumActions()
    state = self.game.getState()
    if net is None:  # fixed: was `net == None`; identity check is the correct idiom
        net = Net(len(state), neuronsCount, actionCount)
        net.setEpoch(1)
    bestScore = 0
    for i in range(learningIterations):
        print('Game Epoch:', i+1, '/', learningIterations, end='\r')
        self.__lastHoles = 0
        self.__gameRunnig = True
        self.game.reset()
        state = self.game.getState()
        while self.__gameRunnig:
            Q = net.Sim(state)
            action = Q.index(max(Q))
            # NOTE(review): reward is evaluated on the *current* state, before
            # the move is made — confirm this is intended (classic Q-learning
            # rewards the resulting state instead).
            reward = self.getReward(state)
            if random() < epsilon:
                action = randint(0, actionCount-1)
            self.game.move(action)
            nextState = self.game.getState()
            maxNextQ = max(net.Sim(nextState))
            # Bellman update on the taken action only; all other Q entries
            # keep the network's own predictions.
            Q[action] = reward + gamma * maxNextQ
            net.Train([state], [Q])
            state = nextState
        score = self.game.getScore()
        if score > bestScore:
            bestScore = score
            net.save('tra_'+str(bestScore))
    print()
    print('Best score:', bestScore)
    return net
def qlearningNeuralNetwork(self, lake, learningIterations, neuronsCount, eta, gamma, epsilon=0.1):
    """Train a Q-network on a FrozenLake grid via epsilon-greedy episodes.

    Args:
        lake: 2-D grid describing the lake layout.
        learningIterations: number of training episodes.
        neuronsCount: hidden-layer size of the network.
        eta: learning rate — NOTE(review): currently unused in this body;
            kept for interface compatibility. Confirm whether it should be
            forwarded to the net.
        gamma: discount factor for future rewards.
        epsilon: probability of taking a random exploratory action.

    Returns:
        The trained network (2 inputs -> neuronsCount hidden -> 4 actions).
    """
    statesCount = len(lake) * len(lake[0])
    actionCount = 4
    net = Net(2, neuronsCount, actionCount)
    net.setEpoch(1)
    for i in range(learningIterations):
        print('Game Epoch:', i+1, '/', learningIterations, end='\r')
        # Random start cell; skip episodes that would begin on a penalty cell.
        state = randint(0, statesCount-1)
        if self.reward(lake, state) < 0:
            continue
        # Cap each episode at 40 steps.
        for j in range(40):
            Q = net.Sim(self.stateToNetInput(lake, state))
            action = Q.index(max(Q))
            if random() < epsilon:
                action = randint(0, actionCount-1)
            newState = self.nextState(lake, state, action)
            rew = self.reward(lake, newState)
            # nextState() signals an invalid transition with -1: no future value.
            if newState == -1:
                maxNewQ = 0
            else:
                maxNewQ = max(net.Sim(self.stateToNetInput(lake, newState)))
            # Bellman target on the taken action only.
            Q[action] = rew + gamma * maxNewQ
            net.Train([self.stateToNetInput(lake, state)], [Q])
            state = newState  # removed unused local `prevState`
            if state == -1 or self.isFinalState(lake, state):
                break
    print()
    return net
class qNetAlgorithm(object):
    """Q-learning with a neural net on a fixed 4x4 FrozenLake grid.

    The network maps a 2-element state (the player's x, y position) to
    4 Q-values, one per direction (up, down, left, right).
    """

    # Number of training games played by train().
    EPOCH = 50000
    # Board is GRID_SIZE x GRID_SIZE; the goal sits in the far corner.
    GRID_SIZE = 4

    def __init__(self):
        # Build a fixed 4x4 lake: goal at (3, 3), four hard-coded holes.
        conf = Config()
        conf.setSize(self.GRID_SIZE, self.GRID_SIZE)
        conf.setEnd(self.GRID_SIZE - 1, self.GRID_SIZE - 1)
        conf.addHole(1, 1)
        conf.addHole(1, 3)
        conf.addHole(0, 3)
        conf.addHole(3, 2)
        game = FrozenLake(conf)
        game.print()
        # gameOver() flips self.gameRuning to stop the play/train loops.
        game.onGameOver(self.gameOver)
        self.config = conf
        self.game = game
        self.gamma = 0.8  # discount factor for future rewards
        self.gameRuning = True

    def main(self):
        """Entry point: build the net (2 inputs -> 5 hidden -> 4 Q-values),
        train it, then play one greedy game."""
        self.gameMap = self.game.getMap()
        self.net = Net(2, 5, 4)
        self.net.setEpoch(1)
        # self.net.setLearningMethod(LearningMethod.MOMENTUM)
        self.train()
        self.playGame()

    def playGame(self):
        """Play one greedy game from (0, 0), printing each move."""
        self.createNewGame(0, 0)
        i = 0
        while self.gameRuning:
            print('\n---\n Move:', i)
            i += 1
            self.game.print()
            # (maxQ, index) = self.getMaxQ()
            q = self.getQ()
            index = q.index(max(q))
            # One-hot action vector for the highest-scoring direction.
            a = [0, 0, 0, 0]
            a[index] = 1
            self.printDirection(a)
            print("Q:", q)
            self.move(a)
            if i > 21:
                return
            # NOTE(review): i is incremented twice per iteration (here and at
            # the top of the loop), so the 21-move cap counts double — confirm
            # this is intended.
            i += 1
            print(' Epoch:', i)
            self.game.print()

    def printDirection(self, move):
        """Print the direction encoded by a one-hot action vector."""
        if move[0] == 1:
            print('Direction up')
        if move[1] == 1:
            print('Direction down')
        if move[2] == 1:
            print('Direction left')
        if move[3] == 1:
            print('Direction right')

    def train(self):
        """Q-learning: play EPOCH games from random start cells with
        10% random exploration, updating the net one step at a time."""
        for i in range(self.EPOCH):
            print('Game Epoch:', i + 1, '/', self.EPOCH, end='\r')
            pos = self.getRandomPosition()
            (posX, posY) = pos
            self.createNewGame(posX, posY)
            while self.gameRuning:
                state = self.getState()
                # epsilon-greedy: explore 10% of the time, otherwise exploit.
                if random() < 0.1:
                    action = self.getRandMove()
                else:
                    action = self.getBestMove()
                q = self.getQ()
                isMove = self.move(action)
                if isMove:
                    # NOTE(review): maxNextQ comes from q of the state *before*
                    # the move rather than the successor state — confirm this
                    # is intended (standard Q-learning uses the next state).
                    maxNextQ = max(q)
                    reward = self.getReward()
                else:
                    # Rejected move: no future value, fixed -1 penalty.
                    maxNextQ = 0
                    reward = -1
                # Bellman target for the taken action only.
                rew = reward + self.gamma * maxNextQ
                q[self.actionToIndex(action)] = rew
                self.net.Train([state], [q])
                if not isMove:
                    break
        print()

    def actionToIndex(self, action):
        """Map a one-hot action vector to its direction index (0-3)."""
        return action.index(1)

    def qCrossReward(self):
        """Return a 4-element list of discounted Q-targets, one per direction,
        probing each move in test mode and restoring the position each time."""
        qTab = []
        for i in range(4):
            self.game.savePosition()
            a = [0, 0, 0, 0]
            a[i] = 1
            isMove = self.move(a, True)
            (q, index) = self.getMaxQ()
            if isMove:
                reward = self.getReward()
            else:
                # Heavy penalty for a rejected move.
                reward = -50
            maxQ = reward + self.gamma * q
            qTab.append(maxQ)
            self.game.loadPosition()
        return qTab

    def getRandMove(self):
        """Return a uniformly random one-hot action vector."""
        r = randint(0, 3)
        a = [0, 0, 0, 0]
        a[r] = 1
        return a

    def getBestMove(self):
        """Return the one-hot action with the highest predicted Q-value."""
        a = [0, 0, 0, 0]
        (q, i) = self.getMaxQ()
        a[i] = 1
        return a

    def getMaxQ(self):
        """Return (best Q-value, its action index) for the current state."""
        state = self.getState()
        q = self.net.Sim(state)
        index = q.index(max(q))
        return (q[index], index)

    def getQ(self):
        """Return the full 4-element Q-vector for the current state."""
        state = self.getState()
        return self.net.Sim(state)

    def getReward(self):
        """Reward at the player's current position."""
        (x, y) = self.game.getPosition()
        return self.getRewardForPosition(x, y)

    def getRewardForPosition(self, x, y):
        """-1 for off-board or a hole (map value 1), +1 for the goal
        (map value 2), 0 otherwise."""
        if not self.checkPosition(x, y):
            return -1
        v = self.gameMap[x][y]
        if v == 1:
            return -1
        if v == 2:
            return 1
        return 0

    def checkPosition(self, x, y):
        """True when (x, y) lies inside the GRID_SIZE x GRID_SIZE board."""
        if x < 0:
            return False
        if y < 0:
            return False
        if x >= self.GRID_SIZE:
            return False
        if y >= self.GRID_SIZE:
            return False
        return True

    def move(self, action, isTest=False):
        """Execute the one-hot action and return the game's move result
        (train() treats a falsy result as a rejected move).
        isTest presumably probes without committing — confirm with FrozenLake."""
        if action[0] == 1:
            return self.game.moveUp(isTest)
        if action[1] == 1:
            return self.game.moveDown(isTest)
        if action[2] == 1:
            return self.game.moveLeft(isTest)
        if action[3] == 1:
            return self.game.moveRight(isTest)

    def getState(self):
        """Network input: the player's [x, y] position."""
        (xPos, yPos) = self.game.getPosition()
        en = []
        en.append(xPos)
        en.append(yPos)
        return en

    def getRandomPosition(self):
        """Random (x, y) start cell; may land on a hole or the goal."""
        x = randint(0, len(self.gameMap[0]) - 1)
        y = randint(0, len(self.gameMap) - 1)
        return (x, y)

    def createNewGame(self, startX, startY):
        """Reset the game and place the player at the given start cell."""
        self.game.resetGame()
        self.game.setPosition(startX, startY)
        self.gameRuning = True

    def gameOver(self, res):
        """FrozenLake game-over callback: stop the current play/train loop."""
        self.gameRuning = False
class qNetAlgorithm(object):
    """Q-learning with a neural net on a 10x10 FrozenLake with random holes.

    Unlike the 4x4 variant, the network scores (state, action) pairs:
    6 inputs (x, y position + one-hot action) -> 1 Q-value output.
    """

    # Number of training games played by train().
    EPOCH = 5000

    def __init__(self):
        # 10x10 board, goal at (9, 9), eleven holes at random cells.
        # randint bounds are inclusive; duplicate holes or a hole on the
        # goal cell are possible.
        conf = Config()
        conf.setSize(10, 10)
        conf.setEnd(9, 9)
        conf.addHole(randint(0, 9), randint(0, 9))
        conf.addHole(randint(0, 9), randint(0, 9))
        conf.addHole(randint(0, 9), randint(0, 9))
        conf.addHole(randint(0, 9), randint(0, 9))
        conf.addHole(randint(0, 9), randint(0, 9))
        conf.addHole(randint(0, 9), randint(0, 9))
        conf.addHole(randint(0, 9), randint(0, 9))
        conf.addHole(randint(0, 9), randint(0, 9))
        conf.addHole(randint(0, 9), randint(0, 9))
        conf.addHole(randint(0, 9), randint(0, 9))
        conf.addHole(randint(0, 9), randint(0, 9))
        game = FrozenLake(conf)
        game.print()
        # gameOver() flips self.gameRuning to stop the play/train loops.
        game.onGameOver(self.gameOver)
        self.config = conf
        self.game = game
        self.gamma = 0.8  # discount factor for future rewards
        self.gameRuning = True

    def main(self):
        """Entry point: build the net (6 inputs -> 30 hidden -> 1 Q-value),
        train it, then play one greedy game."""
        self.gameMap = self.game.getMap()
        self.net = Net(6, 30, 1)
        self.net.setEpoch(1)
        self.train()
        self.playGame()

    def playGame(self):
        """Play one greedy game from (0, 0), printing each move."""
        self.createNewGame(0, 0)
        i = 0
        while self.gameRuning:
            print(' Move:', i)
            i += 1
            self.game.print()
            (maxQ, action) = self.getMaxQ()
            self.move(action)
            if i > 21:
                return
            # NOTE(review): i is incremented twice per iteration, so the
            # 21-move cap counts double — confirm this is intended.
            i += 1
            print(' Epoch:', i)
            self.game.print()

    def train(self):
        """Train with purely random exploration from random start cells
        (no greedy exploitation during training)."""
        for i in range(self.EPOCH):
            print('Game Epoch:', i + 1, '/', self.EPOCH, end='\r')
            pos = self.getRandomPosition()
            (posX, posY) = pos
            self.createNewGame(posX, posY)
            while self.gameRuning:
                state = self.getState()
                actions = self.getActions()
                # Pick a uniformly random legal action.
                nextMove = randint(0, len(actions) - 1)
                nextMove = actions[nextMove]
                self.move(nextMove)
                # Best Q over actions available from the successor state
                # (getMaxQ reads the position *after* the move above).
                (maxQ, a) = self.getMaxQ()
                # Network input is the (state, action) pair concatenated.
                sa = state[:]
                sa.extend(nextMove)
                newQ = self.getReward() + self.gamma * maxQ
                self.net.Train([sa], [[newQ]])
        print()

    def getReward(self):
        """+200 at the goal (map value 2), -200 in a hole (map value 1),
        0 otherwise."""
        (x, y) = self.game.getPosition()
        v = self.gameMap[x][y]
        if v == 1:
            return -200
        if v == 2:
            return 200
        return 0

    def getMaxQ(self):
        """Return (best Q-value, best action) over all legal actions by
        simulating the net on each (state, action) concatenation."""
        state = self.getState()
        tab = []
        actions = self.getActions()
        for action in actions:
            cp = state[:]
            cp.extend(action)
            q = self.net.Sim(cp)
            tab.append(q)
        index = tab.index(max(tab))
        # Each q is a 1-element list; [0] unwraps the scalar Q-value.
        return (max(tab)[0], actions[index])

    def move(self, action):
        """Execute a one-hot action vector: [up, down, left, right]."""
        if action[0] == 1:
            self.game.moveUp()
            return
        if action[1] == 1:
            self.game.moveDown()
            return
        if action[2] == 1:
            self.game.moveLeft()
            return
        if action[3] == 1:
            self.game.moveRight()
            return

    # up
    # down
    # left
    # right
    def getActions(self):
        """Return the one-hot actions that stay on the board from the
        current cell (order: up, down, left, right)."""
        (xPos, yPos) = self.game.getPosition()
        ac = []
        if xPos > 0:
            ac.append([1, 0, 0, 0])
        if xPos < len(self.gameMap) - 1:
            ac.append([0, 1, 0, 0])
        if yPos > 0:
            ac.append([0, 0, 1, 0])
        if yPos < len(self.gameMap[0]) - 1:
            ac.append([0, 0, 0, 1])
        return ac

    def defineCell(self, x, y):
        """Encode a cell for the net: 0 off-board, -1 hole, 1 safe.
        Only referenced by the commented-out experiments in getState()."""
        if x < 0 or y < 0 or x > len(self.gameMap) - 1 or y > len(
                self.gameMap[0]) - 1:
            return 0
        if self.gameMap[x][y] == 1:
            return -1
        return 1

    def getState(self):
        """Network state input: the player's [x, y] position.
        The commented-out blocks are earlier, richer encodings
        (neighbour cells, full map, distance to goal)."""
        (xPos, yPos) = self.game.getPosition()
        # (xEndPos, yEndPos) = self.game.getEndPosition()
        en = []
        # en.append(self.defineCell(xPos-1,yPos)) #up
        # en.append(self.defineCell(xPos+1,yPos)) #down
        # en.append(self.defineCell(xPos,yPos-1)) #left
        # en.append(self.defineCell(xPos,yPos+1)) #right
        # for row in self.gameMap:
        #     for cell in row:
        #         if cell == 1:
        #             en.append(-1)
        #         else:
        #             en.append(1)
        # en.append(xEndPos - xPos)
        # en.append(yEndPos - yPos)
        en.append(xPos)
        en.append(yPos)
        return en

    def getRandomPosition(self):
        """Random (x, y) start cell; may land on a hole or the goal."""
        x = randint(0, len(self.gameMap[0]) - 1)
        y = randint(0, len(self.gameMap) - 1)
        return (x, y)

    def createNewGame(self, startX, startY):
        """Reset the game and place the player at the given start cell."""
        self.game.resetGame()
        self.game.setPosition(startX, startY)
        self.gameRuning = True

    def gameOver(self, res):
        """FrozenLake game-over callback: stop the current play/train loop."""
        self.gameRuning = False
def main(self):
    """Entry point: cache the game map, create and train the network, then play."""
    self.gameMap = self.game.getMap()
    # Net(6, 30, 1): 6 inputs -> 30 hidden neurons -> 1 output.
    network = Net(6, 30, 1)
    network.setEpoch(1)
    self.net = network
    self.train()
    self.playGame()