示例#1
0
 def startGame(self, trainingGame=False, learningRate=0.1, decay=0.99):
     """Play one console game: the human as cross, the network as circle.

     The human moves first and types each move as ``x,y``.  Typing
     ``print`` calls ``self.print()`` and ``exit`` abandons the game;
     neither uses up the turn.  Input that cannot be parsed, or a move the
     game rejects, is reported and the human is asked again.

     The network's move is read from ``self.action(board)`` as
     ``coord[0][0]`` (x) and ``coord[0][1]`` (y).  Outside training a
     rejected network move is simply skipped and the turn passes back to
     the human.

     Args:
         trainingGame: When true, every placed move is recorded together
             with the board it was played on, rejected network moves are
             trained away immediately, and once the game has a winner the
             recorded moves are replayed through ``self.NN.train``.
             Nothing is replayed when the winner is neither mark.
         learningRate: Base learning rate handed to the network.
         decay: Factor applied once per move to ``learningRate`` during
             the replay, counting back from the last move of the game.
     """
     cross = 1
     circle = -1
     playerMoves = []
     playerBoard = []
     neuralMoves = []
     neuralBoard = []

     def replay(moves, boards, rewarded):
         # Train on one side's recorded moves, newest first, so the move
         # closest to the end of the game gets the undecayed learning rate.
         # A rewarded move is the single 1.0 in a target of 0.0s; a
         # punished move is the single 0.0 in a target of 1.0s.
         chosen, others = (1.0, 0.0) if rewarded else (0.0, 1.0)
         history = zip(reversed(moves), reversed(boards))
         for age, (move, state) in enumerate(history):
             target = [others] * 9
             target[move] = chosen
             self.NN.train(learningRate * (decay**age), state, target)

     game = TicTacToe()
     mark = cross  # the human opens the game
     entry = ""
     while entry != "exit" and not game.finished():
         game.print()
         if mark == circle:
             board = game.getBoard(mark)
             coord = self.action(board)
             x = coord[0][0]
             y = coord[0][1]
             print("x: " + str(x) + ", y: " + str(y))
             placed = game.addMark(mark, x, y)
             # Only a training game insists on a legal move: the rejected
             # cell is marked 0.0 among 1.0s and the network is asked
             # again.  NOTE(review): trainWithOutput presumably trains
             # against the network's most recent input -- confirm.
             while not placed and trainingGame:
                 out = [1.0] * 9
                 out[self.coordinatesToIndex(x, y)] = 0.0
                 self.NN.trainWithOutput(learningRate, out)
                 coord = self.action(board)
                 x = coord[0][0]
                 y = coord[0][1]
                 placed = game.addMark(mark, x, y)
             if trainingGame:
                 neuralMoves.append(self.coordinatesToIndex(x, y))
                 neuralBoard.append(board)
             mark = cross
         else:
             entry = input()
             if entry == "print":
                 # Handled before any parsing so the command neither
                 # crashes int() nor costs the human the turn.
                 self.print()
             elif entry != "exit":
                 try:
                     fields = entry.split(',')
                     x = int(fields[0])
                     y = int(fields[1])
                 except (ValueError, IndexError):
                     print("Invalid input, expected: x,y")
                 else:
                     # Snapshot the board before the mark goes down so the
                     # training pair is (position seen, move chosen), the
                     # same way the network's own moves are recorded.
                     board = game.getBoard(mark)
                     if game.addMark(mark, x, y):
                         if trainingGame:
                             playerMoves.append(
                                 self.coordinatesToIndex(x, y))
                             playerBoard.append(board)
                         mark = circle
                     else:
                         print("Move rejected, try again")
         print()
     if game.finished():
         game.print()
         winner = game.getWinner()
         print("Winner: " + str(winner))
         if trainingGame:
             if winner == cross:
                 # The human won: discourage the network's moves and
                 # imitate the human's.
                 replay(neuralMoves, neuralBoard, False)
                 replay(playerMoves, playerBoard, True)
             elif winner == circle:
                 # The network won: reinforce its moves and discourage
                 # the human's.
                 replay(neuralMoves, neuralBoard, True)
                 replay(playerMoves, playerBoard, False)
示例#2
0
 def trainFor(self, learningRate, decay, exploreRate, episodes):
     """Train the network by letting it play both sides of whole games.

     Each episode plays one fresh game in which cross and circle alternate,
     cross first.  Every move is either taken from ``self.action(board)``
     or, with probability ``exploreRate``, picked at random among the nine
     cells.  When the game has a winner, the winner's moves are replayed
     as positive examples and the loser's as negative ones; a game whose
     winner is neither mark trains nothing.

     Args:
         learningRate: Base learning rate handed to the network.
         decay: Factor applied once per move to ``learningRate`` during
             the replay, counting back from the last move of the game.
         exploreRate: Chance, compared against ``random.random()``, that
             a move is random instead of chosen by the network.
         episodes: Number of games to play.  ``self.totalGamesTrained`` is
             incremented once per game.
     """
     cross = 1
     circle = -1

     def replay(moves, boards, rewarded):
         # Train on one side's recorded moves, newest first, so the move
         # closest to the end of the game gets the undecayed learning rate.
         # A rewarded move is the single 1.0 in a target of 0.0s; a
         # punished move is the single 0.0 in a target of 1.0s.
         chosen, others = (1.0, 0.0) if rewarded else (0.0, 1.0)
         history = zip(reversed(moves), reversed(boards))
         for age, (move, state) in enumerate(history):
             target = [others] * 9
             target[move] = chosen
             self.NN.train(learningRate * (decay**age), state, target)

     for gameNum in range(1, episodes + 1):
         self.totalGamesTrained += 1
         print("Number " + str(gameNum))
         game = TicTacToe()
         crossMoves = []
         crossBoard = []
         circleMoves = []
         circleBoard = []
         mark = circle  # flipped before every move, so cross opens
         while not game.finished():
             mark = circle if mark == cross else cross
             # Snapshot taken before the move: it is the position the
             # chosen move gets paired with for training.
             board = game.getBoard(mark)
             if random.random() >= exploreRate:
                 coord = self.action(board)
                 x = coord[0][0]
                 y = coord[0][1]
                 # A rejected cell is marked 0.0 among 1.0s and the network
                 # is asked again until the game accepts its move.
                 # NOTE(review): trainWithOutput presumably trains against
                 # the network's most recent input -- confirm.
                 while not game.addMark(mark, x, y):
                     out = [1.0] * 9
                     out[self.coordinatesToIndex(x, y)] = 0.0
                     self.NN.trainWithOutput(learningRate, out)
                     coord = self.action(board)
                     x = coord[0][0]
                     y = coord[0][1]
             else:
                 # Exploration: try random cells, dropping each rejected
                 # one so no cell is tried twice.
                 pos = [[col, row] for row in range(3) for col in range(3)]
                 picked = random.choice(pos)
                 x = picked[0]
                 y = picked[1]
                 while not game.addMark(mark, x, y):
                     pos.remove(picked)
                     picked = random.choice(pos)
                     x = picked[0]
                     y = picked[1]
             value = self.coordinatesToIndex(x, y)
             if mark == cross:
                 crossMoves.append(value)
                 crossBoard.append(board)
             else:
                 circleMoves.append(value)
                 circleBoard.append(board)
         # getWinner must be *called*: comparing the bound method itself
         # with a mark is always false, which would leave every game won
         # by circle untrained.
         winner = game.getWinner()
         if winner == cross:
             replay(crossMoves, crossBoard, True)
             replay(circleMoves, circleBoard, False)
         elif winner == circle:
             replay(circleMoves, circleBoard, True)
             replay(crossMoves, crossBoard, False)