def reward(state):
    """Return the scalar reward of a board state, from Player.X's point of view.

    `finished()` yields False (game still running), True (draw), or the
    winning player; each case maps to one of the reward constants.
    """
    outcome = TicTacToe(board=tuple_to_list2d(state)).finished()
    if outcome == False:
        # Game not over yet.
        return RBASE
    if outcome == True:
        # Board full, nobody won.
        return RDRAW
    # A player won: reward X's victory, penalise everything else.
    return RVICTORY if outcome == Player.X else RLOSS
else: print(f("{v} isn't a valid player, please use 'X' or 'O'")) print("Using default...") player = Player.X print("Player starting" if player == Player.X else "Player second") # Use precalculated policy from file. # If file not present, find the policy via value iteration. try: print("Reading the AI policy") if player == player.X: file = open("policy_O.pkl", "rb") else: file = open("policy_X.pkl", "rb") policy = pickle.load(file) ai = Agent(game, policy) except: print("Failed to read the AI policy, calculating it now...") print("To precalculate and save the policy run valueiteration.py") ai = ReinforcementAgent(game, ~player) print("Ready to play!!!") assert (len(sys.argv) == 2) # TODO : Add possibility for player to play as O while True: if not game.finished() and game.player is ~player: ai.play() handle_events(game) draw_board(game.board)
def startGame(self, trainingGame=False, learningRate=0.1, decay=0.99):
    """Play one interactive game: the human plays cross, the NN plays circle.

    The human types moves as "x,y" on stdin ("exit" quits, "print" dumps the
    network instead of the board). When trainingGame is True, the network is
    trained after the game finishes: the winner's move sequence is reinforced
    and the loser's discouraged, with the learning rate decayed per move
    starting from the last (most decisive) move.

    Args:
        trainingGame: if True, train the NN during and after the game.
        learningRate: base learning rate passed to self.NN.train.
        decay: per-move multiplicative decay applied to the learning rate.
    """
    cross = 1
    circle = -1
    playerMoves = []   # move indices chosen by the human, in play order
    playerBoard = []   # board (human's perspective) before each human move
    neuralMoves = []   # move indices chosen by the NN, in play order
    neuralBoard = []   # board (NN's perspective) before each NN move
    mark = circle      # toggled at the top of the loop, so cross (human) moves first
    game = TicTacToe()
    entry = ""
    while entry != "exit" and game.finished() == False:
        if entry == "print":
            self.print()
        else:
            game.print()
        if mark == cross:
            # NN's turn (it plays circle).
            mark = circle
            board = game.getBoard(mark)
            coord = self.action(board)
            x = coord[0][0]
            y = coord[0][1]
            print("x: " + str(x) + ", y: " + str(y))
            # Retry while the NN proposes an occupied square; in training
            # games, punish the illegal choice so the NN learns legality.
            while game.addMark(mark, x, y) == False:
                if trainingGame == False:
                    break
                out = [1.0] * 9
                value = self.coordinatesToIndex(x, y)
                out[value] = 0.0
                self.NN.trainWithOutput(learningRate, out)
                coord = self.action(board)
                x = coord[0][0]
                y = coord[0][1]
            if trainingGame:
                value = self.coordinatesToIndex(x, y)
                neuralMoves.append(value)
                neuralBoard.append(board)
        else:
            # Human's turn (plays cross); read "x,y" from stdin.
            mark = cross
            entry = input()
            if entry != "exit":
                split = entry.split(',')
                x = int(split[0])
                y = int(split[1])
                if game.addMark(mark, x, y) and trainingGame:
                    board = game.getBoard(mark)
                    value = self.coordinatesToIndex(x, y)
                    playerMoves.append(value)
                    playerBoard.append(board)
        print()
    if game.finished() == True:
        game.print()
        winner = game.getWinner()
        print("Winner: " + str(winner))
        if trainingGame:
            # Train from the last move backwards so decay**i gives the
            # final moves the largest learning rate.
            neuralMoves.reverse()
            neuralBoard.reverse()
            # BUGFIX: the player's sequences were never reversed, so the
            # decay was applied from the FIRST move instead of the last —
            # inconsistent with trainFor(), which reverses all four lists.
            playerMoves.reverse()
            playerBoard.reverse()
            if winner == cross:
                # Human won: discourage the NN's moves, reinforce the human's.
                for i in range(len(neuralMoves)):
                    target = [1.0] * 9
                    target[neuralMoves[i]] = 0.0
                    self.NN.train(learningRate * (decay**i), neuralBoard[i], target)
                for i in range(len(playerMoves)):
                    target = [0.0] * 9
                    target[playerMoves[i]] = 1.0
                    self.NN.train(learningRate * (decay**i), playerBoard[i], target)
            elif winner == circle:
                # NN won: reinforce its moves, discourage the human's.
                for i in range(len(neuralMoves)):
                    target = [0.0] * 9
                    target[neuralMoves[i]] = 1.0
                    self.NN.train(learningRate * (decay**i), neuralBoard[i], target)
                for i in range(len(playerMoves)):
                    target = [1.0] * 9
                    target[playerMoves[i]] = 0.0
                    self.NN.train(learningRate * (decay**i), playerBoard[i], target)
def trainFor(self, learningRate, decay, exploreRate, episodes):
    """Self-play training: the NN plays both marks for `episodes` games.

    With probability exploreRate a move is chosen uniformly at random
    (exploration); otherwise the network picks it (exploitation) and is
    punished on the spot for proposing occupied squares. After each game
    the winner's move sequence is reinforced and the loser's discouraged,
    with learningRate decayed per move from the last move backwards.

    Args:
        learningRate: base learning rate for self.NN.train.
        decay: per-move multiplicative decay of the learning rate.
        exploreRate: probability of a random (exploratory) move.
        episodes: number of self-play games to run.
    """
    cross = 1
    circle = -1
    gameNum = 0
    for _ in range(episodes):
        self.totalGamesTrained += 1
        gameNum += 1
        print("Number " + str(gameNum))
        game = TicTacToe()
        crossMoves = []
        crossBoard = []
        circleMoves = []
        circleBoard = []
        mark = circle  # toggled at the top of the loop, so cross moves first
        while game.finished() == False:
            if mark == cross:
                mark = circle
            else:
                mark = cross
            x = None
            y = None
            board = game.getBoard(mark)
            if random.random() >= exploreRate:
                # Exploit: ask the network, punishing illegal choices.
                coord = self.action(board)
                x = coord[0][0]
                y = coord[0][1]
                while game.addMark(mark, x, y) == False:
                    out = [1.0] * 9
                    value = self.coordinatesToIndex(x, y)
                    out[value] = 0.0
                    self.NN.trainWithOutput(learningRate, out)
                    coord = self.action(board)
                    x = coord[0][0]
                    y = coord[0][1]
            else:
                # Explore: pick uniformly among the remaining free squares.
                pos = [[0, 0], [1, 0], [2, 0], [0, 1], [1, 1], [2, 1], [0, 2], [1, 2], [2, 2]]
                picked = random.choice(pos)
                x = picked[0]
                y = picked[1]
                while game.addMark(mark, x, y) == False:
                    pos.remove(picked)
                    picked = random.choice(pos)
                    x = picked[0]
                    y = picked[1]
            value = self.coordinatesToIndex(x, y)
            if mark == cross:
                crossMoves.append(value)
                crossBoard.append(board)
            else:
                circleMoves.append(value)
                circleBoard.append(board)
        # Train from the last move backwards: decay**i weights the
        # final moves most heavily.
        crossMoves.reverse()
        crossBoard.reverse()
        circleMoves.reverse()
        circleBoard.reverse()
        # BUGFIX: the original second branch was `elif game.getWinner == circle`
        # (missing call parentheses), comparing the bound method to -1 —
        # always False, so circle wins never triggered any training.
        winner = game.getWinner()
        if winner == cross:
            for i in range(len(crossMoves)):
                target = [0.0] * 9
                target[crossMoves[i]] = 1.0
                self.NN.train(learningRate * (decay**i), crossBoard[i], target)
            for i in range(len(circleMoves)):
                target = [1.0] * 9
                target[circleMoves[i]] = 0.0
                self.NN.train(learningRate * (decay**i), circleBoard[i], target)
        elif winner == circle:
            for i in range(len(circleMoves)):
                target = [0.0] * 9
                target[circleMoves[i]] = 1.0
                self.NN.train(learningRate * (decay**i), circleBoard[i], target)
            for i in range(len(crossMoves)):
                target = [1.0] * 9
                target[crossMoves[i]] = 0.0
                self.NN.train(learningRate * (decay**i), crossBoard[i], target)