def train(qTables, numGames, alpha, tryHard):
    """Train the Q-tables by playing games against a random-move opponent.

    Each game starts with player 2 to move and the computer is randomly
    assigned to side 1 or 2.  The computer's moves come from
    ``pickBestNextMove``; the opponent picks uniformly among the empty
    cells.  When the game ends the final board is scored and the result is
    fed to ``updateQTable``.

    Args:
        qTables: Q-table store handed to ``pickBestNextMove`` and
            ``updateQTable`` (updated in place by those helpers, presumably).
        numGames: Number of training games to play.  Zero is a no-op.
        alpha: Passed straight to ``updateQTable`` (presumably the learning
            rate -- confirm against that helper).
        tryHard: Starting value of the parameter handed to
            ``pickBestNextMove``; it is raised linearly after every game so
            that it reaches 1 after ``numGames`` games.
    """
    # Guard the division so that asking for zero games does not raise
    # ZeroDivisionError; the loop below simply does not run in that case.
    tryHardGrowth = (1 - tryHard) / numGames if numGames else 0.0

    for _ in tqdm(range(numGames)):
        board = tt.genBoard()
        movesLeft = True
        winner = False
        player = 2
        # keysSoFar is never appended to here; presumably pickBestNextMove
        # records the visited state keys in it -- TODO confirm.
        keysSoFar = []
        movesSoFar = []
        computersPlayer = random.randint(1, 2)

        while movesLeft and not winner:
            if player == computersPlayer:
                bestMove = pickBestNextMove(
                    qTables, keysSoFar, board, player, computersPlayer, tryHard
                )
                movesSoFar.append(bestMove)
                tt.applyMove(player, bestMove, board)
            else:
                randomMove = random.choice(tt.listEmpties(board))
                tt.applyMove(player, randomMove, board)
            player = tt.togglePlayer(player)
            winner = tt.getWinner(board)
            movesLeft = not tt.noMoreMoves(board)

        score = scoreEndBoard(board, winner, computersPlayer)
        updateQTable(score, qTables, keysSoFar, movesSoFar, alpha)
        tryHard += tryHardGrowth
def playGame():
    """Play one interactive console game against the computer.

    The computer is fixed as player 2 (printed as "X") and moves first,
    choosing its next position with ``pickBestNextBoard``.  The human plays
    the other side and enters moves as ``y x`` (two single digits separated
    by one character).  The board is printed before every turn and once more
    when the game ends, followed by the winner or "TIE".
    """
    board = tt.genBoard()
    movesLeft = True
    winner = False
    player = 2
    computersPlayer = 2  # random.randint(1,2)

    print("NEW GAME")
    if computersPlayer == 2:
        print("COMPUTER GOES FIRST...")

    while movesLeft and not winner:
        if player == 2:
            print("X's Turn")
        else:  # player == 1
            print("O's Turn")
        tt.printBoard(board)

        if player == computersPlayer:
            board = pickBestNextBoard(board, player, computersPlayer)
            player = tt.togglePlayer(player)
        elif player == tt.togglePlayer(computersPlayer):
            # Keep asking until the human names an empty, in-range cell.
            while True:
                move = input("input move of form 'y x' ")
                try:
                    y = int(move[0])
                    x = int(move[2])
                    # Compare by value: "is not 0" tests object identity and
                    # is not a reliable emptiness check.
                    cellIsEmpty = board[y][x] == 0
                except (ValueError, IndexError):
                    # Too-short input, non-digit characters or coordinates
                    # outside the board are treated as an invalid move.
                    cellIsEmpty = False
                if cellIsEmpty:
                    break
                print("!!!INVALID MOVE!!!")
            board[y][x] = tt.togglePlayer(computersPlayer)
            player = tt.togglePlayer(player)

        winner = tt.getWinner(board)
        movesLeft = not tt.noMoreMoves(board)

    tt.printBoard(board)
    if winner:
        if winner == 2:
            print("WINNER: X")
        else:  # winner == 1
            print("WINNER: O")
    else:
        print("TIE")
def test(mct, numGames, numSims):
    """Benchmark the Monte-Carlo-tree player against a random-move opponent.

    For every game the computer is randomly assigned side 1 or 2 and player
    2 moves first.  On its turn the computer runs ``simulateChildren`` with
    ``numSims`` and then takes the board returned by ``pickBestNextMove``;
    the opponent plays a uniformly random empty cell.

    Args:
        mct: Tree/statistics object passed to ``simulateChildren`` and
            ``pickBestNextMove``.
        numGames: Number of games to play.
        numSims: Simulation budget forwarded to ``simulateChildren``.

    Returns:
        A ``(wins, losses, ties)`` tuple counted from the computer's side.
    """
    wins = 0
    losses = 0
    ties = 0

    for _ in tqdm(range(numGames)):
        board = tt.genBoard()
        player = 2
        computersPlayer = random.randint(1, 2)

        # A fresh game always has at least one turn, so loop until the
        # position is decided or full.
        while True:
            if player != computersPlayer:
                tt.applyMove(player, random.choice(tt.listEmpties(board)), board)
            else:
                simulateChildren(mct, board, player, computersPlayer, numSims)
                board = pickBestNextMove(mct, board, player)

            player = tt.togglePlayer(player)
            winner = tt.getWinner(board)
            boardFull = tt.noMoreMoves(board)
            if winner or boardFull:
                break

        if winner == computersPlayer:
            wins += 1
        elif winner == tt.togglePlayer(computersPlayer):
            losses += 1
        else:
            # Nobody won: count it as a tie.
            ties += 1

    return wins, losses, ties
def test(qTables, numGames, tryHard=1.0):
    """Benchmark the Q-table player against a random-move opponent.

    For every game the computer is randomly assigned side 1 or 2 and player
    2 moves first.  The computer's moves come from ``pickBestNextMove``; the
    opponent plays a uniformly random empty cell.  The Q-tables are not
    updated by this function itself.

    Args:
        qTables: Q-table store handed to ``pickBestNextMove``.
        numGames: Number of games to play.
        tryHard: Forwarded unchanged to ``pickBestNextMove`` on every move.

    Returns:
        A ``(wins, losses, ties)`` tuple counted from the computer's side.
    """
    wins = 0
    losses = 0
    ties = 0

    for _ in tqdm(range(numGames)):
        board = tt.genBoard()
        player = 2
        stateKeys = []
        chosenMoves = []
        computersPlayer = random.randint(1, 2)

        # A fresh game always has at least one turn, so loop until the
        # position is decided or full.
        while True:
            if player != computersPlayer:
                tt.applyMove(player, random.choice(tt.listEmpties(board)), board)
            else:
                move = pickBestNextMove(
                    qTables, stateKeys, board, player, computersPlayer, tryHard
                )
                chosenMoves.append(move)
                tt.applyMove(player, move, board)

            player = tt.togglePlayer(player)
            winner = tt.getWinner(board)
            boardFull = tt.noMoreMoves(board)
            if winner or boardFull:
                break

        if winner == computersPlayer:
            wins += 1
        elif winner == tt.togglePlayer(computersPlayer):
            losses += 1
        else:
            # Nobody won: count it as a tie.
            ties += 1

    return wins, losses, ties
def playGame():
    """Load (or train and save) the Q-tables, benchmark them, then play.

    With ``saveQTables`` set, the tables are trained with ``train`` and
    pickled to ``qTables.pickle``; otherwise they are loaded from that file.
    The tables are then benchmarked over 1000 games with ``test`` and the
    win/loss/tie counts are printed, after which the program exits via
    ``quit()``.  The interactive game below that point is therefore
    currently unreachable (see the NOTE in the body).
    """
    saveQTables = False
    fileName = 'qTables.pickle'
    qTables = {}
    keysSoFar = []
    movesSoFar = []
    tryHard = 0
    alpha = 0.9
    numTrials = 1000000

    if saveQTables:
        train(qTables, numTrials, alpha, tryHard)
        with open(fileName, 'wb') as f:
            pickle.dump(qTables, f, pickle.HIGHEST_PROTOCOL)
    else:
        # SECURITY: unpickling can execute arbitrary code; only load a
        # qTables.pickle file that this program produced itself.
        with open(fileName, 'rb') as f:
            qTables = pickle.load(f)

    numWins, numLosses, numTies = test(qTables, 1000)
    print("VS RANDOM OPPONENT...")
    print("numWins:" + str(numWins))
    print("numLosses:" + str(numLosses))
    print("numTies:" + str(numTies))
    # NOTE(review): this exit makes the interactive game below unreachable.
    # Remove it to play against the trained tables.
    quit()

    board = tt.genBoard()
    movesLeft = True
    winner = False
    player = 2
    computersPlayer = random.randint(1, 2)

    print("NEW GAME")
    if computersPlayer == 2:
        print("COMPUTER GOES FIRST...")

    while movesLeft and not winner:
        if player == 2:
            print("X's Turn")
        else:  # player == 1
            print("O's Turn")
        tt.printBoard(board)

        if player == computersPlayer:
            bestMove = pickBestNextMove(
                qTables, keysSoFar, board, player, computersPlayer,
                tryHard=1.0, verbose=True,
            )
            movesSoFar.append(bestMove)
            tt.applyMove(player, bestMove, board)
            player = tt.togglePlayer(player)
        elif player == tt.togglePlayer(computersPlayer):
            # Keep asking until the human names an empty, in-range cell.
            while True:
                move = input("input move of form 'y x' ")
                try:
                    y = int(move[0])
                    x = int(move[2])
                    # Compare by value: "is not 0" tests object identity and
                    # is not a reliable emptiness check.
                    cellIsEmpty = board[y][x] == 0
                except (ValueError, IndexError):
                    # Too-short input, non-digit characters or coordinates
                    # outside the board are treated as an invalid move.
                    cellIsEmpty = False
                if cellIsEmpty:
                    break
                print("!!!INVALID MOVE!!!")
            board[y][x] = tt.togglePlayer(computersPlayer)
            player = tt.togglePlayer(player)

        winner = tt.getWinner(board)
        movesLeft = not tt.noMoreMoves(board)

    tt.printBoard(board)

    # Learn from the finished game, then dump the Q-table entry of every
    # state key recorded in keysSoFar.
    score = scoreEndBoard(board, winner, computersPlayer)
    updateQTable(score, qTables, keysSoFar, movesSoFar, alpha)
    for key in keysSoFar:
        pprint(key)
        pprint(qTables[key])

    if winner:
        if winner == 2:
            print("WINNER: X")
        else:  # winner == 1
            print("WINNER: O")
    else:
        print("TIE")
def playGame():
    """Play one interactive console game against the Monte-Carlo-tree player.

    With ``saveMCTree`` set, the tree is built from the empty board with
    ``numSimsPreGame`` simulations, pickled to ``mct.pickle`` and the program
    exits.  Otherwise the tree is loaded from that file and a game is played:
    the computer is randomly assigned side 1 or 2, player 2 (printed as "X")
    moves first, and on each of its turns the computer runs ``numSimsOnline``
    further simulations before taking the board chosen by
    ``pickBestNextMove``.  The human enters moves as ``y x`` (two single
    digits separated by one character).
    """
    mct = {}
    board = tt.genBoard()
    player = 2
    computersPlayer = 2
    numSimsPreGame = 100000
    numSimsOnline = 100
    saveMCTree = False
    fileName = 'mct.pickle'

    if saveMCTree:
        simulateChildren(mct, board, player, computersPlayer, numSimsPreGame, verbose=True)
        with open(fileName, 'wb') as f:
            pickle.dump(mct, f, pickle.HIGHEST_PROTOCOL)
        quit()
    else:
        # SECURITY: unpickling can execute arbitrary code; only load an
        # mct.pickle file that this program produced itself.
        with open(fileName, 'rb') as f:
            mct = pickle.load(f)

    # NOTE(review): a disabled benchmark that used to live here (100 games of
    # test() with an empty tree and numSimsOnline simulations) carried the
    # note "w 0.6, t 0.11, l 0.3" -- presumably win/tie/loss rates.

    board = tt.genBoard()
    movesLeft = True
    winner = False
    player = 2
    computersPlayer = random.randint(1, 2)

    print("NEW GAME")
    if computersPlayer == 2:
        print("COMPUTER GOES FIRST...")

    while movesLeft and not winner:
        if player == 2:
            print("X's Turn")
        else:  # player == 1
            print("O's Turn")
        tt.printBoard(board)

        if player == computersPlayer:
            simulateChildren(mct, board, player, computersPlayer, numSimsOnline, verbose=True)
            board = pickBestNextMove(mct, board, player)
        elif player == tt.togglePlayer(computersPlayer):
            # Keep asking until the human names an empty, in-range cell.
            while True:
                move = input("input move of form 'y x' ")
                try:
                    y = int(move[0])
                    x = int(move[2])
                    # Compare by value: "is not 0" tests object identity and
                    # is not a reliable emptiness check.
                    cellIsEmpty = board[y][x] == 0
                except (ValueError, IndexError):
                    # Too-short input, non-digit characters or coordinates
                    # outside the board are treated as an invalid move.
                    cellIsEmpty = False
                if cellIsEmpty:
                    break
                print("!!!INVALID MOVE!!!")
            board[y][x] = tt.togglePlayer(computersPlayer)

        # Both branches hand the turn over here.
        player = tt.togglePlayer(player)
        winner = tt.getWinner(board)
        movesLeft = not tt.noMoreMoves(board)

    tt.printBoard(board)
    # The score is computed but not used afterwards; the call is kept in case
    # scoreEndBoard has side effects -- TODO confirm and drop if it is pure.
    score = scoreEndBoard(board, winner, computersPlayer)

    if winner:
        if winner == 2:
            print("WINNER: X")
        else:  # winner == 1
            print("WINNER: O")
    else:
        print("TIE")
newData.append(data[int(sampleIndex)]) sampleIndex += intervalSize assert (len(newData) == maxWindowSize) return newData verbose = True logInterval = 100 plotInterval = 20 if train: for i in trange(numTrains): # if i > 1000: # makeRandomMoves = False board = tt.genBoard() movesLeft = True winner = False player = 2 computersPlayer = random.randint(1,2) playerOneTrainingSessions = [] playerTwoTrainingSessions = [] if verbose and i % logInterval == 0: print("NEW GAME") while(movesLeft and not winner): if verbose and i % logInterval == 0: if player == 2: print("X's Turn") else: # player == 1