def _announce_result(finished_board):
    """Print the outcome of a finished game.

    The sign of ``Functions.BlackWhiteCount`` decides the result: positive
    means player 1 won, negative means player 2 won, zero is a tie.
    """
    score = Functions.BlackWhiteCount(finished_board)
    if score > 0:
        print("Game Over! The winner is player ", 1)
    elif score < 0:
        print("Game Over! The winner is player ", 2)
    else:
        print("It's a tie!")


def _random_move(current_board, player):
    """Pick a uniformly random legal move for ``player``.

    Returns ``(y, x)``, or ``None`` when ``Functions.MovesAvailable`` yields
    nothing. The debug prints of the original implementation are preserved.
    """
    movesAvailable = Functions.MovesAvailable(current_board, player)
    print("movesAvailable", movesAvailable)
    if not movesAvailable:
        return None
    numPicked = randint(0, len(movesAvailable) - 1)
    print("numPicked", numPicked)
    movePicked = movesAvailable[numPicked]
    print("movePicked from movesAvailable", movePicked[0], movePicked[1])
    return int(movePicked[0]), int(movePicked[1])


def _lowest_scoring_move(current_board, player, evaluate):
    """Return the legal move whose resulting position evaluates lowest.

    ``evaluate(position, player)`` is called on a copy of the board with each
    candidate move applied. The computer plays as player 2, so it keeps the
    candidate with the *lowest* value; ties keep the earliest candidate
    because the comparison is strict.

    Returns ``(y, x)``, or ``None`` when there is no legal move.
    """
    best_move = None
    best_pred = None
    for candidate in Functions.MovesAvailable(current_board, player):
        x = int(candidate[1])
        y = int(candidate[0])
        trial = Functions.MakeMove(Functions.BoardCopy(current_board), y, x,
                                   player)
        pred = evaluate(trial, player)
        if best_move is None or pred < best_pred:
            best_pred = pred
            best_move = (y, x)
    return best_move


def _play_against_computer(choose_move,
                           reset_board_each_game=False,
                           redraw_before_game=False):
    """Run human (player 1) versus computer (player 2) games on the global board.

    ``choose_move(board, player)`` returns ``(y, x)`` for the computer, or
    ``None`` to pass. The outer loop starts another game for as long as the
    current board is not in a game-over state; because the board is
    re-initialised whenever a game finishes, this normally keeps going until
    the surrounding program stops it.
    """
    global board
    while Functions.GameOver(board) == 0:
        if reset_board_each_game:
            board = Functions.BoardInit()
        player = randint(1, 2)  # random starting player
        if redraw_before_game:
            UpdateBoard(player)
        gamePlaying = 1
        while gamePlaying != 0:
            UpdateBoard(player)
            if Functions.GameOver(board) == 1:
                gamePlaying = 0
                _announce_result(board)
                board = Functions.BoardInit()
            cont.set(0)
            if player == 1:
                # Block until `cont` changes. Presumably a UI callback applies
                # the human's move to the global board and then sets `cont`
                # -- confirm against the click handler.
                window.wait_variable(cont)
                player = Functions.nextPlayer(board, player)
                time.sleep(0.25)
            while player == 2:
                move = choose_move(board, player)
                # Only play when a legal move exists; previously the MinMax
                # and Network opponents fell through to a move at (0, 0).
                if move is not None:
                    board = Functions.MakeMove(board, move[0], move[1],
                                               player)
                player = Functions.nextPlayer(board, player)


def PlayGame(opponent):
    """Play interactive games on the global ``board`` against ``opponent``.

    Args:
        opponent: "Human" (two people share the UI), "Rand" (random moves),
            "MinMax" (one-ply search that minimises the sum of the encoded
            board) or "Network" (one-ply search that minimises the output of
            the saved neural network). Any other value only initialises and
            draws the board.

    The human always plays as player 1 against a computer opponent, and turns
    are synchronised with the UI through ``window.wait_variable(cont)``.
    """
    global board
    board = Functions.BoardInit()
    player = 1
    UpdateBoard(player)

    if opponent == "Human":
        player = randint(1, 2)
        gamePlaying = 1
        while gamePlaying != 0:
            if Functions.GameOver(board) == 1:
                gamePlaying = 0
                _announce_result(board)
                board = Functions.BoardInit()
            cont.set(0)
            window.wait_variable(cont)
            player = Functions.nextPlayer(board, player)
            time.sleep(0.25)

    if opponent == "Rand":
        _play_against_computer(_random_move)
    elif opponent == "MinMax":
        _play_against_computer(
            lambda position, mover: _lowest_scoring_move(
                position, mover,
                lambda trial, who: np.sum(Functions.boardToNN(trial, who))),
            redraw_before_game=True)
    elif opponent == "Network":
        ops.reset_default_graph()
        numInp = 65  # the number of inputs for the neural network
        numLabel = 1  # the number of inputs for the labels
        # Create placeholders for input and label
        inp, label = ml.placeholders(numInp, numLabel)
        # Initialise parameters
        parameters = ml.initialiseParameters()
        # Make the neural network
        out = ml.network(inp, parameters)
        modelPath = "./save/NetworkPlayer"
        # Initialise all the variables
        init = tf.global_variables_initializer()
        saver = tf.train.Saver()
        with tf.Session() as sess:
            sess.run(init)
            # Load the saved network; fall back to the fresh initialisation
            # if it cannot be restored. `except Exception` (not a bare
            # `except`) lets KeyboardInterrupt/SystemExit propagate.
            try:
                saver.restore(sess, modelPath)
            except Exception:
                print("Initialiing")
            # `out.eval` relies on `sess` being the default session, so the
            # games must run inside this `with` block.
            _play_against_computer(
                lambda position, mover: _lowest_scoring_move(
                    position, mover,
                    lambda trial, who: out.eval(
                        feed_dict={inp: Functions.boardToNN(trial, who)})),
                reset_board_each_game=True,
                redraw_before_game=True)
def training(opponent, batches):
    """Train the network (player 1) by playing batches of games against ``opponent``.

    Each batch opens a new session, restores the checkpoint at
    ``./save/NetworkPlayer`` when possible, plays ``batchSize`` games while
    recording every position with a temporal-difference style label (the
    network's prediction for the following position times ``discountRate``,
    and the final piece-count difference for the last position), then runs one
    optimiser step on the recorded data plus its colour-reversed mirror.

    Args:
        opponent: "Rand" (uniformly random moves) or "Network" (self-play;
            player 2 picks the move with the lowest network output).
        batches: number of batches to play.

    Raises:
        ValueError: if ``opponent`` is not a supported value. Previously such
            a value made the game loop spin forever as soon as player 2 was
            to move, because no branch handled that player.
    """
    # Validate before the heavy imports so a bad argument fails fast.
    if opponent not in ("Rand", "Network"):
        raise ValueError(
            "training() supports opponent 'Rand' or 'Network', got %r" %
            (opponent, ))

    import Functions
    import MachineLearning as ml
    import tensorflow as tf
    import numpy as np
    import time
    from random import random, randint

    tf.reset_default_graph()
    lr = 0.0001  # the learning rate
    numInp = 65  # the number of inputs for the neural network
    numLabel = 1  # the number of inputs for the labels
    batchSize = 50  # the number of games per batch
    discountRate = 0.99  # the discount rate for temporal difference learning
    # Create placeholders for input and label
    inp, label = ml.placeholders(numInp, numLabel)
    # Initialise parameters
    parameters = ml.initialiseParameters()
    # Make the neural network
    out = ml.network(inp, parameters)
    # Cost function for use in training
    cost = ml.computeCost(out, label)
    # Training function
    optimiser = tf.train.AdamOptimizer(learning_rate=lr).minimize(cost)
    player1 = "Network"
    player2 = opponent
    # Initialise all the variables
    init = tf.global_variables_initializer()
    progstart = time.time()
    saver = tf.train.Saver()
    expRate = 0.2  # probability of playing an exploratory random move
    p1wins = 0  # number of games player 1 has won overall
    p2wins = 0  # number of games player 2 has won overall
    modelPath = "./save/NetworkPlayer"

    def choose_network_move(position, mover):
        """Pick the network's move for ``mover``.

        With probability ``1 - expRate`` every legal move is evaluated and the
        best one is kept (highest output for player 1, lowest for player 2;
        ties keep the earliest move). Otherwise a random legal move is chosen
        to explore.

        Returns ``(move, prediction)`` where ``move`` is ``(y, x)`` or
        ``None`` when there is no legal move, and ``prediction`` is the
        network output for the position after the move (0 if none was
        evaluated).
        """
        moves = Functions.MovesAvailable(position, mover)
        chosen = None
        bestPred = 0

        def predict(y, x):
            trial = Functions.MakeMove(Functions.BoardCopy(position), y, x,
                                       mover)
            return out.eval(
                feed_dict={inp: Functions.boardToNN(trial, mover)})

        if random() > expRate:
            for move in moves:
                x = int(move[1])
                y = int(move[0])
                pred = predict(y, x)
                if chosen is None:
                    improves = True
                elif mover == 1:
                    improves = pred > bestPred
                else:
                    improves = pred < bestPred
                if improves:
                    bestPred = pred
                    chosen = (y, x)
        elif moves:
            move = moves[randint(0, len(moves) - 1)]
            chosen = (int(move[0]), int(move[1]))
            bestPred = predict(chosen[0], chosen[1])
        return chosen, bestPred

    i = 1
    while i <= batches:
        winLoss = np.ndarray((1, 1))  # scratch cell, always written before use
        board = Functions.BoardInit()
        # Seed both arrays with one column (initial board, label 0) so later
        # concatenations along axis 1 have something to extend.
        boardArray = Functions.boardToNN(board, 1)
        batchp1wins = 0
        batchp2wins = 0
        labelArray = np.ndarray((1, 1))
        labelArray[0][0] = 0
        with tf.Session() as sess:
            sess.run(init)
            # Load the saved network; fall back to the fresh initialisation
            # if it cannot be restored. `except Exception` (not a bare
            # `except`) lets KeyboardInterrupt/SystemExit propagate.
            try:
                saver.restore(sess, modelPath)
            except Exception:
                print("Initialising")
            start = time.time()
            for _ in range(batchSize):
                player = randint(1, 2)  # random starting player
                board = Functions.BoardInit()
                nnBoard = Functions.boardToNN(board, player)
                boardArray = np.concatenate((boardArray, nnBoard), axis=1)
                # The first column is an uninitialised placeholder that is
                # dropped again once the game has finished.
                gameLabelArray = np.ndarray((1, 1))

                while Functions.GameOver(board) == 0:
                    if player == 1 or player2 == "Network":
                        move, bestPred = choose_network_move(board, player)
                        # Guarded like the random player's move: never play
                        # at (0, 0) just because no legal move exists.
                        if move is not None:
                            board = Functions.MakeMove(board, move[0], move[1],
                                                       player)
                        nnBoard = Functions.boardToNN(board, player)
                    else:  # player 2 plays uniformly random moves
                        moves = Functions.MovesAvailable(board, player)
                        if moves:
                            picked = moves[randint(0, len(moves) - 1)]
                            board = Functions.MakeMove(board, int(picked[0]),
                                                       int(picked[1]), player)
                        nnBoard = Functions.boardToNN(board, player)
                        bestPred = out.eval(feed_dict={inp: nnBoard})

                    # Label for the previous position: discounted prediction
                    # of the position just reached.
                    winLoss[0][0] = bestPred * discountRate
                    gameLabelArray = np.concatenate((gameLabelArray, winLoss),
                                                    axis=1)
                    if Functions.GameOver(board) == 1:
                        # The final position is labelled with the actual
                        # result (piece-count difference).
                        winLoss[0][0] = Functions.BlackWhiteCount(board)
                        gameLabelArray = np.concatenate(
                            (gameLabelArray, winLoss), axis=1)
                    else:
                        player = Functions.nextPlayer(board, player)
                    boardArray = np.concatenate((boardArray, nnBoard), axis=1)

                # Drop the placeholder column: the label of the first board
                # state then equals the output of the next move times the
                # discount rate, and so on down the game.
                concArray = np.copy(gameLabelArray[0][1:])
                concArray = np.reshape(concArray, (1, len(concArray)))
                labelArray = np.concatenate((labelArray, concArray), axis=1)
                Winner = Functions.BlackWhiteCount(board)
                if Winner > 0:
                    batchp1wins += 1
                    p1wins += 1
                elif Winner < 0:
                    batchp2wins += 1
                    p2wins += 1

            print("batch: ", i, " out of ", batches)
            print("player 1 (", player1, ") wins ", batchp1wins)
            print("player 2 (", player2, ") wins ", batchp2wins)
            # Mirror the training data: a position where player 2 is
            # guaranteed to win would be a position where player 1 is
            # guaranteed to win if every piece was reversed.
            labelArrayOpp = Functions.ReverseArray(labelArray)
            boardArrayOpp = Functions.ReverseArray(boardArray)
            labelArray = np.concatenate((labelArray, labelArrayOpp), axis=1)
            boardArray = np.concatenate((boardArray, boardArrayOpp), axis=1)
            # Train the neural network on the whole batch
            _, boardCost = sess.run([optimiser, cost],
                                    feed_dict={
                                        inp: boardArray,
                                        label: labelArray
                                    })
            print(boardCost)
            # NOTE(review): saving is disabled, so every batch restarts from
            # the checkpoint on disk and the update above is discarded when
            # the session closes. Re-enable to persist training:
            # saver.save(sess, modelPath)
            end = time.time()
            print("batch time(secs) ", end - start)
        i += 1

    progend = time.time()
    print("total games: ", batches * batchSize)
    print("player 1 (", player1, ") wins ", p1wins)
    print("player 2 (", player2, ") wins ", p2wins)
    print("total time(secs) ", progend - progstart)