def predictions(outputs):
    listOfMoves = []
    newBoard = ChessEnvironment()
    for i in range(len(outputs)):
        if newBoard.result != 2:
            # game is already over: record a null move
            listOfMoves.append('0000')
        else:
            legalMoves = ActionToArray.legalMovesForState(newBoard.arrayBoard, newBoard.board)
            evaluationScores = ActionToArray.moveEvaluations(legalMoves, newBoard.arrayBoard, outputs[i])
            move = legalMoves[np.argmax(evaluationScores)]
            newBoard.makeMove(move)
            newBoard.gameResult()
            listOfMoves.append(move)
    return listOfMoves
def __getitem__(self, index):
    # Binary-converted method: unpack the bit-packed board into a full input array.
    inArray = ActionToArray.binaryArrayToBoard(self.features[index])

    # policy output vector
    array = np.zeros(2308)
    array[int(self.numpy[index])] = self.targetMag[index]
    output = torch.from_numpy(array)

    return inArray, output, np.expand_dims(self.targets2[index], axis=0)
def addPositionToMCTS(self, string, legalMoves, arrayBoard, prediction, actualBoard):
    self.dictionary[string] = len(self.dictionary)
    self.childrenMoveNames.append(legalMoves)
    self.childrenStateSeen.append(np.zeros(len(legalMoves)))
    self.childrenStateWin.append(np.zeros(len(legalMoves)))

    policy = ActionToArray.moveEvaluations(legalMoves, arrayBoard, prediction)
    self.childrenPolicyEval.append(policy)

    # value = ValueEvaluation.moveValueEvaluations(legalMoves, actualBoard, self.neuralNet)
    noValue = np.zeros(len(legalMoves))
    self.childrenValueEval.append(noValue)
def addPositionToMCTS(self, string, legalMoves, arrayBoard, prediction):
    start = time.time()
    self.dictionary[string] = len(self.dictionary)
    self.childrenMoveNames.append(legalMoves)
    self.childrenStateSeen.append(torch.zeros(len(legalMoves)))
    self.childrenStateWin.append(torch.zeros(len(legalMoves)))

    policy = ActionToArray.moveEvaluations(legalMoves, arrayBoard, prediction)
    self.childrenPolicyEval.append(policy)

    # value = ValueEvaluation.moveValueEvaluations(legalMoves, actualBoard, self.neuralNet)
    noValue = torch.zeros(len(legalMoves))
    self.childrenValueEval.append(noValue)
    end = time.time()
    print("ADD TIME:", end - start)
def addPositionToMCTS(self, string, legalMoves, arrayBoard, prediction):
    self.dictionary[string] = len(self.dictionary)
    self.childrenMoveNames.append(legalMoves)
    self.childrenStateSeen.append(np.zeros(len(legalMoves)))
    self.childrenStateWin.append(np.zeros(len(legalMoves)))

    # Scale the network evaluations to the range [0, 1].
    evaluations = ActionToArray.moveEvaluations(legalMoves, arrayBoard, prediction)
    if len(evaluations) > 0:
        minVal = np.amin(evaluations)  # pretend this is -2
        maxVal = np.amax(evaluations)  # pretend this is 10
        if minVal != maxVal:
            multiplier = maxVal - minVal  # then multiplier is 12: max -> 10/12, min -> -2/12
            try:
                center = minVal / multiplier
            except ZeroDivisionError:
                center = 0
            evaluations = evaluations / multiplier
            evaluations = evaluations - center  # min maps to 0, max maps to 1
        else:
            evaluations = evaluations / maxVal
    self.childrenNNEvaluation.append(evaluations)
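# Stand-alone sanity check of the min-max scaling above (plain NumPy, not engine code):
# evaluations spanning [-2, 10] should land exactly on [0, 1].
import numpy as np

evals = np.array([-2.0, 4.0, 10.0])
span = evals.max() - evals.min()            # 12
scaled = evals / span - evals.min() / span  # [0.0, 0.5, 1.0]
print(scaled)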
            singleGame.append(move.uci())
        listOfMoves.append(singleGame)
        listOfResults.append(result)
        print(pgnGames[g])
    except:
        print("", end="")

inList = []
outList = []
actionList = []
actionMagList = []

for j in range(len(listOfMoves)):
    board = ChessEnvironment()
    for i in range(len(listOfMoves[j])):
        state = ActionToArray.boardToBinaryArray(board.boardToState())
        value = listOfResults[j]
        action = ActionToArray.moveArray(listOfMoves[j][i], board.arrayBoard)
        if i % 2 == 0:
            # white's move: weight by white's result
            if value == 1:
                mag = 1
            elif value == 0:
                mag = 0.5
            else:
                mag = 0.2
        else:
            # black's move: weight by black's result
            if value == -1:
                mag = 1
            elif value == 0:
                mag = 0.5
            else:
                mag = 0.2
print("CHOSEN DEPTH:",model.DEPTH_VALUE) model.competitivePlayoutsFromPosition(ENGINE_PLAYOUTS, board) end = time.time() TIME_SPENT = end-start directory = model.dictionary[board.boardToString()] if board.plies > 10 or board.plies < 2: index = np.argmax(model.childrenStateSeen[directory]) else: index = np.argmax(MCTSCrazyhouse.noiseEvals(model.childrenPolicyEval[directory], noiseVal)) move = model.childrenMoveNames[directory][index] else: state = torch.from_numpy(board.boardToState()) # moves in a position moveNames = ActionToArray.legalMovesForState(board.arrayBoard, board.board) mate = isThereMate(board, moveNames, model.matefinder) if mate != None: index = mate print("I see mate!") else: model.neuralNet.eval() outputs = model.neuralNet(state)[0] policyScore = ActionToArray.moveEvaluations(moveNames, board.arrayBoard, outputs) noise = (np.random.rand(len(policyScore)) * 2 * noiseVal) - (noiseVal) index = np.argmax(policyScore+noise) move = moveNames[index]
def playout(
        self,
        round,
        explorationConstant=2 ** 0.5,  # lower? will test more.
        notFromBeginning=False,
        arrayBoard=0,
        pythonBoard=0,
        plies=0,
        wCap=0,
        bCap=0,
        actuallyAPawn=0,
        noise=True,
        printPGN=True):

    # Here is the information just for starting at a different position
    whiteParentStateDictionary = []
    whiteStateSeen = []
    whiteStateWin = []

    blackParentStateDictionary = []
    blackStateSeen = []
    blackStateWin = []

    tempBoard = ChessEnvironment()
    if notFromBeginning:
        tempBoard.arrayBoard = arrayBoard
        tempBoard.board = pythonBoard
        tempBoard.plies = plies
        tempBoard.whiteCaptivePieces = wCap
        tempBoard.blackCaptivePieces = bCap
        tempBoard.actuallyAPawn = actuallyAPawn
        tempBoard.updateNumpyBoards()

    depth = 0
    while tempBoard.result == 2 and depth < self.DEPTH_VALUE:
        depth += 1
        position = tempBoard.boardToString()

        if position not in self.dictionary:
            # Create a new entry in the tree, if the state is not seen before.
            state = torch.from_numpy(tempBoard.boardToState())
            action = torch.zeros(1)

            data = DoubleHeadDataset(state, action, action)
            testLoader = torch.utils.data.DataLoader(dataset=data, batch_size=1, shuffle=False)
            device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
            start = time.time()
            for images, irrelevant1, irrelevant2 in testLoader:
                images = images.to(device)
                outputs = self.neuralNet(images)[0]
            end = time.time()
            print("BLAH:", end - start)

            self.addPositionToMCTS(tempBoard.boardToString(),
                                   ActionToArray.legalMovesForState(tempBoard.arrayBoard, tempBoard.board),
                                   tempBoard.arrayBoard,
                                   outputs)

            # find and make the preferred move
            if noise:
                noiseConstant = 0.15 / (1 * (1 + tempBoard.plies))  # should decrease this...
            else:
                noiseConstant = 0

            if len(self.childrenStateWin) > 0:
                _, index = (PUCT_Algorithm(
                    self.childrenStateWin[len(self.childrenStateSeen) - 1],
                    self.childrenStateSeen[len(self.childrenStateSeen) - 1],
                    explorationConstant,
                    torch.sum(self.childrenStateSeen[len(self.childrenStateSeen) - 1]),
                    self.childrenValueEval[len(self.childrenStateSeen) - 1],
                    noiseEvals(self.childrenPolicyEval[len(self.childrenStateSeen) - 1], noiseConstant)
                )).max(0)
            else:
                index = 0

            move = self.childrenMoveNames[len(self.childrenStateSeen) - 1][index]
            # print(move)
            tempBoard.makeMove(move)

            actionVector = torch.zeros(len(self.childrenMoveNames[len(self.childrenStateSeen) - 1]))
            actionVector[index] = 1
        else:
            # find the directory of the move
            directory = self.dictionary[position]
            if noise:
                noiseConstant = 0.6 / (2.5 * (1 + tempBoard.plies))
            else:
                noiseConstant = 0

            _, index = (PUCT_Algorithm(
                self.childrenStateWin[directory],
                self.childrenStateSeen[directory],
                explorationConstant,
                torch.sum(self.childrenStateSeen[directory]),
                self.childrenValueEval[directory],
                noiseEvals(self.childrenPolicyEval[directory], noiseConstant)
            )).max(0)

            move = self.childrenMoveNames[directory][index]
            # print(move)
            tempBoard.makeMove(move)

            # the move will have to be indexed correctly based on where the position is.
            actionVector = torch.zeros(len(self.childrenMoveNames[directory]))
            actionVector[index] = 1

        # add this into the actions chosen.
        if tempBoard.plies % 2 == 1:  # white has moved.
            whiteParentStateDictionary.append(position)
            whiteStateSeen.append(actionVector)
        else:  # black has moved
            blackParentStateDictionary.append(position)
            blackStateSeen.append(actionVector)

        # print(tempBoard.board)
        tempBoard.gameResult()

    if tempBoard.result == 1:  # white victory
        for i in range(len(whiteStateSeen)):
            whiteStateWin.append(whiteStateSeen[i])
        for j in range(len(blackStateSeen)):
            blackStateWin.append(blackStateSeen[j] * 0)
    if tempBoard.result == -1:  # black victory
        for i in range(len(whiteStateSeen)):
            whiteStateWin.append(whiteStateSeen[i] * 0)
        for j in range(len(blackStateSeen)):
            blackStateWin.append(blackStateSeen[j])
        # this is okay, because if the game is played til checkmate then
        # this ensures that the move count for both sides is equal.
    if tempBoard.result == 0:  # 'tis a tie
        for i in range(len(whiteStateSeen)):
            whiteStateWin.append(whiteStateSeen[i] * 0.5)
        for j in range(len(blackStateSeen)):
            blackStateWin.append(blackStateSeen[j] * 0.5)
    if tempBoard.result == 2:  # game isn't played to the very end
        winRate = ValueEvaluation.positionEval(tempBoard, self.neuralNet)
        # tempBoard.printBoard()
        # print(ActionToArray.legalMovesForState(tempBoard.arrayBoard, tempBoard.board))
        # if depth is not divisible by two then the win rate belongs to the opponent
        if depth % 2 == 0:
            if tempBoard.plies % 2 == 0:
                # this means that we are evaluating white
                for i in range(len(whiteStateSeen)):
                    whiteStateWin.append(whiteStateSeen[i] * winRate)
                for j in range(len(blackStateSeen)):
                    blackStateWin.append(blackStateSeen[j] * (1 - winRate))
            else:
                # this means that we are evaluating black
                for i in range(len(whiteStateSeen)):
                    whiteStateWin.append(whiteStateSeen[i] * (1 - winRate))
                for j in range(len(blackStateSeen)):
                    blackStateWin.append(blackStateSeen[j] * winRate)
        else:
            winRate = 1 - winRate
            if tempBoard.plies % 2 == 1:
                # this means that we are evaluating white
                for i in range(len(whiteStateSeen)):
                    whiteStateWin.append(whiteStateSeen[i] * winRate)
                for j in range(len(blackStateSeen)):
                    blackStateWin.append(blackStateSeen[j] * (1 - winRate))
            else:
                # this means that we are evaluating black
                for i in range(len(whiteStateSeen)):
                    whiteStateWin.append(whiteStateSeen[i] * (1 - winRate))
                for j in range(len(blackStateSeen)):
                    blackStateWin.append(blackStateSeen[j] * winRate)

    # now, add the information into the MCTS database.
    for i in range(len(whiteStateSeen)):
        directory = self.dictionary[whiteParentStateDictionary[i]]
        self.childrenStateSeen[directory] = self.childrenStateSeen[directory] + whiteStateSeen[i]
        self.childrenStateWin[directory] = self.childrenStateWin[directory] + whiteStateWin[i]
    for i in range(len(blackStateSeen)):
        directory = self.dictionary[blackParentStateDictionary[i]]
        self.childrenStateSeen[directory] = self.childrenStateSeen[directory] + blackStateSeen[i]
        self.childrenStateWin[directory] = self.childrenStateWin[directory] + blackStateWin[i]
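# PUCT_Algorithm and noiseEvals are defined elsewhere in this repository and are not shown here.
# As orientation only, the sketch below is a minimal AlphaZero-style PUCT score written against
# same-shaped torch tensors like the ones the playout above passes in; it is an assumption about
# the general shape of the selection rule, not the repository's actual implementation.
import torch

def puct_sketch(stateWin, stateSeen, explorationConstant, totalSeen, valueEval, policyEval):
    # Exploitation term: empirical win rate, falling back to the value-head estimate
    # for children that have never been visited.
    q = torch.where(stateSeen > 0, stateWin / torch.clamp(stateSeen, min=1), valueEval)
    # Exploration term: prior-weighted bonus that shrinks as a child accumulates visits.
    u = explorationConstant * policyEval * ((totalSeen + 1) ** 0.5) / (1 + stateSeen)
    return q + u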
def trainNetwork(states, outputMoves, EPOCHS=10000, BATCH_SIZE=1000, LR=0.001,
                 loadDirectory='none.pt', saveDirectory='network1.pt',
                 OUTPUT_ARRAY_LEN=4504, THRESHOLD_FOR_SAVE=100):

    states = torch.from_numpy(states)
    outputMoves = torch.from_numpy(outputMoves)

    boards, actions = states, outputMoves

    data = MyDataset(boards, actions)
    trainLoader = torch.utils.data.DataLoader(dataset=data, batch_size=BATCH_SIZE, shuffle=True)
    testLoader = torch.utils.data.DataLoader(dataset=data, batch_size=len(boards), shuffle=False)
    # to create a prediction, create a new dataset with input of the states, and output should just be np.zeros()

    # TRAINING!
    model = ChessConvNet(OUTPUT_ARRAY_LEN).double()
    try:
        model = torch.load(loadDirectory)
    except:
        print("Pretrained NN model not found!")

    criterion = nn.PoissonNLLLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=LR)

    total_step = len(trainLoader)

    trainNotFinished = True
    for epoch in range(EPOCHS):
        if trainNotFinished:
            for i, (images, labels) in enumerate(trainLoader):
                images = images.to('cpu')
                labels = labels.to('cpu')

                # Forward pass
                outputMoves = model(images)
                loss = criterion(outputMoves, labels)

                # Backward and optimize
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                if (i + 1) % 1 == 0:
                    print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                          .format(epoch + 1, EPOCHS, i + 1, total_step, loss.item()))

            # Test the model
            model.eval()  # eval mode (batchnorm uses moving mean/variance instead of mini-batch mean/variance)
            answers = np.argmax(actions.numpy(), axis=1)
            with torch.no_grad():
                for images, labels in testLoader:
                    images = images.to('cpu')
                    labels = labels.to('cpu')
                    outputMoves = model(images)
                    _, predicted = torch.max(outputMoves.data, 1)

                    # print expectations vs reality
                    print("MAX", np.amax(outputMoves.numpy()))
                    print("MIN", np.amin(outputMoves.numpy()))

                    print(predicted.numpy())
                    print(answers)

                    correct = (predicted.numpy() == answers).sum()
                    acc = 100 * (correct / len(answers))
                    print("argmax prediction: ", acc, "% correct.")

                    if epoch % 2000 == 100:
                        newBoard = ChessEnvironment()
                        for i in range(len(outputMoves.numpy())):
                            if newBoard.result == 2:
                                move = ActionToArray.moveArrayToString(
                                    outputMoves.numpy()[i].reshape((1, 4504)),
                                    newBoard.arrayBoard, newBoard.board,
                                    newBoard.whiteCaptivePieces, newBoard.blackCaptivePieces,
                                    newBoard.plies)
                                print("NN PREDICTED MOVE: ", move)

                                # See if the evaluation score matches up with the argmax function!
                                legalMoves = ActionToArray.legalMovesForState(newBoard.arrayBoard, newBoard.board)
                                evaluationScores = ActionToArray.moveEvaluations(legalMoves, newBoard.arrayBoard,
                                                                                 outputMoves[i])
                                # print("Evaluation Rankings: ")
                                print(" = " + legalMoves[np.argmax(evaluationScores)])
                                # print(ActionToArray.sortEvals(legalMoves, evaluationScores))

                                newBoard.makeMove(move)
                                newBoard.gameResult()
                            else:
                                print(newBoard.gameStatus)

                        print(newBoard.board)
                        newBoard.gameResult()
                        print(newBoard.boardToString())
                        print(newBoard.gameStatus)

                    if acc >= THRESHOLD_FOR_SAVE:
                        torch.save(model, saveDirectory)
                        print("Updated!")
                        trainNotFinished = False

    # make sure it saves the model regardless.
    torch.save(model, saveDirectory)
    print("Updated!")
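# A minimal driver sketch for trainNetwork. The four-move game below is only a placeholder,
# and the load/save paths are illustrative; the data layout (boardToState inputs, 4504-wide
# moveArray targets, concatenated along the batch axis) follows the pattern used elsewhere
# in this repository.
import numpy as np

sampleMoves = ["e2e4", "e7e5", "g1f3", "b8c6"]  # placeholder game
board = ChessEnvironment()
sampleStates, sampleActions = [], []
for uci in sampleMoves:
    sampleStates.append(board.boardToState())
    sampleActions.append(ActionToArray.moveArray(uci, board.arrayBoard))
    board.makeMove(uci)

trainNetwork(np.concatenate(sampleStates), np.concatenate(sampleActions),
             EPOCHS=5, BATCH_SIZE=4, LR=0.001,
             loadDirectory='none.pt', saveDirectory='network1.pt')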
def makeMove(self, move):
    if chess.Move.from_uci(move) not in self.board.legal_moves:
        print(move)
        print("Illegal Move!")
        print(self.board)
        print(self.arrayBoard)

        # fall back to some legal move instead
        legalMoves = ActionToArray.legalMovesForState(self.arrayBoard, self.board)
        illegalMove = True
        while illegalMove:
            for i in range(len(legalMoves)):
                if chess.Move.from_uci(legalMoves[i]) in self.board.legal_moves:
                    move = legalMoves[i]
                    illegalMove = False

    if chess.Move.from_uci(move) in self.board.legal_moves:
        self.board.push(chess.Move.from_uci(move))

        # update the numpy board too - split the move and find coordinates! see old chess java work.
        rowNames = "abcdefgh"
        if move[1] != "@":
            initialRow = 8 - int(move[1])  # for e2e4, move[1] is '2'
        else:
            initialRow = 0
        initialCol = int(rowNames.find(move[0]))  # for e2e4, move[0] is 'e'
        finalRow = 8 - int(move[3])  # for e2e4, move[3] is '4'
        finalCol = int(rowNames.find(move[2]))  # for e2e4, move[2] is 'e'

        # SPECIAL MOVE 1: CASTLING. MAKE SURE THAT THE PIECE IN QUESTION IS A KING!!!
        # (black pieces are stored in lowercase)
        if move == "e1g1" and self.arrayBoard[7][4] == "K" and self.arrayBoard[7][7] == "R":
            self.arrayBoard[7][4] = " "
            self.arrayBoard[7][7] = " "
            self.arrayBoard[7][5] = "R"
            self.arrayBoard[7][6] = "K"
        elif move == "e8g8" and self.arrayBoard[0][4] == "k" and self.arrayBoard[0][7] == "r":
            self.arrayBoard[0][4] = " "
            self.arrayBoard[0][7] = " "
            self.arrayBoard[0][5] = "r"
            self.arrayBoard[0][6] = "k"
        elif move == "e8c8" and self.arrayBoard[0][4] == "k" and self.arrayBoard[0][0] == "r":
            self.arrayBoard[0][0] = " "
            self.arrayBoard[0][1] = " "
            self.arrayBoard[0][4] = " "
            self.arrayBoard[0][2] = "k"
            self.arrayBoard[0][3] = "r"
        elif move == "e1c1" and self.arrayBoard[7][4] == "K" and self.arrayBoard[7][0] == "R":
            self.arrayBoard[7][0] = " "
            self.arrayBoard[7][1] = " "
            self.arrayBoard[7][4] = " "
            self.arrayBoard[7][2] = "K"
            self.arrayBoard[7][3] = "R"

        # SPECIAL MOVE 2: EN PASSANT
        # check if the capture square is empty and there is a pawn on the same row but a different column
        # white en passant
        elif self.arrayBoard[initialRow][initialCol] == "P" and self.arrayBoard[initialRow][finalCol] == "p" and \
                self.arrayBoard[finalRow][finalCol] == " ":
            # print("WHITE EN PASSANT")
            self.arrayBoard[initialRow][initialCol] = " "
            self.arrayBoard[finalRow][finalCol] = "P"
            self.arrayBoard[initialRow][finalCol] = " "
            self.whiteCaptivePieces[0] += 1

        # black en passant
        elif self.arrayBoard[initialRow][initialCol] == "p" and self.arrayBoard[initialRow][finalCol] == "P" and \
                self.arrayBoard[finalRow][finalCol] == " ":
            # print("BLACK EN PASSANT")
            self.arrayBoard[initialRow][initialCol] = " "
            self.arrayBoard[finalRow][finalCol] = "p"
            self.arrayBoard[initialRow][finalCol] = " "
            self.blackCaptivePieces[0] += 1

        elif "PRNBQ".find(move[0]) == -1:
            # an ordinary move (not a crazyhouse drop): update the board
            temp = self.arrayBoard[finalRow][finalCol]
            self.arrayBoard[finalRow][finalCol] = self.arrayBoard[initialRow][initialCol]
            self.arrayBoard[initialRow][initialCol] = " "

            # move around the actuallyAPawn stuff too.
            wasAPawn = self.actuallyAPawn[finalRow][finalCol]
            self.actuallyAPawn[finalRow][finalCol] = self.actuallyAPawn[initialRow][initialCol]
            self.actuallyAPawn[initialRow][initialCol] = 0

            # this is for promotion
            if len(move) == 5:
                if self.plies % 2 == 0:
                    self.arrayBoard[finalRow][finalCol] = move[4].upper()
                if self.plies % 2 == 1:
                    self.arrayBoard[finalRow][finalCol] = move[4].lower()
                self.actuallyAPawn[finalRow][finalCol] = 1

            # add pieces to the captured area
            if wasAPawn == 0:  # 0 means it is normal.
                whiteCaptured = "pnbrq".find(temp)
                blackCaptured = "PNBRQ".find(temp)
                if whiteCaptured > -1:
                    self.whiteCaptivePieces[whiteCaptured] += 1
                if blackCaptured > -1:
                    self.blackCaptivePieces[blackCaptured] += 1
            if wasAPawn == 1:  # 1 means that the piece in question was once a pawn.
                if self.plies % 2 == 0:
                    self.whiteCaptivePieces[0] += 1
                if self.plies % 2 == 1:
                    self.blackCaptivePieces[0] += 1

        else:
            # this is when a captured piece is put back on the board (a crazyhouse drop)
            # update the captive pieces
            placed = "PNBRQ".find(move[0])
            if self.plies % 2 == 0:
                self.whiteCaptivePieces[placed] -= 1
            if self.plies % 2 == 1:
                self.blackCaptivePieces[placed] -= 1

            # update the board.
            rowNames = "abcdefgh"
            placedRow = 8 - int(move[3])
            placedCol = int(rowNames.find(move[2]))
            if self.plies % 2 == 0:
                self.arrayBoard[placedRow][placedCol] = move[0]
            if self.plies % 2 == 1:
                self.arrayBoard[placedRow][placedCol] = move[0].lower()

        # once everything is done, update the move count
        self.updateNumpyBoards()
        self.plies += 1
"e1g1", "h4g2", "g1g2", "P@e4", "d3b5", "c8d7", "e2f4", "c6e5", "b5d7", "d8d7", "f2f3", "e4f3", "g2g1", "P@e2", "f4e2", "f3e2", "d1e2", "B@d3", "e2e5", "N@h3", "g1g2", "d3f1", "g2f1", "R@g1", "f1e2", "g1c1", "N@e3", "B@f4", "e5h5", "h3g1", "e2d2", "f4e3", "d2e3", "N@c4", "e3f2", "P@c2", "B@d3", "c2b1q", "a1b1", "N@h3", "f2g3", "c1b1", "d3b1", "f8d6", "P@e5", "d6e5", "d4e5", "R@g2", "g3g2", "c4e3", "g2g3", "P@f4", "g3h4", "e3g2", "h4g4", "h3f2", "g4g5", "h7h6", "h5h6", "g1f3", "g5h5", "h8h6"] ] inputs = np.zeros(1) outputs = np.zeros(1) for j in range(len(listOfMoves)): board = ChessEnvironment() for i in range(len(listOfMoves[j])): state = board.boardToState() action = ActionToArray.moveArray(listOfMoves[j][i], board.arrayBoard) if board.board.legal_moves.count() != len(ActionToArray.legalMovesForState(board.arrayBoard, board.board)): print("ERROR!") board.makeMove(listOfMoves[j][i]) if j == 0: if i == 0: inputs = state outputs = action else: inputs = np.concatenate((inputs, state)) outputs = np.concatenate((outputs, action)) else: seenBefore = False for k in range(len(inputs)): if np.sum(abs(inputs[k].flatten()-state.flatten())) == 0:
    if turn == 1:
        output = 1 - output

    # now, let's return our evaluation
    # print(output)
    return output


testing = False
if testing:
    neuralNet = ChessResNet.ResNetDoubleHead()
    neuralNet.load_state_dict(torch.load('New Networks/(MCTS)(6X256|4|8)(V4)(DESKTOP)64fish.pt'))
    neuralNet.double()
    neuralNet.eval()

    board = ChessEnvironment()

    start = time.time()
    print(moveValueEvaluations(ActionToArray.legalMovesForState(board.arrayBoard, board.board), board, neuralNet))
    end = time.time()
    print("time taken:", end - start)

    start = time.time()
    print(moveValueEvaluationsNew(ActionToArray.legalMovesForState(board.arrayBoard, board.board), board, neuralNet))
    end = time.time()
    print("time taken:", end - start)
        position = board.boardToString()
        if position not in model.dictionary:
            state = torch.from_numpy(board.boardToState())
            nullAction = torch.from_numpy(np.zeros(1))  # this will not be used, is only a filler
            testSet = DoubleHeadDataset(state, nullAction, nullAction)
            generatePredic = torch.utils.data.DataLoader(dataset=testSet, batch_size=len(state), shuffle=False)
            with torch.no_grad():
                for images, labels1, labels2 in generatePredic:
                    model.neuralNet.eval()
                    outputs = model.neuralNet(images)[0]
                    if playouts > 0:
                        model.addPositionToMCTS(board.boardToString(),
                                                ActionToArray.legalMovesForState(board.arrayBoard, board.board),
                                                board.arrayBoard, outputs, board)
                    else:
                        model.dictionary[board.boardToString()] = len(model.dictionary)
                        policy = ActionToArray.moveEvaluations(
                            ActionToArray.legalMovesForState(board.arrayBoard, board.board),
                            board.arrayBoard, outputs)
                        model.childrenPolicyEval.append(policy)
                        model.childrenMoveNames.append(
                            ActionToArray.legalMovesForState(board.arrayBoard, board.board))

        directory = model.dictionary[board.boardToString()]
        if playouts > 0:
            index = np.argmax(
def NetworkCompetitionWhite(bestNet, testingNet, playouts, round="1"):
    score = 0
    PGN = chess.pgn.Game()
    PGN.headers["Event"] = "Neural Network Comparison Test"
    PGN.headers["Site"] = "Cozy Computer Lounge"
    PGN.headers["Date"] = datetime.datetime.today().strftime('%Y-%m-%d %H:%M')
    PGN.headers["Round"] = round
    PGN.headers["White"] = "Network: " + bestNet.nameOfNetwork
    PGN.headers["Black"] = "Network: " + testingNet.nameOfNetwork
    PGN.headers["Variant"] = "crazyhouse"

    sim = ChessEnvironment()
    while sim.result == 2:
        # print("Win Probability:", ValueEvaluation.positionEval(sim, bestNet.neuralNet))
        noiseVal = 1.0 / (2 * (sim.plies // 2 + 1))
        if sim.plies % 2 == 0:
            if playouts > 0:
                bestNet.competitivePlayoutsFromPosition(playouts, sim)
            else:
                position = sim.boardToString()
                if position not in bestNet.dictionary:
                    state = torch.from_numpy(sim.boardToState())
                    nullAction = torch.from_numpy(np.zeros(1))  # this will not be used, is only a filler
                    testSet = DoubleHeadDataset(state, nullAction, nullAction)
                    generatePredic = torch.utils.data.DataLoader(dataset=testSet, batch_size=len(state), shuffle=False)
                    with torch.no_grad():
                        for images, labels1, labels2 in generatePredic:
                            bestNet.neuralNet.eval()
                            # start = time.time()
                            outputs = bestNet.neuralNet(images)[0]
                            # end = time.time()
                            # print(end - start)
                            if playouts > 0:
                                bestNet.addPositionToMCTS(sim.boardToString(),
                                                          ActionToArray.legalMovesForState(sim.arrayBoard, sim.board),
                                                          sim.arrayBoard, outputs, sim)
                            else:
                                bestNet.dictionary[sim.boardToString()] = len(bestNet.dictionary)
                                policy = ActionToArray.moveEvaluations(
                                    ActionToArray.legalMovesForState(sim.arrayBoard, sim.board),
                                    sim.arrayBoard, outputs)
                                bestNet.childrenMoveNames.append(
                                    ActionToArray.legalMovesForState(sim.arrayBoard, sim.board))
                                bestNet.childrenPolicyEval.append(policy)

            directory = bestNet.dictionary[sim.boardToString()]
            if playouts > 0:
                index = np.argmax(
                    MCTSCrazyhouse.PUCT_Algorithm(bestNet.childrenStateWin[directory],
                                                  bestNet.childrenStateSeen[directory], 1,
                                                  np.sum(bestNet.childrenStateSeen[directory]),
                                                  bestNet.childrenValueEval[directory],
                                                  MCTSCrazyhouse.noiseEvals(bestNet.childrenPolicyEval[directory],
                                                                            noiseVal)))
            else:
                index = np.argmax(MCTSCrazyhouse.noiseEvals(bestNet.childrenPolicyEval[directory], noiseVal))
            move = bestNet.childrenMoveNames[directory][index]
            if chess.Move.from_uci(move) not in sim.board.legal_moves:
                move = ActionToArray.legalMovesForState(sim.arrayBoard, sim.board)[0]
            # print(move)
            sim.makeMove(move)
            sim.gameResult()
        elif sim.plies % 2 == 1:
            if playouts > 0:
                testingNet.competitivePlayoutsFromPosition(playouts, sim)
            else:
                position = sim.boardToString()
                if position not in testingNet.dictionary:
                    state = torch.from_numpy(sim.boardToState())
                    nullAction = torch.from_numpy(np.zeros(1))  # this will not be used, is only a filler
                    testSet = DoubleHeadDataset(state, nullAction, nullAction)
                    generatePredic = torch.utils.data.DataLoader(dataset=testSet, batch_size=len(state), shuffle=False)
                    with torch.no_grad():
                        for images, labels1, labels2 in generatePredic:
                            testingNet.neuralNet.eval()
                            outputs = testingNet.neuralNet(images)[0]
                            if playouts > 0:
                                testingNet.addPositionToMCTS(sim.boardToString(),
                                                             ActionToArray.legalMovesForState(sim.arrayBoard, sim.board),
                                                             sim.arrayBoard, outputs, sim)
                            else:
                                testingNet.dictionary[sim.boardToString()] = len(testingNet.dictionary)
                                policy = ActionToArray.moveEvaluations(
                                    ActionToArray.legalMovesForState(sim.arrayBoard, sim.board),
                                    sim.arrayBoard, outputs)
                                testingNet.childrenMoveNames.append(
                                    ActionToArray.legalMovesForState(sim.arrayBoard, sim.board))
                                testingNet.childrenPolicyEval.append(policy)

            directory = testingNet.dictionary[sim.boardToString()]
            if playouts > 0:
                index = np.argmax(
                    MCTSCrazyhouse.PUCT_Algorithm(testingNet.childrenStateWin[directory],
                                                  testingNet.childrenStateSeen[directory], 1,
                                                  np.sum(testingNet.childrenStateSeen[directory]),
                                                  testingNet.childrenValueEval[directory],
                                                  MCTSCrazyhouse.noiseEvals(testingNet.childrenPolicyEval[directory],
                                                                            noiseVal)))
            else:
                index = np.argmax(MCTSCrazyhouse.noiseEvals(testingNet.childrenPolicyEval[directory], noiseVal))
            move = testingNet.childrenMoveNames[directory][index]
            if chess.Move.from_uci(move) not in sim.board.legal_moves:
                move = ActionToArray.legalMovesForState(sim.arrayBoard, sim.board)[0]
            # print(move)
            sim.makeMove(move)
            sim.gameResult()

        if sim.plies == 1:
            node = PGN.add_variation(chess.Move.from_uci(move))
        else:
            node = node.add_variation(chess.Move.from_uci(move))
        # print(sim.board)

    # the returned score is recorded from the testing (black) network's perspective
    if sim.result == 1:
        PGN.headers["Result"] = "1-0"
    if sim.result == 0:
        PGN.headers["Result"] = "1/2-1/2"
        score = 0.5
    if sim.result == -1:
        PGN.headers["Result"] = "0-1"
        score = 1

    print(PGN)
    return score
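# A hedged match-loop sketch using the function above. bestNet and testingNet are assumed to be
# already-constructed network/MCTS wrappers; the game count is arbitrary. The returned score is
# credited to testingNet (0 = loss, 0.5 = draw, 1 = win), with bestNet always playing white.
GAMES = 10
testingScore = 0
for g in range(GAMES):
    testingScore += NetworkCompetitionWhite(bestNet, testingNet, playouts=0, round=str(g + 1))
print("testing network scored", testingScore, "out of", GAMES)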
def NetworkCompetitionWhite(bestNet, playouts, round="1"):
    PGN = chess.pgn.Game()
    PGN.headers["Event"] = "Neural Network Comparison Test"
    PGN.headers["Site"] = "Cozy Computer Lounge"
    PGN.headers["Date"] = datetime.datetime.today().strftime('%Y-%m-%d %H:%M')
    PGN.headers["Round"] = round
    PGN.headers["White"] = "Network: " + bestNet.nameOfNetwork
    PGN.headers["Black"] = "You"
    PGN.headers["Variant"] = "crazyhouse"

    sim = ChessEnvironment()
    while sim.result == 2:
        noiseVal = 0.0 / (10 * (sim.plies // 2 + 1))
        if sim.plies % 2 == 0:
            if playouts > 0:
                start = time.time()
                bestNet.competitivePlayoutsFromPosition(playouts, sim)
                end = time.time()
                print(end - start)
            else:
                position = sim.boardToString()
                if position not in bestNet.dictionary:
                    image = torch.from_numpy(sim.boardToState())
                    outputs = bestNet.neuralNet(image)[0]
                    if playouts > 0:
                        bestNet.addPositionToMCTS(sim.boardToString(),
                                                  ActionToArray.legalMovesForState(sim.arrayBoard, sim.board),
                                                  sim.arrayBoard, outputs, sim)
                    else:
                        bestNet.dictionary[sim.boardToString()] = len(bestNet.dictionary)
                        policy = ActionToArray.moveEvaluations(
                            ActionToArray.legalMovesForState(sim.arrayBoard, sim.board),
                            sim.arrayBoard, outputs)
                        bestNet.childrenMoveNames.append(
                            ActionToArray.legalMovesForState(sim.arrayBoard, sim.board))
                        bestNet.childrenPolicyEval.append(policy)

            directory = bestNet.dictionary[sim.boardToString()]
            if playouts > 0:
                index = np.argmax(
                    MCTSCrazyhouse.PUCT_Algorithm(bestNet.childrenStateWin[directory],
                                                  bestNet.childrenStateSeen[directory], 1,
                                                  np.sum(bestNet.childrenStateSeen[directory]),
                                                  bestNet.childrenValueEval[directory],
                                                  MCTSCrazyhouse.noiseEvals(bestNet.childrenPolicyEval[directory],
                                                                            noiseVal)))
            else:
                index = np.argmax(MCTSCrazyhouse.noiseEvals(bestNet.childrenPolicyEval[directory], noiseVal))

            move = bestNet.childrenMoveNames[directory][index]
            if chess.Move.from_uci(move) not in sim.board.legal_moves:
                move = ActionToArray.legalMovesForState(sim.arrayBoard, sim.board)[0]

            # PRINT WIN PROBABILITY W/ MCTS?
            print("-----")
            print(move)
            print("Win Probability: {:.4f} %".format(100 * ValueEvaluation.positionEval(sim, bestNet.neuralNet)))
            if playouts > 0 and bestNet.childrenStateSeen[directory][index] > 0:
                mctsWinRate = 100 * bestNet.childrenStateWin[directory][index] / bestNet.childrenStateSeen[directory][index]
                print("MCTS Win Probability: {:.4f} %".format(mctsWinRate))
                totalWinRate = (100 * ValueEvaluation.positionEval(sim, bestNet.neuralNet) + mctsWinRate) / 2
                print("Total Win Probability: {:.4f} %".format(totalWinRate))
            print("-----")

            sim.makeMove(move)
            sim.gameResult()
        elif sim.plies % 2 == 1:
            legal = False
            while not legal:
                move = input("Enter move: ")
                if len(move) == 4 or len(move) == 5:
                    if chess.Move.from_uci(move) in sim.board.legal_moves:
                        legal = True
                    else:
                        print("Illegal move! Try again:")
                else:
                    print("Illegal move! Try again:")
            print(move)
            sim.makeMove(move)
            sim.gameResult()

        if sim.plies == 1:
            node = PGN.add_variation(chess.Move.from_uci(move))
        else:
            node = node.add_variation(chess.Move.from_uci(move))

        print(sim.board)
        print("WHITE POCKET")
        print(sim.whiteCaptivePieces)
        print("BLACK POCKET")
        print(sim.blackCaptivePieces)

    if sim.result == 1:
        PGN.headers["Result"] = "1-0"
    if sim.result == 0:
        PGN.headers["Result"] = "1/2-1/2"
    if sim.result == -1:
        PGN.headers["Result"] = "0-1"

    print(PGN)
def createTrainingGames(self, numberOfGames, playouts):
    trainingParentStates = np.zeros(1)
    trainingStatesSeen = []
    trainingStatesWin = []
    trainingStatesName = []
    trainingWinPercentages = []

    for i in range(numberOfGames):
        newParentStates, newStatesSeen, newStatesWin, newStatesName = \
            self.simulateTrainingGame(playouts, round=str(int(i + 1)))

        if i == 0:  # if nothing has been added yet
            trainingParentStates = newParentStates
            trainingStatesSeen = newStatesSeen
            trainingStatesWin = newStatesWin
            trainingStatesName = newStatesName
        if i != 0:
            removeDirectories = []
            for k in range(len(trainingParentStates)):
                for j in range(len(newParentStates)):
                    if np.sum((abs(trainingParentStates[k].flatten() - newParentStates[j].flatten()))) == 0:
                        # If information is already in the dataset, edit the existing data
                        trainingStatesWin[k] = trainingStatesWin[k] + newStatesWin[j]
                        trainingStatesSeen[k] = trainingStatesSeen[k] + newStatesSeen[j]
                        removeDirectories.append(j)

            removeDirectories.sort()
            while len(removeDirectories) > 0:
                index = removeDirectories.pop()
                newParentStates = np.delete(newParentStates, index, axis=0)
                del newStatesSeen[index]
                del newStatesWin[index]
                del newStatesName[index]

            trainingParentStates = np.concatenate((trainingParentStates, newParentStates), axis=0)
            trainingStatesSeen = trainingStatesSeen + newStatesSeen
            trainingStatesWin = trainingStatesWin + newStatesWin
            trainingStatesName = trainingStatesName + newStatesName

    # Create win percentage for all moves:
    for j in range(len(trainingStatesWin)):  # length of tSW and tSS should be the same
        newEntry = np.divide(trainingStatesWin[j], trainingStatesSeen[j],
                             out=np.zeros_like(trainingStatesWin[j]),
                             where=trainingStatesSeen[j] != 0)
        trainingWinPercentages.append(newEntry)

    # return the information. trainingWinPercentages has to be converted to a numpy array of correct shape!
    print("Size of Training Material: ", len(trainingParentStates))
    print(len(trainingWinPercentages))
    print(len(trainingStatesName))
    print(len(trainingParentStates))
    print(trainingParentStates.shape)

    # now, for each trainingWinPercentages and trainingStatesName, convert this into an output that the NN can train on.
    trainingParentActions = np.zeros(1)  # create output for nn

    for k in range(len(trainingStatesWin)):
        actionTaken = np.zeros((1, 4504))

        # find the board position when the move was played.
        blankBoard = [
            [" ", " ", " ", " ", " ", " ", " ", " "],  # 0 - 7
            [" ", " ", " ", " ", " ", " ", " ", " "],  # 8 - 15
            [" ", " ", " ", " ", " ", " ", " ", " "],  # 16 - 23
            [" ", " ", " ", " ", " ", " ", " ", " "],  # 24 - 31
            [" ", " ", " ", " ", " ", " ", " ", " "],  # 32 - 39
            [" ", " ", " ", " ", " ", " ", " ", " "],  # 40 - 47
            [" ", " ", " ", " ", " ", " ", " ", " "],  # 48 - 55
            [" ", " ", " ", " ", " ", " ", " ", " "],  # 56 - 63
        ]

        for i in range(64):
            pieces = "PNBRQKpnbrqk"
            for j in range(len(pieces)):
                if trainingParentStates[k].flatten()[(j * 64) + i] == 1:
                    blankBoard[i // 8][i % 8] = pieces[j]

        # this is the board.
        # print(blankBoard)

        # this is the move chosen
        # print(trainingStatesName[k][np.argmax(trainingStatesSeen[k])])

        for l in range(len(trainingStatesName[k])):
            if l == 0:
                actionTaken = ActionToArray.moveArray(trainingStatesName[k][l], blankBoard) * trainingStatesWin[k][l]
            else:
                additionalAction = ActionToArray.moveArray(trainingStatesName[k][l], blankBoard) * trainingStatesWin[k][l]
                actionTaken = actionTaken + additionalAction

        if k == 0:
            trainingParentActions = actionTaken
        else:
            trainingParentActions = np.concatenate((trainingParentActions, actionTaken), axis=0)

    # print(np.sum(trainingParentActions, axis=1))
    return trainingParentStates, trainingParentActions
def playout(
        self,
        round,
        explorationConstant=0.15,  # lower? will test more.
        notFromBeginning=False,
        arrayBoard=0,
        pythonBoard=0,
        plies=0,
        wCap=0,
        bCap=0,
        actuallyAPawn=0,
        noise=True,
        printPGN=True):

    # Here is the information just for starting at a different position
    if printPGN:
        PGN = chess.pgn.Game()
        PGN.headers["Event"] = "Playout"
        PGN.headers["Site"] = "Nayoung's Home"
        PGN.headers["Date"] = datetime.datetime.today().strftime('%Y-%m-%d')
        PGN.headers["Round"] = round
        PGN.headers["White"] = "Network: " + self.nameOfNetwork
        PGN.headers["Black"] = "Network: " + self.nameOfNetwork
        PGN.headers["Variant"] = "Crazyhouse"

    whiteParentStateDictionary = []
    whiteStateSeen = []
    whiteStateWin = []

    blackParentStateDictionary = []
    blackStateSeen = []
    blackStateWin = []

    tempBoard = ChessEnvironment()
    if notFromBeginning:
        tempBoard.arrayBoard = arrayBoard
        tempBoard.board = pythonBoard
        tempBoard.plies = plies
        tempBoard.whiteCaptivePieces = wCap
        tempBoard.blackCaptivePieces = bCap
        tempBoard.actuallyAPawn = actuallyAPawn
        tempBoard.updateNumpyBoards()

    while tempBoard.result == 2:
        position = tempBoard.boardToString()
        if position not in self.dictionary:
            # Create a new entry in the tree, if the state is not seen before.
            state = torch.from_numpy(tempBoard.boardToState())
            nullAction = torch.from_numpy(np.zeros((1, 4504)))  # this will not be used, is only a filler
            testSet = MyDataset(state, nullAction)
            generatePredic = torch.utils.data.DataLoader(dataset=testSet, batch_size=len(state), shuffle=False)
            with torch.no_grad():
                for images, labels in generatePredic:
                    outputs = self.neuralNet(images)
                    self.addPositionToMCTS(tempBoard.boardToString(),
                                           ActionToArray.legalMovesForState(tempBoard.arrayBoard, tempBoard.board),
                                           tempBoard.arrayBoard,
                                           outputs)

            # find and make the preferred move
            if noise:
                noiseConstant = 0.6 / (2.5 * (1 + tempBoard.plies))
            else:
                noiseConstant = 0

            if len(self.childrenStateWin) > 0:
                index = np.argmax(PUCT_Algorithm(
                    self.childrenStateWin[len(self.childrenStateSeen) - 1],
                    self.childrenStateSeen[len(self.childrenStateSeen) - 1],
                    explorationConstant,
                    np.sum(self.childrenStateSeen[len(self.childrenStateSeen) - 1]),
                    noiseEvals(self.childrenNNEvaluation[len(self.childrenStateSeen) - 1], noiseConstant)))
            else:
                index = 0

            move = self.childrenMoveNames[len(self.childrenStateSeen) - 1][index]

            if chess.Move.from_uci(move) not in tempBoard.board.legal_moves:
                print("Not legal move.")
                # play a random move
                move = self.childrenMoveNames[len(self.childrenStateSeen) - 1][0]

            # print(move)
            tempBoard.makeMove(move)

            actionVector = np.zeros(len(self.childrenMoveNames[len(self.childrenStateSeen) - 1]))
            actionVector[index] = 1
        else:
            # find the directory of the move
            directory = self.dictionary[position]
            if noise:
                noiseConstant = 0.6 / (2.5 * (1 + tempBoard.plies))
            else:
                noiseConstant = 0

            index = np.argmax(PUCT_Algorithm(
                self.childrenStateWin[directory],
                self.childrenStateSeen[directory],
                explorationConstant,
                np.sum(self.childrenStateSeen[directory]),
                noiseEvals(self.childrenNNEvaluation[directory], noiseConstant)))

            move = self.childrenMoveNames[directory][index]
            # print(move)
            tempBoard.makeMove(move)

            # the move will have to be indexed correctly based on where the position is.
            actionVector = np.zeros(len(self.childrenMoveNames[directory]))
            actionVector[index] = 1

        if printPGN:
            if tempBoard.plies == 1:
                node = PGN.add_variation(chess.Move.from_uci(move))
            else:
                node = node.add_variation(chess.Move.from_uci(move))

        # add this into the actions chosen.
        if tempBoard.plies % 2 == 1:  # white has moved.
            whiteParentStateDictionary.append(position)
            whiteStateSeen.append(actionVector)
        else:  # black has moved
            blackParentStateDictionary.append(position)
            blackStateSeen.append(actionVector)

        # print(tempBoard.board)
        tempBoard.gameResult()

    if tempBoard.result == 1:  # white victory
        for i in range(len(whiteStateSeen)):
            whiteStateWin.append(whiteStateSeen[i])
        for j in range(len(blackStateSeen)):
            blackStateWin.append(blackStateSeen[j] * 0)
        if printPGN:
            PGN.headers["Result"] = "1-0"
    if tempBoard.result == -1:  # black victory
        for i in range(len(whiteStateSeen)):
            whiteStateWin.append(whiteStateSeen[i] * 0)
        for j in range(len(blackStateSeen)):
            blackStateWin.append(blackStateSeen[j])
        # this is okay, because if the game is played til checkmate then
        # this ensures that the move count for both sides is equal.
        if printPGN:
            PGN.headers["Result"] = "0-1"
    if tempBoard.result == 0:  # 'tis a tie
        for i in range(len(whiteStateSeen)):
            whiteStateWin.append(whiteStateSeen[i] * 0.5)
        for j in range(len(blackStateSeen)):
            blackStateWin.append(blackStateSeen[j] * 0.5)
        if printPGN:
            PGN.headers["Result"] = "1/2-1/2"

    # Print PGN and final state
    if printPGN:
        print("PGN: ")
        print(PGN)

    # now, add the information into the MCTS database.
    for i in range(len(whiteStateSeen)):
        directory = self.dictionary[whiteParentStateDictionary[i]]
        self.childrenStateSeen[directory] = self.childrenStateSeen[directory] + whiteStateSeen[i]
        self.childrenStateWin[directory] = self.childrenStateWin[directory] + whiteStateWin[i]
    for i in range(len(blackStateSeen)):
        directory = self.dictionary[blackParentStateDictionary[i]]
        self.childrenStateSeen[directory] = self.childrenStateSeen[directory] + blackStateSeen[i]
        self.childrenStateWin[directory] = self.childrenStateWin[directory] + blackStateWin[i]

    if printPGN:
        print(tempBoard.board)
        self.printSize()
    # so far, output gives a winning probability from -1 to 1, 1 for white, -1 for black.
    # We want to scale this to a value between 0 and 1.
    output = (output / 2) + 0.5

    # now we have an evaluation from 0 to 1. Now we have to scale this to a probability
    # for either black or white depending on who moves next.
    turn = evalBoard.plies % 2  # if plies is not divisible by 2, then it is black to move.
    if turn == 1:
        output = 1 - output

    # now, let's return our evaluation
    # print(output)
    return output


testing = False
if testing:
    hi = ChessEnvironment()
    hi.printBoard()
    moves = ActionToArray.legalMovesForState(hi.arrayBoard, hi.board)
    print(moves)
    network = torch.load("New Networks/1712-finalnet.pt")
    evaluations = moveValueEvaluations(moves, hi, network)
    print(evaluations)
    eval = positionEval(hi, network)
    print(eval)
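# Quick arithmetic check of the scaling above (stand-alone, not engine code): a raw value-head
# output of +0.6 (white better) maps to 0.8 from white's perspective, and hence 0.2 for the
# side to move when it is black's turn (plies odd).
raw = 0.6
whiteProb = raw / 2 + 0.5       # 0.8
sideToMoveProb = 1 - whiteProb  # 0.2 when black is to move
print(whiteProb, sideToMoveProb)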
print("", end="") f = open("Training Data/201805games2000.txt", "w+") for i in range(len(listOfMoves)): print(listOfMoves[i], ",") f.write(str(listOfMoves[i]) + ",\n") f.close() inList = [] outList = [] for j in range(len(listOfMoves)): board = ChessEnvironment() for i in range(len(listOfMoves[j])): state = board.boardToState() action = ActionToArray.moveArray(listOfMoves[j][i], board.arrayBoard) for k in range(320, 384): action[0][k] = 0 if board.board.legal_moves.count() != len( ActionToArray.legalMovesForState(board.arrayBoard, board.board)): print("ERROR!") board.makeMove(listOfMoves[j][i]) # add it to database inList.append(state) outList.append(np.argmax(action)) print(board.board) board.gameResult() print(board.gameStatus)
def simulateTrainingGame(self, playouts, round="1"): PGN = chess.pgn.Game() PGN.headers["Event"] = "Simulated Training Game" PGN.headers["Site"] = "Cozy Computer Lounge" PGN.headers["Date"] = datetime.datetime.today().strftime('%Y-%m-%d %H:%M') PGN.headers["Round"] = round PGN.headers["White"] = "Network: " + self.nameOfNetwork PGN.headers["Black"] = "Network: " + self.nameOfNetwork PGN.headers["Variant"] = "crazyhouse" sim = ChessEnvironment() while sim.result == 2: if playouts == 0: position = sim.boardToString() if position not in self.dictionary: state = torch.from_numpy(sim.boardToState()) nullAction = torch.from_numpy(np.zeros(1)) # this will not be used, is only a filler testSet = DoubleHeadDataset(state, nullAction, nullAction) generatePredic = torch.utils.data.DataLoader(dataset=testSet, batch_size=len(state), shuffle=False) with torch.no_grad(): for images, labels1, labels2 in generatePredic: outputs = self.neuralNet(images)[0] self.dictionary[sim.boardToString()] = len(self.dictionary) policy = ActionToArray.moveEvaluations(ActionToArray.legalMovesForState(sim.arrayBoard, sim.board), sim.arrayBoard, outputs) self.childrenPolicyEval.append(policy) self.childrenMoveNames.append(ActionToArray.legalMovesForState(sim.arrayBoard, sim.board)) directory = self.dictionary[sim.boardToString()] index = np.argmax(noiseEvals(self.childrenPolicyEval[directory], 3.0 / (6 * ((sim.plies // 2) + 1)))) move = self.childrenMoveNames[directory][index] moveNames = self.childrenMoveNames[directory] else: self.trainingPlayoutsFromPosition(playouts, sim) position = sim.boardToString() if position not in self.dictionary: state = torch.from_numpy(sim.boardToState()) action = torch.from_numpy(np.zeros(1)) data = DoubleHeadDataset(state, action, action) testLoader = torch.utils.data.DataLoader(dataset=data, batch_size=1, shuffle=False) device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') start = time.time() for images, irrelevant1, irrelevant2 in testLoader: images = images.to(device) outputs = self.neuralNet(images)[0] end = time.time() print("BLAH:", end-start) self.addPositionToMCTS(sim.boardToString(), ActionToArray.legalMovesForState(sim.arrayBoard, sim.board), sim.arrayBoard, outputs, sim) directory = self.dictionary[sim.boardToString()] index = np.argmax( PUCT_Algorithm(self.childrenStateWin[directory], self.childrenStateSeen[directory], 2**0.5, # 0.25-0.30 guarantees diversity np.sum(self.childrenStateSeen[directory]), self.childrenValueEval[directory], noiseEvals(self.childrenPolicyEval[directory], 2.1 / (7 * ((sim.plies // 2) + 1)))) ) move = self.childrenMoveNames[directory][index] moveNames = self.childrenMoveNames[directory] actionVector = np.zeros(len(self.childrenMoveNames[directory])) actionVector[index] = 1 sim.makeMove(move) sim.gameResult() if sim.plies == 1: node = PGN.add_variation(chess.Move.from_uci(move)) else: node = node.add_variation(chess.Move.from_uci(move)) if sim.result == 1: PGN.headers["Result"] = "1-0" if sim.result == 0: PGN.headers["Result"] = "1/2-1/2" if sim.result == -1: PGN.headers["Result"] = "0-1" print(PGN) return PGN