def main(): print("Decoder.py main()") import tictactoe import pickle import utilities encoder = autoencoder.position.Net() encoder.Load( '/home/sebastien/projects/DeepReinforcementLearning/autoencoder/outputs/AutoencoderNet_(2,1,3,3)_[(3,64,1)]_32_noZeroPadding_tictactoeAutoencoder_1000.pth' ) print("main(): encoder = {}".format(encoder)) mlp = BuildAnMLPDecoderFromAnAutoencoder(encoder, [8, 2]) print("main(): mlp = {}".format(mlp)) authority = tictactoe.Authority() playersList = authority.PlayersList() inputTensor = authority.InitialPosition() #inputTensor[1, 0, 0, 0] = 1 inputTensor[0, 0, 0, 1] = 1 #inputTensor[0, 0, 0, 2] = 1 #inputTensor[0, 0, 1, 0] = 1 inputTensor[1, 0, 1, 1] = 1 #inputTensor[1, 0, 1, 2] = 1 #inputTensor[0, 0, 2, 0] = 1 #inputTensor[1, 0, 2, 1] = 1 #inputTensor[0, 0, 2, 2] = 1 authority.Display(inputTensor) outputTensor = mlp(inputTensor.unsqueeze(0)) print("main(): outputTensor = \n{}".format(outputTensor))
def main(): print("ComparisonNet.py main()") import tictactoe import utilities import autoencoder autoencoderNet = autoencoder.position.Net() autoencoderNet.Load( '/home/sebastien/projects/DeepReinforcementLearning/autoencoder/outputs/AutoencoderNet_(2,1,3,3)_[(3,64,1)]_32_noZeroPadding_tictactoeAutoencoder_1000.pth' ) decoderClassifier = BuildADecoderClassifierFromAnAutoencoder( autoencoderNet) authority = tictactoe.Authority() currentPosition = authority.InitialPosition() playersList = authority.PlayersList() candidatePositionsAfterMoveWinnerPairs = utilities.LegalCandidatePositionsAfterMove( authority, currentPosition, playersList[0]) candidatePositionsAfterMoveList = [ candidatePositionAfterMoveWinnerPair[0] for candidatePositionAfterMoveWinnerPair in candidatePositionsAfterMoveWinnerPairs ] #print ("candidatePositionsAfterMove = {}".format(candidatePositionsAfterMove)) tournamentWinner = Comparison.TournamentWinner( decoderClassifier, candidatePositionsAfterMoveList) print("tournamentWinner = {}".format(tournamentWinner)) positionsList, winner = Comparison.SimulateAGame( decoderClassifier, authority, ) print(positionsList, winner)
def main(): print("Comparison.py main()") import tictactoe import ComparisonNet import autoencoder authority = tictactoe.Authority() autoencoderNet = autoencoder.position.Net() autoencoderNet.Load( '/home/sebastien/projects/DeepReinforcementLearning/autoencoder/outputs/AutoencoderNet_(2,1,3,3)_[(3,64,1)]_32_noZeroPadding_tictactoeAutoencoder_1000.pth' ) comparator = ComparisonNet.BuildADecoderClassifierFromAnAutoencoder( autoencoderNet) position0 = authority.InitialPosition() position0[0, 0, 0, 1] = 1 position1 = authority.InitialPosition() position1[0, 0, 1, 1] = 1 position2 = authority.InitialPosition() position2[0, 0, 0, 0] = 1 position3 = authority.InitialPosition() position3[0, 0, 1, 0] = 1 positionsList = [position0, position1, position2, position3] pairWinnerIndexList = ComparePositionPairs(authority, comparator, positionsList, numberOfGames=20, epsilon=0.1) print("pairWinnerIndexList = {}".format(pairWinnerIndexList))
def main(): print("gamePolicyParameterSweep.py main()") parameterSweptValuesList = ast.literal_eval(args.parameterSweptValues) parametersDic = ast.literal_eval(args.baselineParameters) #print ("main(): parametersDic = {}".format(parametersDic)) # Load the neural network neuralNetwork = moveEvaluation.ConvolutionStack.Net() neuralNetwork.Load(args.neuralNetwork) # Load the game authority if args.game == 'tictactoe': authority = tictactoe.Authority() else: raise NotImplementedError( "main(): The game '{}' is not implemented".format(args.game)) playersList = authority.PlayersList() # Output monitoring file outputFile = open(args.outputFile, "w", buffering=1) # Flush the buffer at each line outputFile.write("{},averageTime,winRate,drawRate,lossRate\n".format( args.sweepParameter)) for sweptValue in parameterSweptValuesList: print("main() {} = {}".format(args.sweepParameter, sweptValue)) if args.sweepParameter not in parametersDic: raise ValueError( "main(): The sweep parameter '{}' is not in the dictionary". format(args.sweepParameter)) parametersDic[args.sweepParameter] = sweptValue """if args.sweepParameter == 'softMaxTemperature': parametersDic['softMaxTemperature'] = sweptValue else: raise NotImplementedError("main(): The sweep parameter '{}' is not implemented".format(args.sweepParameter)) """ startTime = time.time() (averageRewardAgainstRandomPlayer, winRate, drawRate, lossRate, losingGamePositionsListList) = \ policy.AverageRewardAgainstARandomPlayerKeepLosingGames( playersList, authority, neuralNetwork, chooseHighestProbabilityIfAtLeast=parametersDic['chooseHighestProbabilityIfAtLeast'], preApplySoftMax=True, softMaxTemperature=parametersDic['softMaxTemperature'], numberOfGames=args.numberOfGamesPerCell, moveChoiceMode=parametersDic['moveChoiceMode'], numberOfGamesForMoveEvaluation=parametersDic['numberOfGamesPerActionEvaluation'], # ignored by SoftMax depthOfExhaustiveSearch=parametersDic['depthOfExhaustiveSearch'], numberOfTopMovesToDevelop=parametersDic['numberOfTopMovesToDevelop'] ) endTime = time.time() averageTime = (endTime - startTime) / args.numberOfGamesPerCell print( "main(): averageTime = {}; winRate = {}; drawRate = {}; lossRate = {}" .format(averageTime, winRate, drawRate, lossRate)) outputFile.write(str(sweptValue) + ',' + str(averageTime) + ',' + str(winRate) + ',' \ + str(drawRate) + ',' + str(lossRate) + '\n')
def main(): logging.info("test_ensemble.py main()") residualsPopulation = gpevo.ArithmeticsPopulation() residualsPopulation.LoadIndividuals(args.ensembleMembersFilepathPrefix) # Create the autoencoder encoder = autoencoder.position.Net() encoder.Load(args.autoencoder) # Load the features preprocessor preprocessor = pickle.load(open(args.featuresPreprocessor, 'rb')) # Game authority authority = tictactoe.Authority() position = authority.InitialPosition() position[0, 0, 1, 1] = 1 position[1, 0, 0, 1] = 1 position[0, 0, 0, 2] = 1 position[1, 0, 2, 0] = 1 position[0, 0, 2, 2] = 1 #position = authority.SwapPositions(position, 'X', 'O') authority.Display(position) # Encode the position encoding = encoder.Encode(position.unsqueeze(0)) logging.debug("encoding = {}".format(encoding)) # Preprocess the encoding preprocessedEncoding = preprocessor.transform(encoding.detach().numpy())[0] print ("preprocessedEncoding = {}".format(preprocessedEncoding)) # Load the population population = gpevo.ArithmeticsPopulation() population.LoadIndividuals(args.ensembleMembersFilepathPrefix) # Variable names variableNames = list(variableNameToTypeDict) variableNameToValueDict = dict(zip(variableNames, preprocessedEncoding)) logging.debug("variableNameToValueDict = {}".format(variableNameToValueDict)) # Load the interpreter domainFunctionsTree: ET.ElementTree = ET.parse(args.domainPrimitivesFilepath) interpreter: gp.ArithmeticsInterpreter = gp.ArithmeticsInterpreter(domainFunctionsTree) outputsSum = population.SumOfEvaluations([variableNameToValueDict], interpreter, variableNameToTypeDict, 'float') logging.info("outputsSum = {}".format(outputsSum))
def main(): print("testNetEnsemble.py main()") # Get the neural networks filepaths filesDirectory = os.path.dirname(args.neuralNetworksFilepathPrefix) filesList = [ os.path.join(filesDirectory, f) for f in os.listdir(filesDirectory) if os.path.isfile(os.path.join(filesDirectory, f)) ] filesList = [ f for f in filesList if args.neuralNetworksFilepathPrefix in f ] #print ("filesList = {}".format(filesList)) #print ("len(filesList) = {}".format(len(filesList))) neuralNetworksList = [Comparison.Load(filepath) for filepath in filesList] #netEnsemble = Comparison.ComparatorsEnsemble(neuralNetworksList) authority = tictactoe.Authority() positionTsrShape = authority.PositionTensorShape() playersList = authority.PlayersList() if args.testToDo == 'runGames': for numberOfNeuralNetworks in range(1, len(neuralNetworksList) + 1): limitedNetList = [] for neuralNetNdx in range(numberOfNeuralNetworks): limitedNetList.append(neuralNetworksList[neuralNetNdx]) netEnsemble = Comparison.ComparatorsEnsemble(limitedNetList) if True: #numberOfNeuralNetworks % 10 == 1: (numberOfWinsForComparator, numberOfWinsForRandomPlayer, numberOfDraws) = Comparison.SimulateGamesAgainstARandomPlayer( netEnsemble, authority, 100) logging.info( "numberOfNeuralNetworks = {}; numberOfWinsForComparator = {}; numberOfWinsForRandomPlayer = {}; numberOfDraws = {}" .format(numberOfNeuralNetworks, numberOfWinsForComparator, numberOfWinsForRandomPlayer, numberOfDraws)) elif args.testToDo == 'evaluatePosition': positionToEvaluate = torch.zeros(positionTsrShape) positionToEvaluate[0, 0, 0, 2] = 1 positionToEvaluate[1, 0, 0, 0] = 1 authority.Display(positionToEvaluate) numberOfSimulations = 30 numberOfEpsilonSteps = 11 netEnsemble = Comparison.ComparatorsEnsemble(neuralNetworksList) for epsilonNdx in range(numberOfEpsilonSteps): epsilon = epsilonNdx * 1.0 / (numberOfEpsilonSteps - 1) logging.info("epsilon = {}".format(epsilon)) numberOfWinsForPlayer0 = 0 numberOfWinsForPlayer1 = 0 numberOfDraws = 0 for simulationNdx in range(numberOfSimulations): (positionsList, winner) = Comparison.SimulateAGame( netEnsemble, authority, startingPosition=positionToEvaluate, nextPlayer=playersList[0], playerToEpsilonDict={ playersList[0]: epsilon, playersList[1]: epsilon }) if winner == playersList[0]: numberOfWinsForPlayer0 += 1 elif winner == playersList[1]: numberOfWinsForPlayer1 += 1 elif winner == 'draw': numberOfDraws += 1 else: raise ValueError("Unknown winner '{}'".format(winner)) #print ("positionsList = \n{}\nwinner = {}".format(positionsList, winner)) logging.info( "winRateForPlayer0 = {}; winRateForPlayer1 = {}; drawRate = {}" .format(numberOfWinsForPlayer0 / numberOfSimulations, numberOfWinsForPlayer1 / numberOfSimulations, numberOfDraws / numberOfSimulations)) else: raise NotImplementedError("main(): Unknown test type '{}'".format( args.testToDo))
def main(): logging.info("learnTicTacToeWithDecoderRandomForest.py main()") authority = tictactoe.Authority() positionTensorShape = authority.PositionTensorShape() moveTensorShape = authority.MoveTensorShape() playerList = authority.PlayersList() if args.startWithNeuralNetwork is not None: raise NotImplementedError( "main(): Start with a neural network is not implemented...") else: if args.startWithAutoencoder is not None: autoencoderNet = autoencoder.position.Net() autoencoderNet.Load(args.startWithAutoencoder) decoderRandomForest = Decoder.BuildARandomForestDecoderFromAnAutoencoder( autoencoderNet, args.maximumNumberOfTrees, args.treesMaximumDepth) decoderRandomForest.SetEvaluationMode('mean') print("main(): decoderRandomForest.encodingBodyStructureSeq = {}".format( decoderRandomForest.encodingBodyStructureSeq)) """# Create the optimizer optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, neuralNetwork.parameters()), lr=args.learningRate, betas=(0.5, 0.999)) # Loss function loss = torch.nn.MSELoss() # Initial learning rate learningRate = args.learningRate """ # Output monitoring file epochLossFile = open(os.path.join(args.outputDirectory, 'epochLoss.csv'), "w", buffering=1) # Flush the buffer at each line epochLossFile.write( "epoch,trainingMSE,validationMSE,averageRewardAgainstRandomPlayer,winRate,drawRate,lossRate\n" ) # First game with a random player, before any training (numberOfWinsForEvaluator, numberOfWinsForRandomPlayer, numberOfDraws) = Predictor.SimulateGamesAgainstARandomPlayer( decoderRandomForest, authority, 30) winRate = numberOfWinsForEvaluator / ( numberOfWinsForEvaluator + numberOfWinsForRandomPlayer + numberOfDraws) lossRate = numberOfWinsForRandomPlayer / ( numberOfWinsForEvaluator + numberOfWinsForRandomPlayer + numberOfDraws) drawRate = numberOfDraws / (numberOfWinsForEvaluator + numberOfWinsForRandomPlayer + numberOfDraws) logging.info( "Against a random player, winRate = {}; drawRate = {}; lossRate = {}". 
format(winRate, drawRate, lossRate)) epochLossFile.write('0' + ',' + '-' + ',' + '-' + ',' + str(winRate - lossRate) + ',' + str(winRate) + ',' + str(drawRate) + ',' + str(lossRate) + '\n') for epoch in range(1, args.numberOfEpochs + 1): logging.info("Epoch {}".format(epoch)) # Generate positions minimumNumberOfMovesForInitialPositions = MinimumNumberOfMovesForInitialPositions( epoch) maximumNumberOfMovesForInitialPositions = args.maximumNumberOfMovesForInitialPositions logging.info("Generating positions...") startingPositionsList = Predictor.SimulateRandomGames( authority, minimumNumberOfMovesForInitialPositions, maximumNumberOfMovesForInitialPositions, args.numberOfPositionsForTraining) startingPositionsTensor = StartingPositionsTensor( startingPositionsList) logging.info( "Evaluating expected reward for each starting position...") expectedRewardsList = ExpectedRewardsList(authority, decoderRandomForest, startingPositionsList, args.numberOfSimulations, playerList[1], args.epsilon) #print ("expectedRewardsList = {}".format(expectedRewardsList)) expectedRewardsTensor = ExpectedRewardsTensor(expectedRewardsList) logging.info("Learning from the examples...") decoderRandomForest.LearnFromMinibatch(startingPositionsTensor, expectedRewardsTensor) afterLearningTrainingPredictionsList = decoderRandomForest.Value( startingPositionsTensor) afterLearningTrainingPredictionsTensor = ExpectedRewardsTensor( afterLearningTrainingPredictionsList) trainingMSE = torch.nn.functional.mse_loss( afterLearningTrainingPredictionsTensor, expectedRewardsTensor).item() logging.info("trainingMSE = {}".format(trainingMSE)) # Test on validation positions logging.info("Generating validation positions...") validationStartingPositionsList = Predictor.SimulateRandomGames( authority, minimumNumberOfMovesForInitialPositions, maximumNumberOfMovesForInitialPositions, args.numberOfPositionsForValidation) validationStartingPositionsTensor = StartingPositionsTensor( validationStartingPositionsList) logging.info( "Evaluating expected reward for each validation starting position..." 
) validationExpectedRewardsList = ExpectedRewardsList( authority, decoderRandomForest, validationStartingPositionsList, args.numberOfSimulations, playerList[1], args.epsilon) validationExpectedRewardsTensor = ExpectedRewardsTensor( validationExpectedRewardsList) currentValidationPredictionList = decoderRandomForest.Value( validationStartingPositionsTensor) currentValidationPredictionTensor = ExpectedRewardsTensor( currentValidationPredictionList) validationMSE = torch.nn.functional.mse_loss( currentValidationPredictionTensor, validationExpectedRewardsTensor).item() logging.info("validationMSE = {}".format(validationMSE)) # Play against a random player (numberOfWinsForEvaluator, numberOfWinsForRandomPlayer, numberOfDraws) = Predictor.SimulateGamesAgainstARandomPlayer( decoderRandomForest, authority, 30) winRate = numberOfWinsForEvaluator / (numberOfWinsForEvaluator + numberOfWinsForRandomPlayer + numberOfDraws) lossRate = numberOfWinsForRandomPlayer / (numberOfWinsForEvaluator + numberOfWinsForRandomPlayer + numberOfDraws) drawRate = numberOfDraws / (numberOfWinsForEvaluator + numberOfWinsForRandomPlayer + numberOfDraws) logging.info( "Against a random player, winRate = {}; drawRate = {}; lossRate = {}" .format(winRate, drawRate, lossRate)) epochLossFile.write( str(epoch) + ',' + str(trainingMSE) + ',' + str(validationMSE) + ',' + str(winRate - lossRate) + ',' + str(winRate) + ',' + str(drawRate) + ',' + str(lossRate) + '\n') filepath = os.path.join(args.outputDirectory, 'tictactoe_' + str(epoch) + '.bin') decoderRandomForest.Save(filepath)
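# MinimumNumberOfMovesForInitialPositions() and the tensor-stacking helpers
# (StartingPositionsTensor, ExpectedRewardsTensor) are called above but not defined in
# this section. The sketches below are assumptions for illustration only: one plausible
# curriculum schedule that starts from late-game positions and moves the horizon earlier
# as training progresses, plus plain torch.stack wrappers. The project's real helpers
# may use different schedules and shapes.
import torch

def MinimumNumberOfMovesForInitialPositionsSketch(epoch, epochsPerStage=50,
                                                  maximumNumberOfMoves=7):
    # Begin near the end of the game; every epochsPerStage epochs, allow earlier positions
    return max(0, maximumNumberOfMoves - (epoch - 1) // epochsPerStage)

def StartingPositionsTensorSketch(startingPositionsList):
    # Stack a list of (2, 1, 3, 3) position tensors into an (N, 2, 1, 3, 3) minibatch
    return torch.stack(startingPositionsList, dim=0)

def ExpectedRewardsTensorSketch(expectedRewardsList):
    # One scalar expected reward per starting position
    return torch.tensor(expectedRewardsList, dtype=torch.float)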
def main() -> None:
    logging.info("filterFromEnsemble.py main()")

    # Load the ensemble
    ensemble: winRatesRegression.RegressorsEnsemble = winRatesRegression.Load(
        args.ensembleFilepath)
    authority: tictactoe.Authority = tictactoe.Authority()

    # Load the encoder
    encoder: autoencoder.position.Net = autoencoder.position.Net()
    encoder.Load(args.autoencoderFilepath)

    neuralNetworkToWinRatesDict: Dict[winRatesRegression.Regressor,
                                      Tuple[float, float, float]] = {}
    rewardsList = []
    # Loop through the neural networks
    for neuralNetwork in ensemble.regressorsList:
        (numberOfWinsForRegressor, numberOfWinsForRandomPlayer,
         numberOfDraws) = winRatesRegression.SimulateGamesAgainstARandomPlayer(
             neuralNetwork, encoder, authority, args.numberOfGames)
        logging.info(
            "numberOfWinsForRegressor = {}; numberOfWinsForRandomPlayer = {}; numberOfDraws = {}"
            .format(numberOfWinsForRegressor, numberOfWinsForRandomPlayer,
                    numberOfDraws))
        neuralNetworkToWinRatesDict[neuralNetwork] = (
            numberOfWinsForRegressor / args.numberOfGames,
            numberOfDraws / args.numberOfGames,
            numberOfWinsForRandomPlayer / args.numberOfGames,
        )
        rewardsList.append(
            (numberOfWinsForRegressor - numberOfWinsForRandomPlayer) /
            args.numberOfGames)

    # Write the win rates
    winRatesFile = open(os.path.join(args.outputDirectory, 'regressorWinRates.csv'),
                        "w",
                        buffering=1)  # Flush the buffer at each line
    winRatesFile.write("winRateForRegressor,drawRate,winRateForRandomPlayer,reward\n")
    for neuralNet, winRatesTuple in neuralNetworkToWinRatesDict.items():
        winRatesFile.write("{},{},{},{}\n".format(winRatesTuple[0], winRatesTuple[1],
                                                  winRatesTuple[2],
                                                  winRatesTuple[0] - winRatesTuple[2]))

    # Get the best rewards
    rewardsList.sort(reverse=True)  # Sort in descending order
    numberOfRegressorsToKeep = round(args.fractionOfBestRegressorsToKeep *
                                     len(ensemble.regressorsList))
    logging.info("numberOfRegressorsToKeep = {}".format(numberOfRegressorsToKeep))
    print("rewardsList = \n{}".format(rewardsList))
    rewardThreshold = rewardsList[numberOfRegressorsToKeep - 1]
    logging.info("rewardThreshold = {}".format(rewardThreshold))

    # Create a new ensemble with the best regressors
    bestRegressorsList = []
    for regressor in ensemble.regressorsList:
        regressorWinRate = neuralNetworkToWinRatesDict[regressor][0]
        regressorLossRate = neuralNetworkToWinRatesDict[regressor][2]
        regressorReward = regressorWinRate - regressorLossRate
        if regressorReward >= rewardThreshold:
            bestRegressorsList.append(regressor)
    logging.info("len(bestRegressorsList) = {}".format(len(bestRegressorsList)))
    eliteEnsemble = winRatesRegression.RegressorsEnsemble(bestRegressorsList)
    eliteEnsemble.Save(os.path.join(args.outputDirectory, "eliteEnsemble.bin"))
def main(): logging.info("learnWinRatesFromEncoding.py main()") # Load the data file embeddingWinRatesDF = pandas.read_csv(args.encodingWinRatesFilepath) #print ("embeddingWinRatesDF = {}".format(embeddingWinRatesDF)) attributesTsr, targetWinRatesTsr = DataTensors(embeddingWinRatesDF) numberOfAttributes = attributesTsr.shape[1] numberOfSamples = attributesTsr.shape[0] # Split training and validation samples (trainingAttributesTsr, trainingTargetsTsr, validationAttributesTsr, validationTargetsTsr) = SplitTrainingAndValidation( attributesTsr, targetWinRatesTsr, validationProportion=0.2) #print ("attributesTsr = {}".format(attributesTsr)) #print ("targetWinRatesTsr = {}".format(targetWinRatesTsr)) authority = tictactoe.Authority() positionTensorShape = authority.PositionTensorShape() moveTensorShape = authority.MoveTensorShape() playerList = authority.PlayersList() # Loss function # loss = torch.nn.MSELoss() # The neural network is a regressor loss = torch.nn.SmoothL1Loss() # Initial learning rate learningRate = args.learningRate for runNdx in range(args.numberOfRuns): logging.info(" +++++++++++++++ Run {} +++++++++++++++".format(runNdx)) # Create the neural network regressor = winRatesRegression.Net( inputNumberOfAttributes=numberOfAttributes, bodyStructureList=neuralNetworkLayerSizesList, dropoutRatio=args.dropoutRatio) # Create the optimizer optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, regressor.parameters()), lr=args.learningRate, betas=(0.5, 0.999)) # Output monitoring file epochLossFile = open(os.path.join(args.outputDirectory, 'epochLoss.csv'), "w", buffering=1) # Flush the buffer at each line epochLossFile.write("epoch,trainingLoss,validationLoss\n") # Autoencoder autoencoderNet = autoencoder.position.Net() autoencoderNet.Load(args.autoencoderFilepath) for epoch in range(1, args.numberOfEpochs + 1): logging.info("Epoch {}".format(epoch)) regressor.train() indicesList = numpy.arange(trainingAttributesTsr.shape[0]) numpy.random.shuffle(indicesList) numberOfminibatches = len(indicesList) // args.minibatchSize trainigLossSum = 0 for minibatchNdx in range(numberOfminibatches): print('.', end='', flush=True) indexNdx0 = minibatchNdx * args.minibatchSize indexNdx1 = (minibatchNdx + 1) * args.minibatchSize minibatchIndicesList = indicesList[indexNdx0:indexNdx1] #print ("minibatchIndicesList = {}".format(minibatchIndicesList)) minibatchAttributesTsr, minibatchTargetsTsr = MinibatchTensors( trainingAttributesTsr, trainingTargetsTsr, minibatchIndicesList) optimizer.zero_grad() # Forward pass minibatchOutputTensor = regressor(minibatchAttributesTsr) # Calculate the error and backpropagate trainingLoss = loss(minibatchOutputTensor, minibatchTargetsTsr) #logging.info("trainingLoss.item() = {}".format(trainingLoss.item())) trainigLossSum += trainingLoss.item() trainingLoss.backward() # Move in the gradient descent direction optimizer.step() averageTrainigLoss = trainigLossSum / numberOfminibatches # ****************** Validation ****************** regressor.eval() validationOutputTsr = regressor(validationAttributesTsr) validationLoss = loss(validationOutputTsr, validationTargetsTsr).item() logging.info("averageTrainigLoss = {}; validationLoss = {}".format( averageTrainigLoss, validationLoss)) epochLossFile.write("{},{},{}\n".format(epoch, averageTrainigLoss, validationLoss)) # ****************** Compare with a random player ************************** if epoch % 10 == 1 or epoch == args.numberOfEpochs: (numberOfWinsForRegressor, numberOfWinsForRandomPlayer, numberOfDraws ) = 
winRatesRegression.SimulateGamesAgainstARandomPlayer( regressor, autoencoderNet, authority, args.numberOfGamesAgainstRandomPlayer, None) logging.info( "numberOfWinsForRegressor = {}; numberOfWinsForRandomPlayer = {}; numberOfDraws = {}" .format(numberOfWinsForRegressor, numberOfWinsForRandomPlayer, numberOfDraws)) # Save the neural network regressor.Save( os.path.join( args.outputDirectory, 'regressor_' + str(args.stageIndex) + '_' + str(runNdx) + '.bin'))
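# SplitTrainingAndValidation() and MinibatchTensors() are referenced above but not shown
# in this section. These are hypothetical minimal versions, assuming attributes and
# targets are torch tensors indexed along dimension 0; the real helpers may shuffle or
# shape their outputs differently.
import torch

def SplitTrainingAndValidationSketch(attributesTsr, targetsTsr, validationProportion=0.2):
    numberOfSamples = attributesTsr.shape[0]
    permutation = torch.randperm(numberOfSamples)
    numberOfValidationSamples = int(validationProportion * numberOfSamples)
    validationIndices = permutation[:numberOfValidationSamples]
    trainingIndices = permutation[numberOfValidationSamples:]
    return (attributesTsr[trainingIndices], targetsTsr[trainingIndices],
            attributesTsr[validationIndices], targetsTsr[validationIndices])

def MinibatchTensorsSketch(attributesTsr, targetsTsr, minibatchIndicesList):
    # Index a subset of rows; numpy index arrays are accepted by torch tensors
    return attributesTsr[minibatchIndicesList], targetsTsr[minibatchIndicesList]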
def main(): print("neuralNetworksTournament.py main()") # Create the game authority if args.game == 'tictactoe': authority = tictactoe.Authority() elif args.game == 'connect4': authority = connect4.Authority() else: raise NotImplementedError("main(): unknown game '{}'".format( args.game)) playersList = authority.PlayersList() positionTensorShape = authority.PositionTensorShape() moveTensorShape = authority.MoveTensorShape() neuralNetwork1 = moveEvaluation.ConvolutionStack.Net() neuralNetwork1.Load(args.neuralNetwork1) neuralNetwork2 = moveEvaluation.ConvolutionStack.Net() neuralNetwork2.Load(args.neuralNetwork2) neuralNetworks = [neuralNetwork1, neuralNetwork2] numberOfNeuralNetworks1Wins = 0 numberOfNeuralNetworks2Wins = 0 numberOfDraws = 0 neuralNetwork1TotalTime = 0.0 neuralNetwork2TotalTime = 0.0 for gameNdx in range(args.numberOfGames): numberOfPlayedMoves = gameNdx % 2 # Swap the 1st playing neural network et each game winner = None positionTensor = authority.InitialPosition() while winner is None: player = playersList[numberOfPlayedMoves % 2] playingNeuralNetwork = neuralNetworks[numberOfPlayedMoves % 2] if player is playersList[1]: positionTensor = authority.SwapPositions( positionTensor, playersList[0], playersList[1]) startTime = time.time() moveTensor = AskTheNeuralNetworkToChooseAMove( playersList, authority, playingNeuralNetwork, positionTensor, depthOfExhaustiveSearch=args. maximumDepthOfSemiExhaustiveSearch, numberOfTopMovesToDevelop=args.numberOfTopMovesToDevelop, softMaxTemperature=args.softMaxTemperature) endTime = time.time() decisionTime = endTime - startTime positionTensor, winner = authority.Move(positionTensor, playersList[0], moveTensor) if player is playersList[1]: positionTensor = authority.SwapPositions( positionTensor, playersList[0], playersList[1]) if args.displayPositions: authority.Display(positionTensor) print("**********************************************") if winner is playersList[0] and player is playersList[1]: winner = playersList[1] elif winner is playersList[1] and player is playersList[1]: winner = playersList[0] numberOfPlayedMoves += 1 if player is playersList[0]: neuralNetwork1TotalTime += decisionTime else: neuralNetwork2TotalTime += decisionTime if winner is playersList[0]: numberOfNeuralNetworks1Wins += 1 elif winner is playersList[1]: numberOfNeuralNetworks2Wins += 1 elif winner is 'draw': numberOfDraws += 1 else: raise NotImplementedError( 'neuralNetworksTournament.py main(): Unknown winner {}'.format( winner)) numberOfPlayedMoves = numberOfPlayedMoves - gameNdx % 2 # Subtract 1 for odd game index neuralNetwork1WinRate = numberOfNeuralNetworks1Wins / args.numberOfGames neuralNetwork2WinRate = numberOfNeuralNetworks2Wins / args.numberOfGames drawRate = numberOfDraws / args.numberOfGames neuralNetwork1AverageDecisionTime = neuralNetwork1TotalTime / args.numberOfGames neuralNetwork2AverageDecisionTime = neuralNetwork2TotalTime / args.numberOfGames print("neuralNetwork1WinRate = {}".format(neuralNetwork1WinRate)) print("neuralNetwork2WinRate = {}".format(neuralNetwork2WinRate)) print("drawRate = {}".format(drawRate)) print("neuralNetwork1AverageDecisionTime = {}".format( neuralNetwork1AverageDecisionTime)) print("neuralNetwork2AverageDecisionTime = {}".format( neuralNetwork2AverageDecisionTime))
def main(): print("learnTicTacToeWithComparisonNet.py main()") authority = tictactoe.Authority() positionTensorShape = authority.PositionTensorShape() moveTensorShape = authority.MoveTensorShape() playerList = authority.PlayersList() if args.startWithNeuralNetwork is not None: raise NotImplementedError( "main(): Start with a neural network is not implemented...") else: if args.startWithAutoencoder is not None: autoencoderNet = autoencoder.position.Net() autoencoderNet.Load(args.startWithAutoencoder) decoderClassifier = ComparisonNet.BuildADecoderClassifierFromAnAutoencoder( autoencoderNet, dropoutRatio=0.25) else: raise NotImplementedError( "main(): Starting without an autoencoder is not implemented..." ) # Create the optimizer logging.debug(decoderClassifier) for name, param in decoderClassifier.named_parameters(): if 'decoding' in name: param.requires_grad = True else: param.requires_grad = False print("name = {}; param.requires_grad = {}".format( name, param.requires_grad)) optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, decoderClassifier.parameters()), lr=args.learningRate, betas=(0.5, 0.999)) # Loss function loss = torch.nn.CrossEntropyLoss( ) # The neural network is a binary classifier # Initial learning rate learningRate = args.learningRate # Output monitoring file epochLossFile = open(os.path.join(args.outputDirectory, 'epochLoss.csv'), "w", buffering=1) # Flush the buffer at each line epochLossFile.write( "epoch,trainingLoss,validationLoss,validationAccuracy,averageReward,winRate,drawRate,lossRate\n" ) # First game with a random player, before any training decoderClassifier.eval() (numberOfWinsForComparator, numberOfWinsForRandomPlayer, numberOfDraws) = Comparison.SimulateGamesAgainstARandomPlayer( decoderClassifier, authority, args.numberOfGamesAgainstARandomPlayer) print( "(numberOfWinsForComparator, numberOfWinsForRandomPlayer, numberOfDraws) = ({}, {}, {})" .format(numberOfWinsForComparator, numberOfWinsForRandomPlayer, numberOfDraws)) winRate = numberOfWinsForComparator / (numberOfWinsForComparator + numberOfWinsForRandomPlayer + numberOfDraws) lossRate = numberOfWinsForRandomPlayer / (numberOfWinsForComparator + numberOfWinsForRandomPlayer + numberOfDraws) drawRate = numberOfDraws / (numberOfWinsForComparator + numberOfWinsForRandomPlayer + numberOfDraws) logging.info( "Against a random player, winRate = {}; drawRate = {}; lossRate = {}". 
format(winRate, drawRate, lossRate)) epochLossFile.write('0' + ',' + '-' + ',' + '-' + ',' + '-,' + str(winRate - lossRate) + ',' + str(winRate) + ',' + str(drawRate) + ',' + str(lossRate) + '\n') latentRepresentationFile = open(os.path.join(args.outputDirectory, 'latentRepresentation.csv'), "w", buffering=1) #playerToEpsilonDict = {playerList[0]: args.epsilon, playerList[1]: args.epsilon} epsilon = args.epsilon for epoch in range(1, args.numberOfEpochs + 1): logging.info("Epoch {}".format(epoch)) decoderClassifier.train() if epoch % 100 == -1: learningRate = learningRate / 2 for param_group in optimizer.param_groups: param_group['lr'] = learningRate #epsilon = epsilon/2 """if (epoch // 25) %2 == 0: # Optimize decoding for name, param in decoderClassifier.named_parameters(): if 'decoding' in name: param.requires_grad = True else: param.requires_grad = False logging.info("Optimizing decoding layers") else: # Optimize encoding for name, param in decoderClassifier.named_parameters(): if 'decoding' in name: param.requires_grad = False else: param.requires_grad = True logging.info("Optimizing encoding layers") optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, decoderClassifier.parameters()), lr=learningRate, betas=(0.5, 0.999)) """ if epoch > 1 and epoch % 200 == 1: epsilon = epsilon / 2 # Generate positions if epoch % recomputingPeriod == 1: minimumNumberOfMovesForInitialPositions = MinimumNumberOfMovesForInitialPositions( epoch) maximumNumberOfMovesForInitialPositions = args.maximumNumberOfMovesForInitialPositions logging.info("Generating positions...") startingPositionsList = Comparison.SimulateRandomGames( authority, minimumNumberOfMovesForInitialPositions, maximumNumberOfMovesForInitialPositions, args.numberOfPositionsForTraining, swapIfOddNumberOfMoves=True) numberOfMajorityX, numberOfMajorityO, numberOfEqualities = Majority( startingPositionsList) #print ("numberOfMajorityX = {}; numberOfMajorityO = {}; numberOfEqualities = {}".format(numberOfMajorityX, numberOfMajorityO, numberOfEqualities)) #print ("main(): startingPositionsList = {}".format(startingPositionsList)) startingPositionsTensor, augmentedStartingPositionsList = StartingPositionsInPairsOfPossibleOptions( startingPositionsList, authority) #print ("main(): augmentedStartingPositionsList = {}".format(augmentedStartingPositionsList)) #print ("main(): startingPositionsTensor.shape = {}".format(startingPositionsTensor.shape)) #print ("main(): startingPositionsTensor = {}".format(startingPositionsTensor)) logging.info("Comparing starting position pairs...") decoderClassifier.eval() pairWinnerIndexList = Comparison.ComparePositionPairs( authority, decoderClassifier, augmentedStartingPositionsList, args.numberOfSimulations, epsilon=0, playerToEpsilonDict={ playerList[0]: epsilon, playerList[1]: epsilon }) #print ("pairWinnerIndexList = {}".format(pairWinnerIndexList)) pairWinnerIndexTsr = PairWinnerIndexTensor(pairWinnerIndexList) decoderClassifier.train() # Since the samples are generated dynamically, there is no need for minibatches: all samples are always new optimizer.zero_grad() # Forward pass outputTensor = decoderClassifier(startingPositionsTensor) # Calculate the error and backpropagate trainingLoss = loss(outputTensor, pairWinnerIndexTsr) logging.info("trainingLoss.item() = {}".format(trainingLoss.item())) trainingLoss.backward() # Move in the gradient descent direction optimizer.step() gradient0AbsMean = decoderClassifier.Gradient0AbsMean() logging.debug("gradient0AbsMean = {}".format(gradient0AbsMean)) # 
****************** Validation ****************** decoderClassifier.eval() if epoch % 200 == 1: logging.info("Generating validation positions...") validationStartingPositionsList = Comparison.SimulateRandomGames( authority, minimumNumberOfMovesForInitialPositions, maximumNumberOfMovesForInitialPositions, args.numberOfPositionsForValidation, swapIfOddNumberOfMoves=True) """for validationStartingPositionNdx in range(len(validationStartingPositionsList)): if numpy.random.random() >= 0.5: swappedPosition = authority.SwapPositions(validationStartingPositionsList[validationStartingPositionNdx], playerList[0], playerList[1]) validationStartingPositionsList[validationStartingPositionNdx] = swappedPosition """ # print ("main(): startingPositionsList = {}".format(startingPositionsList)) validationStartingPositionsTensor, validationAugmentedStartingPositionsList = \ StartingPositionsInPairsOfPossibleOptions(validationStartingPositionsList, authority) logging.info("Comparing validation starting position pairs...") validationPairWinnerIndexList = Comparison.ComparePositionPairs( authority, decoderClassifier, validationAugmentedStartingPositionsList, args.numberOfSimulations, epsilon=0, playerToEpsilonDict={ playerList[0]: epsilon, playerList[1]: epsilon }) # Start with purely random games (epsilon = 1) validationPairWinnerIndexTsr = PairWinnerIndexTensor( validationPairWinnerIndexList) # Forward pass validationOutputTensor = decoderClassifier( validationStartingPositionsTensor) # Calculate the validation error validationLoss = loss(validationOutputTensor, validationPairWinnerIndexTsr) # Calculate the validation accuracy validationAccuracy = Accuracy(validationOutputTensor, validationPairWinnerIndexTsr) logging.info( "validationLoss.item() = {}; validationAccuracy = {}".format( validationLoss.item(), validationAccuracy)) # Check if latent representation of pairs are the same validationLatentRepresentationTsr = decoderClassifier.DecodingLatentRepresentation( decodingLayerNdx=1, inputTsr=validationStartingPositionsTensor) print( "validationStartingPositionsTensor.shape = {}; validationLatentRepresentationTsr.shape = {}" .format(validationStartingPositionsTensor.shape, validationLatentRepresentationTsr.shape)) for pairNdx in range(validationLatentRepresentationTsr.shape[0] // 2): if torch.max( torch.abs(validationLatentRepresentationTsr[2 * pairNdx] - validationLatentRepresentationTsr[2 * pairNdx + 1]) ).item() < dead_neuron_zero_threshold: logging.warning( "Pair {} and {} have an identical latent representation". format(2 * pairNdx, 2 * pairNdx + 1)) print("validationStartingPositionsTensor[2 * pairNdx] = {}". format(validationStartingPositionsTensor[2 * pairNdx])) print( "validationStartingPositionsTensor[2 * pairNdx + 1] = {}". 
format(validationStartingPositionsTensor[2 * pairNdx + 1])) logging.info("Play against a random player...") # Play against a random player (numberOfWinsForEvaluator, numberOfWinsForRandomPlayer, numberOfDraws) = Comparison.SimulateGamesAgainstARandomPlayer( decoderClassifier, authority, args.numberOfGamesAgainstARandomPlayer) winRate = numberOfWinsForEvaluator / (numberOfWinsForEvaluator + numberOfWinsForRandomPlayer + numberOfDraws) lossRate = numberOfWinsForRandomPlayer / (numberOfWinsForEvaluator + numberOfWinsForRandomPlayer + numberOfDraws) drawRate = numberOfDraws / (numberOfWinsForEvaluator + numberOfWinsForRandomPlayer + numberOfDraws) logging.info( "Against a random player, winRate = {}; drawRate = {}; lossRate = {}" .format(winRate, drawRate, lossRate)) epochLossFile.write( str(epoch) + ',' + str(trainingLoss.item()) + ',' + str(validationLoss.item()) + ',' + str(validationAccuracy) + ',' + str(winRate - lossRate) + ',' + str(winRate) + ',' + str(drawRate) + ',' + str(lossRate) + '\n') # Write validation latent representations logging.info("Validation latent representation...") validationLatentRepresentationTsr = decoderClassifier.DecodingLatentRepresentation( decodingLayerNdx=7, inputTsr=validationStartingPositionsTensor) # validationLatentRepresentation1Tsr.shape = torch.Size([ 2 * numberOfPositionsForValidation, decoderClassifier.decodingIntermediateNumberOfNeurons ]) validationLatentRepresentationArr = validationLatentRepresentationTsr.detach( ).numpy() pcaModel = PCAModel.PCAModel(validationLatentRepresentationArr, pca_zero_threshold) pcaModel.TruncateModel(2) validationProjections = pcaModel.Project( validationLatentRepresentationArr) validationProjectionsTsr = torch.from_numpy(validationProjections) validationProjectionsTsr = torch.cat( (validationProjectionsTsr, torch.argmax(validationOutputTensor, dim=1).unsqueeze(1).double()), 1) validationProjectionsTsr = torch.cat( (validationProjectionsTsr, validationPairWinnerIndexTsr.unsqueeze(1).double()), 1) numpy.savetxt(latentRepresentationFile, [validationProjectionsTsr.numpy().flatten()], delimiter=',') if epoch % 10 == 0: filepath = os.path.join(args.outputDirectory, 'tictactoe_' + str(epoch) + '.bin') decoderClassifier.Save(filepath) epsilon0GamePositionsList, epsilon0GameWinner = Comparison.SimulateAGame( decoderClassifier, authority) for position in epsilon0GamePositionsList: authority.Display(position) print(".............\n") # Reinitialize for dead neurons ReinitializeDeadNeurons(decoderClassifier, startingPositionsTensor)
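# PairWinnerIndexTensor() and Accuracy() are used above but not defined in this section.
# Hypothetical minimal versions, assuming the classifier emits one row of two logits per
# position pair and the target is the index (0 or 1) of the winning pair member:
import torch

def PairWinnerIndexTensorSketch(pairWinnerIndexList):
    # CrossEntropyLoss expects class indices as a 1-D LongTensor
    return torch.tensor(pairWinnerIndexList, dtype=torch.long)

def AccuracySketch(outputTensor, targetIndexTsr):
    # Fraction of pairs where the argmax of the two logits matches the target index
    predictedIndices = torch.argmax(outputTensor, dim=1)
    return (predictedIndices == targetIndexTsr).float().mean().item()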
        drawRateSum += drawRate
        winRate1Sum += winRate1
    winRate0 = winRate0Sum / numberOfRegressors
    drawRate = drawRateSum / numberOfRegressors
    winRate1 = winRate1Sum / numberOfRegressors
    normalizationFactor = winRate0 + drawRate + winRate1
    return (winRate0 / normalizationFactor, drawRate / normalizationFactor,
            winRate1 / normalizationFactor)


if __name__ == '__main__':
    print("winRatesRegression.py __main__")
    import tictactoe
    import autoencoder.position

    authority = tictactoe.Authority()
    encoder = autoencoder.position.Net()
    encoder.Load(
        '/home/sebastien/projects/DeepReinforcementLearning/autoencoder/outputs/AutoencoderNet_(2,1,3,3)_[(2,128,1),(2,128,1)]_10_noZeroPadding_tictactoeAutoencoder_115.pth'
    )
    ensemble_0 = Load(
        '/home/sebastien/projects/DeepReinforcementLearning/positionEvaluation/outputs/eliteEnsemble_4.bin'
    )
    ensemble_1 = Load(
        '/home/sebastien/projects/DeepReinforcementLearning/positionEvaluation/outputs/eliteEnsemble_5.bin'
    )
    (numberOfWinsForRegressor0, numberOfDraws,
     numberOfWinsForRegressor1) = CompetitionBetweenRegressors(
         ensemble_0, ensemble_1, authority, encoder, 100, 0.2)
    print(
def main(): logging.info("regressorCreatesSamples.py main()") authority = tictactoe.Authority() #positionTsrShape = authority.PositionTensorShape() playersList = authority.PlayersList() # Load the ensemble if args.epsilon < 1.0: if args.preAssembledEnsembleFilepath == 'None': neuralNetworksDirectory = os.path.dirname(args.regressorEnsembleFilepath) #print ("neuralNetworksDirectory = {}".format(neuralNetworksDirectory)) neuralNetworksFilenames = \ [filename for filename in os.listdir(neuralNetworksDirectory ) if filename.startswith( os.path.basename( args.regressorEnsembleFilepath) )] #print ("neuralNetworksFilepaths = {}".format(neuralNetworksFilepaths)) neuralNetworksFilepaths = [os.path.join(neuralNetworksDirectory, filename) for filename in neuralNetworksFilenames] #print ("neuralNetworksFilepaths = {}".format(neuralNetworksFilepaths)) neuralNetworksList = [] for filepath in neuralNetworksFilepaths: neuralNet = winRatesRegression.Load(filepath) neuralNetworksList.append(neuralNet) netEnsemble = winRatesRegression.RegressorsEnsemble(neuralNetworksList) netEnsemble.Save(args.netEnsembleFilepath) else: logging.info("Using pre-assembled ensemble {}".format(args.preAssembledEnsembleFilepath)) netEnsemble = winRatesRegression.Load(args.preAssembledEnsembleFilepath) else: netEnsemble = None # Create the autoencoder encoder = autoencoder.position.Net() encoder.Load(args.autoencoderFilepath) numberOfLatentVariables = encoder.numberOfLatentVariables header = '' for latentNdx in range(numberOfLatentVariables): header += 'p' + str(latentNdx) + ',' # Create the output file outputFile = open(args.outputFilepath, "w", buffering=1) # Flush the buffer at each line outputFile.write( header + "player0WinRate,drawRate,player1WinRate\n") for positionNdx in range(1, args.numberOfPositions + 1): logging.info("Generating position {}...".format(positionNdx)) startingPosition = winRatesRegression.SimulateRandomGames(authority, encoder=encoder, minimumNumberOfMovesForInitialPositions=0, maximumNumberOfMovesForInitialPositions=7, numberOfPositions=1, swapIfOddNumberOfMoves=False)[0] authority.Display(startingPosition) numberOfWinsForPlayer0 = 0 numberOfWinsForPlayer1 = 0 numberOfDraws = 0 for simulationNdx in range(args.numberOfSimulationsPerPosition): (positionsList, winner) = winRatesRegression.SimulateAGame(netEnsemble, encoder, authority, startingPosition=startingPosition, nextPlayer=playersList[1], playerToEpsilonDict={playersList[0]: args.epsilon, playersList[1]: args.epsilon}) if winner == playersList[0]: numberOfWinsForPlayer0 += 1 elif winner == playersList[1]: numberOfWinsForPlayer1 += 1 elif winner == 'draw': numberOfDraws += 1 else: raise ValueError("Unknown winner '{}'".format(winner)) # print ("positionsList = \n{}\nwinner = {}".format(positionsList, winner)) player0WinRate = numberOfWinsForPlayer0/args.numberOfSimulationsPerPosition player1WinRate = numberOfWinsForPlayer1/args.numberOfSimulationsPerPosition drawRate = numberOfDraws/args.numberOfSimulationsPerPosition logging.info("winRateForPlayer0 = {}; drawRate = {}; winRateForPlayer1 = {}".format( player0WinRate, drawRate, player1WinRate )) #positionList = startingPosition.flatten().tolist() positionEncoding = encoder.Encode(startingPosition.unsqueeze(0)).flatten().tolist() print ("positionEncoding = {}".format(positionEncoding)) for encodingNdx in range(len(positionEncoding)): outputFile.write("{},".format(positionEncoding[encodingNdx])) outputFile.write("{},{},{}\n".format(player0WinRate, drawRate, player1WinRate))
def main(): print("generateComparisonNetEnsemble.py: main()") authority = tictactoe.Authority() playerList = authority.PlayersList() if args.startWithNeuralNetwork is not None: raise NotImplementedError( "main(): Start with a neural network is not implemented...") else: if args.startWithAutoencoder is not None: autoencoderNet = autoencoder.position.Net() autoencoderNet.Load(args.startWithAutoencoder) else: raise NotImplementedError( "main(): Starting without an autoencoder is not implemented..." ) # Loss function loss = torch.nn.CrossEntropyLoss( ) # The neural network is a binary classifier # Initial learning rate learningRate = args.learningRate epsilon = args.epsilon for neuralNetworkNdx in range(1, args.numberOfNeuralNetworks + 1): logging.info( " ------ Starting training of neural network # {} ------".format( neuralNetworkNdx)) # Output monitoring file epochLossFile = open(os.path.join(args.outputDirectory, 'netEnsembleEpochLoss.csv'), "w", buffering=1) # Flush the buffer at each line epochLossFile.write( "epoch,trainingLoss,validationLoss,validationAccuracy,averageReward,winRate,drawRate,lossRate\n" ) decoderClassifier = ComparisonNet.BuildADecoderClassifierFromAnAutoencoder( autoencoderNet, dropoutRatio=args.dropoutRatio) # Create the optimizer for name, param in decoderClassifier.named_parameters(): if 'decoding' in name: param.requires_grad = True else: param.requires_grad = True print("name = {}; param.requires_grad = {}".format( name, param.requires_grad)) optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, decoderClassifier.parameters()), lr=learningRate, betas=(0.5, 0.999)) terminalConditionIsReached = False epoch = 1 while epoch <= args.numberOfEpochs and not terminalConditionIsReached: logging.info("Epoch {}".format(epoch)) if epoch > 1 and epoch % 50 == 1: epsilon = epsilon / 2 # Generate positions if epoch % args.recomputingPeriod == 1: minimumNumberOfMovesForInitialPositions = MinimumNumberOfMovesForInitialPositions( epoch) maximumNumberOfMovesForInitialPositions = args.maximumNumberOfMovesForInitialPositions logging.info("Generating positions...") startingPositionsList = Comparison.SimulateRandomGames( authority, minimumNumberOfMovesForInitialPositions, maximumNumberOfMovesForInitialPositions, args.numberOfPositionsForTraining, swapIfOddNumberOfMoves=True) startingPositionsTensor, augmentedStartingPositionsList = StartingPositionsInPairsOfPossibleOptions( startingPositionsList, authority) logging.info("Comparing starting position pairs...") decoderClassifier.eval() pairWinnerIndexList = Comparison.ComparePositionPairs( authority, decoderClassifier, augmentedStartingPositionsList, args.numberOfSimulations, epsilon=0, playerToEpsilonDict={ playerList[0]: epsilon, playerList[1]: epsilon }) # print ("pairWinnerIndexList = {}".format(pairWinnerIndexList)) pairWinnerIndexTsr = PairWinnerIndexTensor(pairWinnerIndexList) decoderClassifier.train() # Since the samples are generated dynamically, there is no need for minibatches: all samples are always new optimizer.zero_grad() # Forward pass outputTensor = decoderClassifier(startingPositionsTensor) # Calculate the error and backpropagate trainingLoss = loss(outputTensor, pairWinnerIndexTsr) logging.info("trainingLoss.item() = {}".format( trainingLoss.item())) trainingLoss.backward() # Move in the gradient descent direction optimizer.step() # ****************** Validation ****************** decoderClassifier.eval() if epoch % 50 == 1: logging.info("Generating validation positions...") validationStartingPositionsList = 
Comparison.SimulateRandomGames( authority, minimumNumberOfMovesForInitialPositions, maximumNumberOfMovesForInitialPositions, args.numberOfPositionsForValidation, swapIfOddNumberOfMoves=True) validationStartingPositionsTensor, validationAugmentedStartingPositionsList = \ StartingPositionsInPairsOfPossibleOptions(validationStartingPositionsList, authority) logging.info("Comparing validation starting position pairs...") validationPairWinnerIndexList = Comparison.ComparePositionPairs( authority, decoderClassifier, validationAugmentedStartingPositionsList, args.numberOfSimulations, epsilon=0, playerToEpsilonDict={ playerList[0]: epsilon, playerList[1]: epsilon }) # Start with purely random games (epsilon = 1) validationPairWinnerIndexTsr = PairWinnerIndexTensor( validationPairWinnerIndexList) # Forward pass validationOutputTensor = decoderClassifier( validationStartingPositionsTensor) # Calculate the validation error validationLoss = loss(validationOutputTensor, validationPairWinnerIndexTsr) # Calculate the validation accuracy validationAccuracy = Accuracy(validationOutputTensor, validationPairWinnerIndexTsr) logging.info( "validationLoss.item() = {}; validationAccuracy = {}". format(validationLoss.item(), validationAccuracy)) # Play against a random player logging.info("Play against a random player...") (numberOfWinsForEvaluator, numberOfWinsForRandomPlayer, numberOfDraws) = Comparison.SimulateGamesAgainstARandomPlayer( decoderClassifier, authority, args.numberOfGamesAgainstARandomPlayer) winRate = numberOfWinsForEvaluator / (numberOfWinsForEvaluator + numberOfWinsForRandomPlayer + numberOfDraws) lossRate = numberOfWinsForRandomPlayer / ( numberOfWinsForEvaluator + numberOfWinsForRandomPlayer + numberOfDraws) drawRate = numberOfDraws / (numberOfWinsForEvaluator + numberOfWinsForRandomPlayer + numberOfDraws) logging.info( "Against a random player, winRate = {}; drawRate = {}; lossRate = {}" .format(winRate, drawRate, lossRate)) epochLossFile.write( str(epoch) + ',' + str(trainingLoss.item()) + ',' + str(validationLoss.item()) + ',' + str(validationAccuracy) + ',' + str(winRate - lossRate) + ',' + str(winRate) + ',' + str(drawRate) + ',' + str(lossRate) + '\n') if lossRate <= args.lossRateThreshold: # Check with more random games (numberOfWinsForEvaluator, numberOfWinsForRandomPlayer, numberOfDraws) = Comparison.SimulateGamesAgainstARandomPlayer( decoderClassifier, authority, 100) winRate = numberOfWinsForEvaluator / ( numberOfWinsForEvaluator + numberOfWinsForRandomPlayer + numberOfDraws) lossRate = numberOfWinsForRandomPlayer / ( numberOfWinsForEvaluator + numberOfWinsForRandomPlayer + numberOfDraws) drawRate = numberOfDraws / (numberOfWinsForEvaluator + numberOfWinsForRandomPlayer + numberOfDraws) logging.info( "***** With 100 games against a random player, winRate = {}; drawRate = {}; lossRate = {} *****" .format(winRate, drawRate, lossRate)) if lossRate <= args.lossRateThreshold: terminalConditionIsReached = True filepath = os.path.join( args.outputDirectory, args.outputFilenamesPrefix + str(neuralNetworkNdx) + '.bin') decoderClassifier.Save(filepath) epoch += 1
def main(): logging.info("regressorCreatesSamples.py main()") authority = tictactoe.Authority() #positionTsrShape = authority.PositionTensorShape() playersList = authority.PlayersList() # Load the interpreter domainFunctionsTree: ET.ElementTree = ET.parse( args.domainPrimitivesFilepath) interpreter: gp.ArithmeticsInterpreter = gp.ArithmeticsInterpreter( domainFunctionsTree) # Load the ensemble if args.epsilon < 1.0: population = RegressorPopulation( interpreter, variableNameToTypeDict, 'float') #gpevo.ArithmeticsPopulation() population.LoadIndividuals(args.populationMembersFilepathPrefix) else: population = None # Create the autoencoder encoder = autoencoder.position.Net() encoder.Load(args.autoencoderFilepath) numberOfLatentVariables = encoder.numberOfLatentVariables header = '' for latentNdx in range(numberOfLatentVariables): header += 'p' + str(latentNdx) + ',' # Load the features preprocessor preprocessor = pickle.load(open(args.featuresPreprocessor, 'rb')) # Create the output file outputFile = open(args.outputFilepath, "w", buffering=1) # Flush the buffer at each line outputFile.write(header + "player0WinRate,drawRate,player1WinRate\n") for positionNdx in range(1, args.numberOfPositions + 1): logging.info("Generating position {}...".format(positionNdx)) startingPosition = winRatesRegression.SimulateRandomGames( authority, encoder=encoder, minimumNumberOfMovesForInitialPositions=0, maximumNumberOfMovesForInitialPositions=7, numberOfPositions=1, swapIfOddNumberOfMoves=False)[0] authority.Display(startingPosition) numberOfWinsForPlayer0 = 0 numberOfWinsForPlayer1 = 0 numberOfDraws = 0 for simulationNdx in range(args.numberOfSimulationsPerPosition): (positionsList, winner) = winRatesRegression.SimulateAGame( population, encoder, authority, startingPosition=startingPosition, nextPlayer=playersList[1], playerToEpsilonDict={ playersList[0]: args.epsilon, playersList[1]: args.epsilon }, encodingPreprocessor=preprocessor) if winner == playersList[0]: numberOfWinsForPlayer0 += 1 elif winner == playersList[1]: numberOfWinsForPlayer1 += 1 elif winner == 'draw': numberOfDraws += 1 else: raise ValueError("Unknown winner '{}'".format(winner)) # print ("positionsList = \n{}\nwinner = {}".format(positionsList, winner)) player0WinRate = numberOfWinsForPlayer0 / args.numberOfSimulationsPerPosition player1WinRate = numberOfWinsForPlayer1 / args.numberOfSimulationsPerPosition drawRate = numberOfDraws / args.numberOfSimulationsPerPosition logging.info( "winRateForPlayer0 = {}; drawRate = {}; winRateForPlayer1 = {}". format(player0WinRate, drawRate, player1WinRate)) #positionList = startingPosition.flatten().tolist() positionEncoding = encoder.Encode( startingPosition.unsqueeze(0)).flatten().tolist() print("positionEncoding = {}".format(positionEncoding)) for encodingNdx in range(len(positionEncoding)): outputFile.write("{},".format(positionEncoding[encodingNdx])) outputFile.write("{},{},{}\n".format(player0WinRate, drawRate, player1WinRate))
def main(): logging.info("comparatorCreatesSamples.py main()") authority = tictactoe.Authority() positionTsrShape = authority.PositionTensorShape() playersList = authority.PlayersList() # Load the ensemble netEnsemble = Comparison.Load(args.comparatorFilepath) # Create the output file outputFile = open(args.outputFilepath, "w", buffering=1) # Flush the buffer at each line outputFile.write( "p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,p10,p11,p12,p13,p14,p15,player0WinRate,drawRate,player1WinRate\n" ) # Create the autoencoder encoder = autoencoder.position.Net() encoder.Load(args.autoencoderFilepath) for positionNdx in range(1, args.numberOfPositions + 1): logging.info("Generating position {}...".format(positionNdx)) startingPosition = Comparison.SimulateRandomGames( authority, 0, 7, 1, swapIfOddNumberOfMoves=False)[0] authority.Display(startingPosition) numberOfWinsForPlayer0 = 0 numberOfWinsForPlayer1 = 0 numberOfDraws = 0 for simulationNdx in range(args.numberOfSimulationsPerPosition): (positionsList, winner) = Comparison.SimulateAGame( netEnsemble, authority, startingPosition=startingPosition, nextPlayer=playersList[1], playerToEpsilonDict={ playersList[0]: args.epsilon, playersList[1]: args.epsilon }) if winner == playersList[0]: numberOfWinsForPlayer0 += 1 elif winner == playersList[1]: numberOfWinsForPlayer1 += 1 elif winner == 'draw': numberOfDraws += 1 else: raise ValueError("Unknown winner '{}'".format(winner)) # print ("positionsList = \n{}\nwinner = {}".format(positionsList, winner)) player0WinRate = numberOfWinsForPlayer0 / args.numberOfSimulationsPerPosition player1WinRate = numberOfWinsForPlayer1 / args.numberOfSimulationsPerPosition drawRate = numberOfDraws / args.numberOfSimulationsPerPosition logging.info( "winRateForPlayer0 = {}; drawRate = {}; winRateForPlayer1 = {}". format(player0WinRate, drawRate, player1WinRate)) positionList = startingPosition.flatten().tolist() positionEncoding = encoder.Encode( startingPosition.unsqueeze(0)).flatten().tolist() print("positionEncoding = {}".format(positionEncoding)) for encodingNdx in range(len(positionEncoding)): outputFile.write("{},".format(positionEncoding[encodingNdx])) outputFile.write("{},{},{}\n".format(player0WinRate, drawRate, player1WinRate))
def main(): print("learnTicTacToeAutoencoder.py main()") authority = tictactoe.Authority() positionTensorShape = authority.PositionTensorShape() playersList = authority.PlayersList() if args.startWithNeuralNetwork is not None: neuralNetwork = position.Net() neuralNetwork.Load(args.startWithNeuralNetwork) for name, p in neuralNetwork.named_parameters(): logging.info("layer: {}".format(name)) if "layer_0" in name or "layer_1" in name: logging.info("Setting p.requires_grad = False") p.requires_grad = False else: neuralNetwork = position.Net( positionTensorShape, bodyStructure=bodyStructure, numberOfLatentVariables=args.numberOfLatentVariables, zeroPadding=False) # Create the optimizer optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, neuralNetwork.parameters()), lr=args.learningRate, betas=(0.5, 0.999)) # Loss function loss = torch.nn.BCEWithLogitsLoss(pos_weight=torch.Tensor( [args.positiveCaseWeight])) # torch.nn.MSELoss() # Initial learning rate learningRate = args.learningRate # Output monitoring file epochLossFile = open(os.path.join(args.outputDirectory, 'epochLoss.csv'), "w", buffering=1) # Flush the buffer at each line epochLossFile.write("epoch,trainingLoss,validationLoss,errorRate\n") for epoch in range(1, args.numberOfEpochs + 1): logging.info("Epoch {}".format(epoch)) # Set the neural network to training mode neuralNetwork.train() # Generate positions minimumNumberOfMovesForInitialPositions = MinimumNumberOfMovesForInitialPositions( epoch) logging.info("Generating {} training random positions".format( args.numberOfPositionsForTraining)) trainingPositionsList = GenerateRandomPositions( args.numberOfPositionsForTraining, playersList, authority, minimumNumberOfMovesForInitialPositions, args.maximumNumberOfMovesForPositions) #print ("trainingPositionsList =\n{}".format(trainingPositionsList)) logging.info("Generating {} validation random positions".format( args.numberOfPositionsForValidation)) validationPositionsList = GenerateRandomPositions( args.numberOfPositionsForValidation, playersList, authority, minimumNumberOfMovesForInitialPositions, args.maximumNumberOfMovesForPositions) trainingLossSum = 0.0 minibatchIndicesList = utilities.MinibatchIndices( len(trainingPositionsList), args.minibatchSize) logging.info("Going through the minibatch") for minibatchNdx in range(len(minibatchIndicesList)): print('.', end='', flush=True) minibatchPositions = [] for index in minibatchIndicesList[minibatchNdx]: # logging.debug("len(positionStatisticsList[{}]) = {}".format(index, len(positionStatisticsList[index]))) minibatchPositions.append(trainingPositionsList[index]) minibatchPositionsTensor = utilities.MinibatchTensor( minibatchPositions) minibatchTargetPositionsTensor = utilities.MinibatchTensor( minibatchPositions) # Autoencoder => target output = input optimizer.zero_grad() # Forward pass outputTensor = neuralNetwork(minibatchPositionsTensor) # Calculate the error and backpropagate trainingLoss = loss(outputTensor, minibatchTargetPositionsTensor) # logging.debug("trainingLoss.item() = {}".format(trainingLoss.item())) trainingLoss.backward() trainingLossSum += trainingLoss.item() # Move in the gradient descent direction optimizer.step() averageTrainingLoss = trainingLossSum / len(minibatchIndicesList) # Compute the validation loss neuralNetwork.eval() validationPositionsTensor = utilities.MinibatchTensor( validationPositionsList) validationOutputTensor = neuralNetwork(validationPositionsTensor) validationLoss = loss(validationOutputTensor, validationPositionsTensor ).item() # 
Autoencoder => target output = input # Compare the output tensor converted to one-hot with the target oneHotValidationOutputTensor = position.ConvertToOneHotPositionTensor( validationOutputTensor) numberOfErrors = torch.nonzero(validationPositionsTensor.long() - oneHotValidationOutputTensor).shape[0] errorRate = numberOfErrors / max( torch.nonzero(validationPositionsTensor).shape[0], 0) print(" * ") logging.info( "Epoch {}: averageTrainingLoss = {}\tvalidationLoss = {}\terrorRate = {}" .format(epoch, averageTrainingLoss, validationLoss, errorRate)) epochLossFile.write( str(epoch) + ',' + str(averageTrainingLoss) + ',' + str(validationLoss) + ',' + str(errorRate) + '\n') # Save the neural network neuralNetwork.Save(args.outputDirectory, 'tictactoeAutoencoder_' + str(epoch)) # Update the learning rate learningRate = learningRate * args.learningRateExponentialDecay utilities.adjust_lr(optimizer, learningRate)
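# position.ConvertToOneHotPositionTensor() is used above but not defined in this section.
# A hypothetical sketch, assuming autoencoder outputs of shape (N, 2, 1, 3, 3) with one
# channel per player: for each square, mark the channel with the highest activation,
# and only when that activation is positive (a logit > 0 corresponds to a probability
# above 0.5 under BCEWithLogitsLoss). The real implementation may use a different rule.
import torch

def ConvertToOneHotPositionTensorSketch(outputTensor):
    oneHotTsr = torch.zeros_like(outputTensor, dtype=torch.long)
    maxValues, maxChannels = torch.max(outputTensor, dim=1, keepdim=True)
    # Put a 1 on the winning channel of every square whose activation exceeds 0
    oneHotTsr.scatter_(1, maxChannels, (maxValues > 0).long())
    return oneHotTsr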
def main(): print("learnTicTacToe.py main()") authority = tictactoe.Authority() positionTensorShape = authority.PositionTensorShape() moveTensorShape = authority.MoveTensorShape() playerList = authority.PlayersList() if args.startWithNeuralNetwork is not None: neuralNetwork = moveEvaluation.ConvolutionStack.Net() neuralNetwork.Load(args.startWithNeuralNetwork) else: neuralNetwork = moveEvaluation.ConvolutionStack.Net( positionTensorShape, [(3, 32), (3, 32), (3, 32)], moveTensorShape) # Create the optimizer optimizer = torch.optim.Adam(neuralNetwork.parameters(), lr=args.learningRate, betas=(0.5, 0.999)) # Loss function loss = torch.nn.MSELoss() # Initial learning rate learningRate = args.learningRate # Output monitoring file epochLossFile = open(os.path.join(args.outputDirectory, 'epochLoss.csv'), "w", buffering=1) # Flush the buffer at each line epochLossFile.write( "epoch,averageActionValuesTrainingLoss,averageRewardAgainstRandomPlayer,winRate,drawRate,lossRate\n" ) # Save the initial neural network, and write it's score against a random player #modelParametersFilename = os.path.join(args.outputDirectory, "neuralNet_tictactoe_0.pth") #torch.save(neuralNetwork.state_dict(), modelParametersFilename) neuralNetwork.Save(args.outputDirectory, 'tictactoe_0') (averageRewardAgainstRandomPlayer, winRate, drawRate, lossRate, losingGamePositionsListList) = \ expectedMoveValues.AverageRewardAgainstARandomPlayerKeepLosingGames( playerList, authority, neuralNetwork, args.chooseHighestProbabilityIfAtLeast, True, softMaxTemperature=0.1, numberOfGames=300, moveChoiceMode='SemiExhaustiveMiniMax', numberOfGamesForMoveEvaluation=0, # ignored by SoftMax depthOfExhaustiveSearch=3, numberOfTopMovesToDevelop=3 ) print( "main(): averageRewardAgainstRandomPlayer = {}; winRate = {}; drawRate = {}; lossRate = {}" .format(averageRewardAgainstRandomPlayer, winRate, drawRate, lossRate)) epochLossFile.write('0' + ',' + '-' + ',' + str(averageRewardAgainstRandomPlayer) + ',' + str(winRate) + ',' + str(drawRate) + ',' + str(lossRate) + '\n') #bestValidationLoss = sys.float_info.max softMaxTemperatureForSelfPlayEvaluation = args.softMaxTemperatureForSelfPlayEvaluation if args.averageTrainingLossToSoftMaxTemperatureForSelfPlayEvaluationDic is not None: averageTrainingLossToSoftMaxTemperatureForSelfPlayEvaluationDic = ast.literal_eval( args. 
averageTrainingLossToSoftMaxTemperatureForSelfPlayEvaluationDic) else: averageTrainingLossToSoftMaxTemperatureForSelfPlayEvaluationDic = None losingGamesAgainstRandomPlayerPositionsList = [] for epoch in range(1, args.numberOfEpochs + 1): print("epoch {}".format(epoch)) # Set the neural network to training mode neuralNetwork.train() # Generate positions print("Generating positions...") minimumNumberOfMovesForInitialPositions = MinimumNumberOfMovesForInitialPositions( epoch) maximumNumberOfMovesForInitialPositions = args.maximumNumberOfMovesForInitialPositions if epoch % 4 == -1: #0: positionStatisticsList = generateMoveStatistics.GenerateMoveStatisticsMultiprocessing( playerList, authority, neuralNetwork, args.proportionOfRandomInitialPositions, (minimumNumberOfMovesForInitialPositions, maximumNumberOfMovesForInitialPositions), args.numberOfInitialPositions, args.numberOfGamesForEvaluation, softMaxTemperatureForSelfPlayEvaluation, args.epsilon, args.depthOfExhaustiveSearch, args.chooseHighestProbabilityIfAtLeast, losingGamesAgainstRandomPlayerPositionsList, args.numberOfProcesses) else: positionStatisticsList = generateMoveStatistics.GenerateMoveStatisticsWithMiniMax( playerList, authority, neuralNetwork, (minimumNumberOfMovesForInitialPositions, maximumNumberOfMovesForInitialPositions), args.numberOfInitialPositions, args.depthOfExhaustiveSearch, [] #losingGamesAgainstRandomPlayerPositionsList ) # (initialPosition, averageValuesTensor, standardDeviationTensor, legalMovesNMask) #print ("positionStatisticsList = {}".format(positionStatisticsList)) #print ("main(): len(positionToMoveProbabilitiesAndValueDic) = {}".format(len(positionToMoveProbabilitiesAndValueDic))) #positionsList = list(positionToMoveProbabilitiesAndValueDic.keys()) actionValuesLossSum = 0.0 minibatchIndicesList = utilities.MinibatchIndices( len(positionStatisticsList), args.minibatchSize) for minibatchNdx in range(len(minibatchIndicesList)): print('.', end='', flush=True) minibatchPositions = [] minibatchTargetActionValues = [] minibatchLegalMovesMasks = [] for index in minibatchIndicesList[minibatchNdx]: #minibatchPositions.append(positionsList[index]) #if HasAlreadyBeenUsed(positionMoveProbabilityAndValueList[index][0], minibatchPositions): # print ("main(): positionMoveProbabilityAndValueList[index][0] has laready been used") #(minibatchMoveProbabilities, value) = \ # (positionMoveProbabilityAndValueList[index][1], positionMoveProbabilityAndValueList[index][2]) #positionToMoveProbabilitiesAndValueDic[positionsList[index]] #if HasAlreadyBeenUsed(positionMoveProbabilityAndValueList[index][1], minibatchTargetMoveProbabilities): # print ("main(): positionMoveProbabilityAndValueList[index][1] has laready been used") minibatchPositions.append(positionStatisticsList[index][0]) averageValueMinusNStdDev = positionStatisticsList[index][1] - \ args.numberOfStandardDeviationsBelowAverageForValueEstimate * positionStatisticsList[index][2] legalMovesMask = positionStatisticsList[index][3] """averageValueMinusNStdDev = torch.where(positionStatisticsList[index][3] > 0, averageValueMinusNStdDev, positionStatisticsList[index][3].float()) # Get 0 where the mask is 0 minibatchTargetMoveProbabilities.append(averageValueMinusNStdDev) """ averageValueMinusNStdDev = averageValueMinusNStdDev * legalMovesMask.float( ) minibatchTargetActionValues.append(averageValueMinusNStdDev) #if authority.CurrentSum(positionsList[index]) == 1: #print ("main(): sum = {}; value = {}".format(authority.CurrentSum(positionsList[index]), value)) #print ("main(): 
minibatchMoveProbabilities = \n{}".format(minibatchMoveProbabilities)) minibatchLegalMovesMasks.append(legalMovesMask) #print ("main(): positionStatisticsList[index][0] = {}".format(positionStatisticsList[index][0])) #print ("main(): averageValueMinusNStdDev = {}".format(averageValueMinusNStdDev)) #print ("main(): legalMovesMask = {}".format(legalMovesMask)) #print ("main(): averageValueMinusNStdDev.max().item() = {}".format(averageValueMinusNStdDev.max().item())) minibatchPositionsTensor = utilities.MinibatchTensor( minibatchPositions) minibatchTargetActionValuesTensor = utilities.MinibatchTensor( minibatchTargetActionValues) optimizer.zero_grad() # Forward pass outputActionValuesTensor = neuralNetwork(minibatchPositionsTensor) # Mask the output action values with the legal moves mask for maskNdx in range(len(minibatchLegalMovesMasks)): outputActionValues = outputActionValuesTensor[maskNdx].clone() legalMovesMask = minibatchLegalMovesMasks[maskNdx] maskedOutputActionValues = outputActionValues * legalMovesMask.float( ) outputActionValuesTensor[maskNdx] = maskedOutputActionValues # Calculate the error and backpropagate #print ("outputMoveProbabilitiesTensor.shape = {}".format(outputMoveProbabilitiesTensor.shape)) #print ("minibatchTargetMoveProbabilitiesTensor.shape = {}".format(minibatchTargetMoveProbabilitiesTensor.shape)) #print ("outputValuesTensor.shape = {}".format(outputValuesTensor.shape)) #print ("minibatchTargetValuesTensor.shape = {}".format(minibatchTargetValuesTensor.shape)) actionValuesLoss = loss(outputActionValuesTensor, minibatchTargetActionValuesTensor) try: actionValuesLoss.backward() # trainingLossSum += minibatchLoss.item() actionValuesLossSum += actionValuesLoss.item() # Move in the gradient descent direction optimizer.step() except Exception as exc: print("Caught excetion: {}".format(exc)) print('X', end='', flush=True) averageActionValuesTrainingLoss = actionValuesLossSum / len( minibatchIndicesList) print("\nEpoch {}: averageActionValuesTrainingLoss = {}".format( epoch, averageActionValuesTrainingLoss)) if averageTrainingLossToSoftMaxTemperatureForSelfPlayEvaluationDic is not None: softMaxTemperatureForSelfPlayEvaluation = SoftMaxTemperature( averageActionValuesTrainingLoss, averageTrainingLossToSoftMaxTemperatureForSelfPlayEvaluationDic, args.softMaxTemperatureForSelfPlayEvaluation) # Update the learning rates learningRate = learningRate * args.learningRateExponentialDecay utilities.adjust_lr(optimizer, learningRate) # Save the neural network #if validationLoss < bestValidationLoss: # bestValidationLoss = validationLoss neuralNetwork.Save(args.outputDirectory, 'tictactoe_' + str(epoch)) #modelParametersFilename = os.path.join(args.outputDirectory, "neuralNet_tictactoe_" + str(epoch) + '.pth') #torch.save(neuralNetwork.state_dict(), modelParametersFilename) if epoch % 20 == -1: moveChoiceMode = 'ExpectedMoveValuesThroughSelfPlay' numberOfGames = 100 depthOfExhaustiveSearch = 2 else: moveChoiceMode = 'SemiExhaustiveMiniMax' numberOfGames = 300 depthOfExhaustiveSearch = 3 numberOfTopMovesToDevelop = 3 (averageRewardAgainstRandomPlayer, winRate, drawRate, lossRate, losingGamePositionsListList) = \ expectedMoveValues.AverageRewardAgainstARandomPlayerKeepLosingGames( playerList, authority, neuralNetwork, args.chooseHighestProbabilityIfAtLeast, True, softMaxTemperature=softMaxTemperatureForSelfPlayEvaluation, numberOfGames=numberOfGames, moveChoiceMode=moveChoiceMode, numberOfGamesForMoveEvaluation=41, # ignored by SoftMax depthOfExhaustiveSearch=depthOfExhaustiveSearch, 
numberOfTopMovesToDevelop=numberOfTopMovesToDevelop ) print( "main(): averageRewardAgainstRandomPlayer = {}; winRate = {}; drawRate = {}; lossRate = {}" .format(averageRewardAgainstRandomPlayer, winRate, drawRate, lossRate)) # Collect the positions from losing games losingGamesAgainstRandomPlayerPositionsList = [] for (losingGamePositionsList, firstPlayer) in losingGamePositionsListList: for positionNdx in range(len(losingGamePositionsList) - 1): if firstPlayer == playerList[0]: # Keep even positions if positionNdx % 2 == 0: losingGamesAgainstRandomPlayerPositionsList.append( losingGamePositionsList[positionNdx]) else: # fistPlayer == playerList[1] -> Keep odd positions if positionNdx % 2 == 1: losingGamesAgainstRandomPlayerPositionsList.append( losingGamePositionsList[positionNdx]) epochLossFile.write( str(epoch) + ',' + str(averageActionValuesTrainingLoss) + ',' + str(averageRewardAgainstRandomPlayer) + ',' + str(winRate) + ',' + str(drawRate) + ',' + str(lossRate) + '\n') initialPosition = authority.InitialPosition() initialPositionOutput = neuralNetwork(initialPosition.unsqueeze(0)) print("main(): initialPositionOutput = \n{}".format( initialPositionOutput))
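# SoftMaxTemperature() above maps the current training loss to a self-play softmax
# temperature through a user-supplied dictionary (passed on the command line as a
# Python literal). Its definition lives elsewhere in this script; a plausible
# reading, assumed here, is "use the temperature of the highest loss threshold the
# current loss still reaches, otherwise fall back to the default". The name and
# the exact schedule below are assumptions.
def SoftMaxTemperatureSketch(averageTrainingLoss, lossThresholdToTemperatureDic,
                             defaultTemperature):
    # Visit thresholds from highest to lowest; the first one the loss reaches wins
    for lossThreshold in sorted(lossThresholdToTemperatureDic.keys(), reverse=True):
        if averageTrainingLoss >= lossThreshold:
            return lossThresholdToTemperatureDic[lossThreshold]
    return defaultTemperature

# Example: with {0.1: 1.0, 0.01: 0.3}, a loss of 0.5 yields 1.0 (more exploration
# early on), a loss of 0.05 yields 0.3, and a loss of 0.005 yields the default.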
def main(): logging.info("learnTicTacToeWithDecoderMLP.py main()") authority = tictactoe.Authority() positionTensorShape = authority.PositionTensorShape() moveTensorShape = authority.MoveTensorShape() playerList = authority.PlayersList() if args.startWithNeuralNetwork is not None: raise NotImplementedError( "main(): Start with a neural network is not implemented...") else: if args.startWithAutoencoder is not None: autoencoderNet = autoencoder.position.Net() autoencoderNet.Load(args.startWithAutoencoder) decoderMLP = Decoder.BuildAnMLPDecoderFromAnAutoencoder( autoencoderNet, decodingLayerSizesList) else: raise NotImplementedError( "main(): Starting without an autoencoder is not implemented..." ) # Create the optimizer logging.debug(decoderMLP) for name, param in decoderMLP.named_parameters(): if 'decoding' in name: param.requires_grad = True else: param.requires_grad = True print("name = {}; param.requires_grad = {}".format( name, param.requires_grad)) optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, decoderMLP.parameters()), lr=args.learningRate, betas=(0.5, 0.999)) # Loss function loss = torch.nn.MSELoss() # Initial learning rate learningRate = args.learningRate # Output monitoring file epochLossFile = open(os.path.join(args.outputDirectory, 'epochLoss.csv'), "w", buffering=1) # Flush the buffer at each line epochLossFile.write( "epoch,trainingMSE,validationMSE,averageRewardAgainstRandomPlayer,winRate,drawRate,lossRate\n" ) # First game with a random player, before any training (numberOfWinsForEvaluator, numberOfWinsForRandomPlayer, numberOfDraws) = Predictor.SimulateGamesAgainstARandomPlayer( decoderMLP, authority, args.numberOfGamesAgainstARandomPlayer) winRate = numberOfWinsForEvaluator / ( numberOfWinsForEvaluator + numberOfWinsForRandomPlayer + numberOfDraws) lossRate = numberOfWinsForRandomPlayer / ( numberOfWinsForEvaluator + numberOfWinsForRandomPlayer + numberOfDraws) drawRate = numberOfDraws / (numberOfWinsForEvaluator + numberOfWinsForRandomPlayer + numberOfDraws) logging.info( "Against a random player, winRate = {}; drawRate = {}; lossRate = {}". 
format(winRate, drawRate, lossRate)) epochLossFile.write('0' + ',' + '-' + ',' + '-' + ',' + str(winRate - lossRate) + ',' + str(winRate) + ',' + str(drawRate) + ',' + str(lossRate) + '\n') playerToEpsilonDict = { playerList[0]: args.epsilon, playerList[1]: args.epsilon } for epoch in range(1, args.numberOfEpochs + 1): logging.info("Epoch {}".format(epoch)) decoderMLP.train() # Generate positions minimumNumberOfMovesForInitialPositions = MinimumNumberOfMovesForInitialPositions( epoch) maximumNumberOfMovesForInitialPositions = args.maximumNumberOfMovesForInitialPositions logging.info("Generating positions...") startingPositionsList = Predictor.SimulateRandomGames( authority, minimumNumberOfMovesForInitialPositions, maximumNumberOfMovesForInitialPositions, args.numberOfPositionsForTraining) #print ("main(): startingPositionsList = {}".format(startingPositionsList)) startingPositionsTensor = StartingPositionsTensor( startingPositionsList) logging.info( "Evaluating expected reward for each starting position...") expectedRewardsList = Predictor.ExpectedRewardsList( authority, decoderMLP, startingPositionsList, args.numberOfSimulations, playerList[1], playerToEpsilonDict=playerToEpsilonDict) #print ("main(): expectedRewardsList = {}".format(expectedRewardsList)) expectedRewardsTensor = ExpectedRewardsTensor(expectedRewardsList) # Since the samples are generated dynamically, there is no need for minibatches: all samples are always new optimizer.zero_grad() # Forward pass outputTensor = decoderMLP(startingPositionsTensor) # Calculate the error and backpropagate trainingLoss = loss(outputTensor, expectedRewardsTensor) logging.info("trainingLoss.item() = {}".format(trainingLoss.item())) trainingLoss.backward() # Move in the gradient descent direction optimizer.step() # Validation decoderMLP.eval() validationStartingPositionsList = Predictor.SimulateRandomGames( authority, minimumNumberOfMovesForInitialPositions, maximumNumberOfMovesForInitialPositions, args.numberOfPositionsForValidation) validationStartingPositionsTensor = StartingPositionsTensor( validationStartingPositionsList) logging.info( "Evaluating expected reward for each validation starting position..." 
) validationExpectedRewardsList = Predictor.ExpectedRewardsList( authority, decoderMLP, validationStartingPositionsList, args.numberOfSimulations, playerList[1], playerToEpsilonDict=playerToEpsilonDict) validationExpectedRewardsTensor = ExpectedRewardsTensor( validationExpectedRewardsList) validationOutputTensor = decoderMLP(validationStartingPositionsTensor) validationLoss = loss(validationOutputTensor, validationExpectedRewardsTensor) logging.info("validationLoss.item() = {}".format( validationLoss.item())) # Play against a random player (numberOfWinsForEvaluator, numberOfWinsForRandomPlayer, numberOfDraws) = Predictor.SimulateGamesAgainstARandomPlayer( decoderMLP, authority, args.numberOfGamesAgainstARandomPlayer) winRate = numberOfWinsForEvaluator / (numberOfWinsForEvaluator + numberOfWinsForRandomPlayer + numberOfDraws) lossRate = numberOfWinsForRandomPlayer / (numberOfWinsForEvaluator + numberOfWinsForRandomPlayer + numberOfDraws) drawRate = numberOfDraws / (numberOfWinsForEvaluator + numberOfWinsForRandomPlayer + numberOfDraws) logging.info( "Against a random player, winRate = {}; drawRate = {}; lossRate = {}" .format(winRate, drawRate, lossRate)) epochLossFile.write( str(epoch) + ',' + str(trainingLoss.item()) + ',' + str(validationLoss.item()) + ',' + str(winRate - lossRate) + ',' + str(winRate) + ',' + str(drawRate) + ',' + str(lossRate) + '\n') if epoch % 10 == 0: filepath = os.path.join(args.outputDirectory, 'tictactoe_' + str(epoch) + '.bin') decoderMLP.Save(filepath)
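# StartingPositionsTensor() and ExpectedRewardsTensor() above are small packing
# helpers defined earlier in this script. The one-liners below are plausible
# sketches, assuming positions are individual tensors and rewards are floats;
# the exact shapes and the "Sketch" names are assumptions.
import torch

def StartingPositionsTensorSketch(startingPositionsList):
    # Stack the individual position tensors into a single [N, ...] minibatch
    return torch.stack(startingPositionsList, dim=0)

def ExpectedRewardsTensorSketch(expectedRewardsList):
    # One scalar expected reward per position, as an [N, 1] column for MSELoss
    return torch.tensor(expectedRewardsList, dtype=torch.float).unsqueeze(1)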
def main(): print("gameArena.py main()") # Create the game authority if args.game == 'tictactoe': authority = tictactoe.Authority() elif args.game == 'connect4': authority = connect4.Authority() elif args.game == 'checkers': authority = checkers.Authority() else: raise NotImplementedError("main(): unknown game '{}'".format( args.game)) playersList = authority.PlayersList() positionTensorShape = authority.PositionTensorShape() moveTensorShape = authority.MoveTensorShape() #if type(ast.literal_eval(args.neuralNetwork)) is list: # Neural networks ensemble if args.neuralNetwork is not None and args.neuralNetwork.startswith( '[') and args.neuralNetwork.endswith( ']'): # List => neural networks ensemble committeeMembersList = [] for neuralNetworkFilepath in ast.literal_eval(args.neuralNetwork): committeeMember = moveEvaluation.ConvolutionStack.Net() committeeMember.Load(neuralNetworkFilepath) committeeMembersList.append(committeeMember) neuralNetwork = moveEvaluation.netEnsemble.Committee( committeeMembersList) else: # Single neural network neuralNetwork = moveEvaluation.ConvolutionStack.Net( positionTensorShape, ast.literal_eval(args.networkBodyArchitecture), moveTensorShape) if args.neuralNetwork is not None: neuralNetwork.Load(args.neuralNetwork) winner = None numberOfPlayedMoves = 0 player = playersList[numberOfPlayedMoves % 2] positionTensor = authority.InitialPosition() humanPlayerTurn = 0 if args.opponentPlaysFirst: humanPlayerTurn = 1 """moveTensor = AskTheNeuralNetworkToChooseAMove( playersList, authority, neuralNetwork, args.chooseHighestProbabilityIfAtLeast, positionTensor, args.numberOfGamesForMoveEvaluation, args.softMaxTemperature, epsilon=0, displayExpectedMoveValues=args.displayExpectedMoveValues, depthOfExhaustiveSearch=args.depthOfExhaustiveSearch) """ moveTensor = SemiExhaustiveMinimaxHighestValue( playersList, authority, neuralNetwork, positionTensor, epsilon=0, maximumDepthOfSemiExhaustiveSearch=args.depthOfExhaustiveSearch, numberOfTopMovesToDevelop=args.numberOfTopMovesToDevelop, displayExpectedMoveValues=args.displayExpectedMoveValues, ) positionTensor, winner = authority.Move(positionTensor, playersList[0], moveTensor) numberOfPlayedMoves = 1 player = playersList[numberOfPlayedMoves % 2] authority.Display(positionTensor) while winner is None: print("numberOfPlayedMoves % 2 = {}; humanPlayerTurn = {}".format( numberOfPlayedMoves % 2, humanPlayerTurn)) if numberOfPlayedMoves % 2 == humanPlayerTurn: inputIsLegal = False while not inputIsLegal: try: userInput = input( "Your move ('?' 
to get the legal moves mask, 'positionTensor' to get the position tensor): " ) if userInput == "?": legalMovesMask = authority.LegalMovesMask( positionTensor, player) print("legalMovesMask = \n{}".format(legalMovesMask)) inputIsLegal = False elif userInput == "positionTensor": print("positionTensor = \n{}".format(positionTensor)) else: positionTensor, winner = authority.MoveWithString( positionTensor, player, userInput) inputIsLegal = True except ValueError as e: print("Caught exception '{}'.\nTry again".format(e)) numberOfPlayedMoves += 1 player = playersList[numberOfPlayedMoves % 2] authority.Display(positionTensor) else: # Neural network turn if player is playersList[1]: positionTensor = authority.SwapPositions( positionTensor, playersList[0], playersList[1]) startTime = time.time() """moveTensor = AskTheNeuralNetworkToChooseAMove( playersList, authority, neuralNetwork, args.chooseHighestProbabilityIfAtLeast, positionTensor, args.numberOfGamesForMoveEvaluation, args.softMaxTemperature, epsilon=0, displayExpectedMoveValues=args.displayExpectedMoveValues, depthOfExhaustiveSearch=args.depthOfExhaustiveSearch) """ moveTensor = SemiExhaustiveMinimaxHighestValue( playersList, authority, neuralNetwork, positionTensor, epsilon=0, maximumDepthOfSemiExhaustiveSearch=args. depthOfExhaustiveSearch, numberOfTopMovesToDevelop=args.numberOfTopMovesToDevelop, displayExpectedMoveValues=args.displayExpectedMoveValues, ) endTime = time.time() decisionTime = endTime - startTime print("decisionTime = {}".format(decisionTime)) positionTensor, winner = authority.Move(positionTensor, playersList[0], moveTensor) if player is playersList[1]: positionTensor = authority.SwapPositions( positionTensor, playersList[0], playersList[1]) if winner is playersList[0] and player is playersList[1]: winner = playersList[1] numberOfPlayedMoves += 1 player = playersList[numberOfPlayedMoves % 2] authority.Display(positionTensor) if winner == 'draw': print("Draw!") else: print("{} won!".format(winner))
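# The arena lets the network always reason as playersList[0] by swapping the two
# occupancy channels before and after its move. For the (2, 1, 3, 3) tic-tac-toe
# encoding used throughout this project (one channel per player), SwapPositions
# amounts to exchanging the two player channels. A minimal sketch; the channel
# layout and the "Sketch" name are assumptions, and the player arguments are kept
# only for signature parity with authority.SwapPositions:
def SwapPositionsSketch(positionTensor, player0, player1):
    swappedPositionTensor = positionTensor.clone()
    swappedPositionTensor[0] = positionTensor[1]  # player1's marks become player0's
    swappedPositionTensor[1] = positionTensor[0]  # and vice versa
    return swappedPositionTensor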