Example #1
def main():
    logging.info("learnTicTacToeWithDecoderMLP.py main()")

    authority = tictactoe.Authority()
    positionTensorShape = authority.PositionTensorShape()
    moveTensorShape = authority.MoveTensorShape()
    playerList = authority.PlayersList()

    if args.startWithNeuralNetwork is not None:
        raise NotImplementedError(
            "main(): Start with a neural network is not implemented...")
    else:
        if args.startWithAutoencoder is not None:
            autoencoderNet = autoencoder.position.Net()
            autoencoderNet.Load(args.startWithAutoencoder)

            decoderMLP = Decoder.BuildAnMLPDecoderFromAnAutoencoder(
                autoencoderNet, decodingLayerSizesList)
        else:
            raise NotImplementedError(
                "main(): Starting without an autoencoder is not implemented..."
            )

    # Create the optimizer
    logging.debug(decoderMLP)
    # Note: both branches currently set requires_grad = True, so every parameter
    # (including the layers imported from the autoencoder) is trainable; change the
    # else branch to False to freeze the encoder and train only the decoding layers.
    for name, param in decoderMLP.named_parameters():
        if 'decoding' in name:
            param.requires_grad = True
        else:
            param.requires_grad = True
        print("name = {}; param.requires_grad = {}".format(
            name, param.requires_grad))

    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                        decoderMLP.parameters()),
                                 lr=args.learningRate,
                                 betas=(0.5, 0.999))

    # Loss function
    loss = torch.nn.MSELoss()

    # Initial learning rate
    learningRate = args.learningRate

    # Output monitoring file
    epochLossFile = open(os.path.join(args.outputDirectory, 'epochLoss.csv'),
                         "w",
                         buffering=1)  # Flush the buffer at each line
    epochLossFile.write(
        "epoch,trainingMSE,validationMSE,averageRewardAgainstRandomPlayer,winRate,drawRate,lossRate\n"
    )

    # First game with a random player, before any training
    (numberOfWinsForEvaluator, numberOfWinsForRandomPlayer,
     numberOfDraws) = Predictor.SimulateGamesAgainstARandomPlayer(
         decoderMLP, authority, args.numberOfGamesAgainstARandomPlayer)
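    # Convert the game counts into win/draw/loss rates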
    winRate = numberOfWinsForEvaluator / (
        numberOfWinsForEvaluator + numberOfWinsForRandomPlayer + numberOfDraws)
    lossRate = numberOfWinsForRandomPlayer / (
        numberOfWinsForEvaluator + numberOfWinsForRandomPlayer + numberOfDraws)
    drawRate = numberOfDraws / (numberOfWinsForEvaluator +
                                numberOfWinsForRandomPlayer + numberOfDraws)
    logging.info(
        "Against a random player, winRate = {}; drawRate = {}; lossRate = {}".
        format(winRate, drawRate, lossRate))

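    # Epoch 0 row: placeholders for the training and validation MSE, which do not exist yet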
    epochLossFile.write('0' + ',' + '-' + ',' + '-' + ',' +
                        str(winRate - lossRate) + ',' + str(winRate) + ',' +
                        str(drawRate) + ',' + str(lossRate) + '\n')

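    # Both players explore with the same epsilon during the reward simulations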
    playerToEpsilonDict = {
        playerList[0]: args.epsilon,
        playerList[1]: args.epsilon
    }

    for epoch in range(1, args.numberOfEpochs + 1):
        logging.info("Epoch {}".format(epoch))
        decoderMLP.train()

        # Generate positions
        minimumNumberOfMovesForInitialPositions = MinimumNumberOfMovesForInitialPositions(
            epoch)
        maximumNumberOfMovesForInitialPositions = args.maximumNumberOfMovesForInitialPositions
        logging.info("Generating positions...")
        startingPositionsList = Predictor.SimulateRandomGames(
            authority, minimumNumberOfMovesForInitialPositions,
            maximumNumberOfMovesForInitialPositions,
            args.numberOfPositionsForTraining)
        #print ("main(): startingPositionsList = {}".format(startingPositionsList))

        startingPositionsTensor = StartingPositionsTensor(
            startingPositionsList)
        logging.info(
            "Evaluating expected reward for each starting position...")
        expectedRewardsList = Predictor.ExpectedRewardsList(
            authority,
            decoderMLP,
            startingPositionsList,
            args.numberOfSimulations,
            playerList[1],
            playerToEpsilonDict=playerToEpsilonDict)
        #print ("main(): expectedRewardsList = {}".format(expectedRewardsList))
        expectedRewardsTensor = ExpectedRewardsTensor(expectedRewardsList)

        # Since the samples are generated dynamically, there is no need for minibatches: all samples are always new
        optimizer.zero_grad()

        # Forward pass
        outputTensor = decoderMLP(startingPositionsTensor)

        # Calculate the error and backpropagate
        trainingLoss = loss(outputTensor, expectedRewardsTensor)
        logging.info("trainingLoss.item() = {}".format(trainingLoss.item()))

        trainingLoss.backward()

        # Move in the gradient descent direction
        optimizer.step()

        # Validation
        decoderMLP.eval()
        validationStartingPositionsList = Predictor.SimulateRandomGames(
            authority, minimumNumberOfMovesForInitialPositions,
            maximumNumberOfMovesForInitialPositions,
            args.numberOfPositionsForValidation)

        validationStartingPositionsTensor = StartingPositionsTensor(
            validationStartingPositionsList)
        logging.info(
            "Evaluating expected reward for each validation starting position..."
        )
        validationExpectedRewardsList = Predictor.ExpectedRewardsList(
            authority,
            decoderMLP,
            validationStartingPositionsList,
            args.numberOfSimulations,
            playerList[1],
            playerToEpsilonDict=playerToEpsilonDict)
        validationExpectedRewardsTensor = ExpectedRewardsTensor(
            validationExpectedRewardsList)

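        # Forward pass on the validation set (no gradient step is taken)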
        validationOutputTensor = decoderMLP(validationStartingPositionsTensor)
        validationLoss = loss(validationOutputTensor,
                              validationExpectedRewardsTensor)
        logging.info("validationLoss.item() = {}".format(
            validationLoss.item()))

        # Play against a random player
        (numberOfWinsForEvaluator, numberOfWinsForRandomPlayer,
         numberOfDraws) = Predictor.SimulateGamesAgainstARandomPlayer(
             decoderMLP, authority, args.numberOfGamesAgainstARandomPlayer)
        winRate = numberOfWinsForEvaluator / (numberOfWinsForEvaluator +
                                              numberOfWinsForRandomPlayer +
                                              numberOfDraws)
        lossRate = numberOfWinsForRandomPlayer / (numberOfWinsForEvaluator +
                                                  numberOfWinsForRandomPlayer +
                                                  numberOfDraws)
        drawRate = numberOfDraws / (numberOfWinsForEvaluator +
                                    numberOfWinsForRandomPlayer +
                                    numberOfDraws)
        logging.info(
            "Against a random player, winRate = {}; drawRate = {}; lossRate = {}"
            .format(winRate, drawRate, lossRate))

        epochLossFile.write(
            str(epoch) + ',' + str(trainingLoss.item()) + ',' +
            str(validationLoss.item()) + ',' + str(winRate - lossRate) + ',' +
            str(winRate) + ',' + str(drawRate) + ',' + str(lossRate) + '\n')
        if epoch % 10 == 0:
            filepath = os.path.join(args.outputDirectory,
                                    'tictactoe_' + str(epoch) + '.bin')
            decoderMLP.Save(filepath)

Example #2

def main():
    logging.info("learnTicTacToeWithDecoderRandomForest.py main()")

    authority = tictactoe.Authority()
    positionTensorShape = authority.PositionTensorShape()
    moveTensorShape = authority.MoveTensorShape()
    playerList = authority.PlayersList()

    if args.startWithNeuralNetwork is not None:
        raise NotImplementedError(
            "main(): Start with a neural network is not implemented...")
    else:
        if args.startWithAutoencoder is not None:
            autoencoderNet = autoencoder.position.Net()
            autoencoderNet.Load(args.startWithAutoencoder)

            decoderRandomForest = Decoder.BuildARandomForestDecoderFromAnAutoencoder(
                autoencoderNet, args.maximumNumberOfTrees,
                args.treesMaximumDepth)
            # 'mean': the forest's value estimate is presumably the average of the trees' outputs
            decoderRandomForest.SetEvaluationMode('mean')
        else:
            raise NotImplementedError(
                "main(): Starting without an autoencoder is not implemented...")

    print("main(): decoderRandomForest.encodingBodyStructureSeq = {}".format(
        decoderRandomForest.encodingBodyStructureSeq))
    """# Create the optimizer
    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, neuralNetwork.parameters()), lr=args.learningRate,
                                 betas=(0.5, 0.999))

    # Loss function
    loss = torch.nn.MSELoss()

    # Initial learning rate
    learningRate = args.learningRate
    """

    # Output monitoring file
    epochLossFile = open(os.path.join(args.outputDirectory, 'epochLoss.csv'),
                         "w",
                         buffering=1)  # Flush the buffer at each line
    epochLossFile.write(
        "epoch,trainingMSE,validationMSE,averageRewardAgainstRandomPlayer,winRate,drawRate,lossRate\n"
    )

    # First game with a random player, before any training
    (numberOfWinsForEvaluator, numberOfWinsForRandomPlayer,
     numberOfDraws) = Predictor.SimulateGamesAgainstARandomPlayer(
         decoderRandomForest, authority, 30)
    winRate = numberOfWinsForEvaluator / (
        numberOfWinsForEvaluator + numberOfWinsForRandomPlayer + numberOfDraws)
    lossRate = numberOfWinsForRandomPlayer / (
        numberOfWinsForEvaluator + numberOfWinsForRandomPlayer + numberOfDraws)
    drawRate = numberOfDraws / (numberOfWinsForEvaluator +
                                numberOfWinsForRandomPlayer + numberOfDraws)
    logging.info(
        "Against a random player, winRate = {}; drawRate = {}; lossRate = {}".
        format(winRate, drawRate, lossRate))

    epochLossFile.write('0' + ',' + '-' + ',' + '-' + ',' +
                        str(winRate - lossRate) + ',' + str(winRate) + ',' +
                        str(drawRate) + ',' + str(lossRate) + '\n')

    for epoch in range(1, args.numberOfEpochs + 1):
        logging.info("Epoch {}".format(epoch))
        # Generate positions
        minimumNumberOfMovesForInitialPositions = MinimumNumberOfMovesForInitialPositions(
            epoch)
        maximumNumberOfMovesForInitialPositions = args.maximumNumberOfMovesForInitialPositions
        logging.info("Generating positions...")
        startingPositionsList = Predictor.SimulateRandomGames(
            authority, minimumNumberOfMovesForInitialPositions,
            maximumNumberOfMovesForInitialPositions,
            args.numberOfPositionsForTraining)
        startingPositionsTensor = StartingPositionsTensor(
            startingPositionsList)
        logging.info(
            "Evaluating expected reward for each starting position...")
        expectedRewardsList = ExpectedRewardsList(authority,
                                                  decoderRandomForest,
                                                  startingPositionsList,
                                                  args.numberOfSimulations,
                                                  playerList[1], args.epsilon)
        #print ("expectedRewardsList = {}".format(expectedRewardsList))
        expectedRewardsTensor = ExpectedRewardsTensor(expectedRewardsList)

        logging.info("Learning from the examples...")
        decoderRandomForest.LearnFromMinibatch(startingPositionsTensor,
                                               expectedRewardsTensor)

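        # Re-evaluate the training positions with the freshly fitted forest to measure the training MSE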
        afterLearningTrainingPredictionsList = decoderRandomForest.Value(
            startingPositionsTensor)
        afterLearningTrainingPredictionsTensor = ExpectedRewardsTensor(
            afterLearningTrainingPredictionsList)
        trainingMSE = torch.nn.functional.mse_loss(
            afterLearningTrainingPredictionsTensor,
            expectedRewardsTensor).item()
        logging.info("trainingMSE = {}".format(trainingMSE))

        # Test on validation positions
        logging.info("Generating validation positions...")
        validationStartingPositionsList = Predictor.SimulateRandomGames(
            authority, minimumNumberOfMovesForInitialPositions,
            maximumNumberOfMovesForInitialPositions,
            args.numberOfPositionsForValidation)
        validationStartingPositionsTensor = StartingPositionsTensor(
            validationStartingPositionsList)
        logging.info(
            "Evaluating expected reward for each validation starting position..."
        )
        validationExpectedRewardsList = ExpectedRewardsList(
            authority, decoderRandomForest, validationStartingPositionsList,
            args.numberOfSimulations, playerList[1], args.epsilon)
        validationExpectedRewardsTensor = ExpectedRewardsTensor(
            validationExpectedRewardsList)

        currentValidationPredictionList = decoderRandomForest.Value(
            validationStartingPositionsTensor)
        currentValidationPredictionTensor = ExpectedRewardsTensor(
            currentValidationPredictionList)
        validationMSE = torch.nn.functional.mse_loss(
            currentValidationPredictionTensor,
            validationExpectedRewardsTensor).item()
        logging.info("validationMSE = {}".format(validationMSE))

        # Play against a random player
        (numberOfWinsForEvaluator, numberOfWinsForRandomPlayer,
         numberOfDraws) = Predictor.SimulateGamesAgainstARandomPlayer(
             decoderRandomForest, authority, 30)
        winRate = numberOfWinsForEvaluator / (numberOfWinsForEvaluator +
                                              numberOfWinsForRandomPlayer +
                                              numberOfDraws)
        lossRate = numberOfWinsForRandomPlayer / (numberOfWinsForEvaluator +
                                                  numberOfWinsForRandomPlayer +
                                                  numberOfDraws)
        drawRate = numberOfDraws / (numberOfWinsForEvaluator +
                                    numberOfWinsForRandomPlayer +
                                    numberOfDraws)
        logging.info(
            "Against a random player, winRate = {}; drawRate = {}; lossRate = {}"
            .format(winRate, drawRate, lossRate))

        epochLossFile.write(
            str(epoch) + ',' + str(trainingMSE) + ',' + str(validationMSE) +
            ',' + str(winRate - lossRate) + ',' + str(winRate) + ',' +
            str(drawRate) + ',' + str(lossRate) + '\n')
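        # Save the forest after every epoch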
        filepath = os.path.join(args.outputDirectory,
                                'tictactoe_' + str(epoch) + '.bin')
        decoderRandomForest.Save(filepath)