def SemiExhaustiveMinimaxHighestValue(
    playerList,
    authority,
    neuralNetwork,
    positionTensor,
    epsilon,
    maximumDepthOfSemiExhaustiveSearch,
    numberOfTopMovesToDevelop,
    displayExpectedMoveValues,
):
    (moveValuesTensor, standardDeviationTensor,
     legalMovesMask) = expectedMoveValues.SemiExhaustiveMiniMax(
         playerList, authority, neuralNetwork, positionTensor, epsilon,
         maximumDepthOfSemiExhaustiveSearch, 1, numberOfTopMovesToDevelop)
    chosenMoveTensor = torch.zeros(authority.MoveTensorShape())
    highestValue = -1E9
    highestValueCoords = (0, 0, 0, 0)
    # Scan the legal moves and keep the coordinates of the highest expected value
    nonZeroCoordsTensor = torch.nonzero(legalMovesMask)
    for nonZeroCoordsNdx in range(nonZeroCoordsTensor.size(0)):
        nonZeroCoords = nonZeroCoordsTensor[nonZeroCoordsNdx]
        if moveValuesTensor[nonZeroCoords[0], nonZeroCoords[1],
                            nonZeroCoords[2], nonZeroCoords[3]] > highestValue:
            highestValue = moveValuesTensor[nonZeroCoords[0], nonZeroCoords[1],
                                            nonZeroCoords[2], nonZeroCoords[3]]
            highestValueCoords = nonZeroCoords
    chosenMoveTensor[highestValueCoords[0], highestValueCoords[1],
                     highestValueCoords[2], highestValueCoords[3]] = 1.0

    if displayExpectedMoveValues:
        DisplayExpectedMoveValues(moveValuesTensor, standardDeviationTensor,
                                  legalMovesMask, chosenMoveTensor)

    return chosenMoveTensor
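
# A minimal usage sketch (not part of the original module), assuming a trained network with the
# same interface as ConvolutionStack.Net and the connect4.Authority used in main() below.
# The search depth and number of top moves are arbitrary example values.
def _ExampleSemiExhaustiveMinimaxHighestValue(neuralNetwork):
    import connect4
    authority = connect4.Authority()
    chosenMoveTensor = SemiExhaustiveMinimaxHighestValue(
        authority.PlayersList(),
        authority,
        neuralNetwork,
        authority.InitialPosition(),
        epsilon=0,  # No exploration: always pick the highest expected value
        maximumDepthOfSemiExhaustiveSearch=2,
        numberOfTopMovesToDevelop=3,
        displayExpectedMoveValues=False,
    )
    return chosenMoveTensor
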
def GenerateMoveStatisticsWithMiniMax(
                            playerList,
                            authority,
                            neuralNetwork,
                            numberOfMovesForInitialPositionsMinMax,
                            numberOfInitialPositions,
                            maximumDepthOfExhaustiveSearch,
                            additionalStartingPositionsList=None
                            ):
    # Create initial positions, starting from a copy of the optional caller-supplied positions.
    # A None default avoids the mutable-default-argument pitfall, and copying the list avoids
    # appending into the caller's list.
    if additionalStartingPositionsList is None:
        additionalStartingPositionsList = []
    initialPositions = list(additionalStartingPositionsList)

    minimumNumberOfMovesForInitialPositions = numberOfMovesForInitialPositionsMinMax[0]
    maximumNumberOfMovesForInitialPositions = numberOfMovesForInitialPositionsMinMax[1]

    if minimumNumberOfMovesForInitialPositions > maximumNumberOfMovesForInitialPositions:
        # Swap so that the minimum does not exceed the maximum
        minimumNumberOfMovesForInitialPositions, maximumNumberOfMovesForInitialPositions = \
            maximumNumberOfMovesForInitialPositions, minimumNumberOfMovesForInitialPositions

    while len(initialPositions) < numberOfInitialPositions: # Complete with random games
        numberOfMoves = random.randint(minimumNumberOfMovesForInitialPositions, maximumNumberOfMovesForInitialPositions)
        if numberOfMoves % 2 == 1:
            numberOfMoves += 1  # Make sure the last player to have played is playerList[1]
        positionTensor = authority.InitialPosition()
        winner = None
        moveNdx = 0
        while moveNdx < numberOfMoves and winner is None:
            player = playerList[moveNdx % 2]
            # print ("GenerateMoveStatistics(): player = {}".format(player))
            randomMoveTensor = utilities.ChooseARandomMove(positionTensor, player, authority)
            positionTensor, winner = authority.Move(positionTensor, player, randomMoveTensor)
            moveNdx += 1
        if winner is None:
            initialPositions.append(positionTensor.clone())


    # For each initial position, evaluate the value of each possible move through semi-exhaustive minimax
    positionMoveStatistics = list()
    for initialPosition in initialPositions:
        (averageValuesTensor, standardDeviationTensor, legalMovesMask) = \
            expectedMoveValues.SemiExhaustiveMiniMax(
                playerList,
                authority,
                neuralNetwork,
                initialPosition,
                epsilon=0,  # Always choose the highest value move
                maximumDepthOfSemiExhaustiveSearch=maximumDepthOfExhaustiveSearch,
                currentDepth=1,
                numberOfTopMovesToDevelop=-1  # Develop all moves => exhaustive search
            )
        positionMoveStatistics.append((initialPosition, averageValuesTensor,
                                       standardDeviationTensor, legalMovesMask))

    return positionMoveStatistics
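
# Illustrative sketch (assumption, not in the original file): gathering minimax move statistics
# for a few randomly generated connect4 positions. The (4, 10) move range, the number of
# positions, and the search depth are arbitrary example values.
def _ExampleGenerateMoveStatisticsWithMiniMax(neuralNetwork):
    import connect4
    authority = connect4.Authority()
    positionMoveStatistics = GenerateMoveStatisticsWithMiniMax(
        authority.PlayersList(),
        authority,
        neuralNetwork,
        numberOfMovesForInitialPositionsMinMax=(4, 10),
        numberOfInitialPositions=8,
        maximumDepthOfExhaustiveSearch=1,
    )
    for (position, averageValues, standardDeviations, legalMovesMask) in positionMoveStatistics:
        authority.Display(position)
        print("averageValues = \n{}".format(averageValues))
    return positionMoveStatistics
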
def AskTheNeuralNetworkToChooseAMove(playersList, authority,
                                     playingNeuralNetwork, positionTensor,
                                     depthOfExhaustiveSearch,
                                     numberOfTopMovesToDevelop,
                                     softMaxTemperature):

    moveValuesTensor, standardDeviationTensor, legalMovesMask = \
    expectedMoveValues.SemiExhaustiveMiniMax(
        playersList,
        authority,
        playingNeuralNetwork,
        positionTensor,
        0.0,
        depthOfExhaustiveSearch,
        1,
        numberOfTopMovesToDevelop
    )

    # Normalize probabilities
    normalizedActionValuesTensor = utilities.NormalizeProbabilities(
        moveValuesTensor,
        legalMovesMask,
        preApplySoftMax=True,
        softMaxTemperature=softMaxTemperature)
    chosenMoveTensor = torch.zeros(authority.MoveTensorShape())
    # Choose with roulette
    runningSum = 0
    chosenCoordinates = None

    nonZeroCoordsTensor = torch.nonzero(legalMovesMask)
    randomNbr = random.random()
    # Walk through all legal moves except the last one; the last move serves as the
    # fallback below, so it receives whatever probability mass remains.
    for nonZeroCoordsNdx in range(nonZeroCoordsTensor.size(0) - 1):
        nonZeroCoords = nonZeroCoordsTensor[nonZeroCoordsNdx]
        runningSum += normalizedActionValuesTensor[nonZeroCoords[0],
                                                   nonZeroCoords[1],
                                                   nonZeroCoords[2],
                                                   nonZeroCoords[3]]
        if runningSum >= randomNbr:
            chosenCoordinates = (nonZeroCoords[0], nonZeroCoords[1],
                                 nonZeroCoords[2], nonZeroCoords[3])
            break  # Stop looping
    if chosenCoordinates is None:  # Numerical round-off: fall back to the last legal move
        chosenNdx = nonZeroCoordsTensor.size(0) - 1
        nonZeroCoords = nonZeroCoordsTensor[chosenNdx]
        chosenCoordinates = (nonZeroCoords[0], nonZeroCoords[1],
                             nonZeroCoords[2], nonZeroCoords[3])

    chosenMoveTensor[chosenCoordinates] = 1.0

    return chosenMoveTensor
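
# Illustrative sketch (assumption): sampling a move from a trained network or a Committee with
# a soft-max temperature of 1.0, mirroring the call pattern used in main() below. The depth and
# number of top moves are arbitrary example values.
def _ExampleAskTheNeuralNetworkToChooseAMove(playingNeuralNetwork, authority, positionTensor):
    return AskTheNeuralNetworkToChooseAMove(
        authority.PlayersList(),
        authority,
        playingNeuralNetwork,
        positionTensor,
        depthOfExhaustiveSearch=2,
        numberOfTopMovesToDevelop=4,
        softMaxTemperature=1.0,
    )
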
def main():
    print ('netEnsemble.py main()')
    #neuralNet1 = ConvolutionStack.Net()
    #neuralNet1.Load('/home/sebastien/projects/DeepReinforcementLearning/outputs/ToKeep/Net_(2,1,6,7)_[(5,16),(5,16),(5,16)]_(1,1,1,7)_connect4_356.pth')
    neuralNet2 = ConvolutionStack.Net()
    neuralNet2.Load('/home/sebastien/projects/DeepReinforcementLearning/outputs/ToKeep/Net_(2,1,6,7)_[(5,32),(5,32),(5,32)]_(1,1,1,7)_connect4_defeatRate0.0033.pth')
    neuralNet3 = ConvolutionStack.Net()
    neuralNet3.Load('/home/sebastien/projects/DeepReinforcementLearning/outputs/ToKeep/Net_(2,1,6,7)_[(5,32),(5,32),(5,32)]_(1,1,1,7)_connect4_defeatRate0.0033b.pth')
    neuralNet4 = ConvolutionStack.Net()
    neuralNet4.Load('/home/sebastien/projects/DeepReinforcementLearning/outputs/ToKeep/Net_(2,1,6,7)_[(5,32),(5,32),(5,32)]_(1,1,1,7)_connect4_defeatRate0.0033c.pth')
    neuralNet5 = ConvolutionStack.Net()
    neuralNet5.Load('/home/sebastien/projects/DeepReinforcementLearning/outputs/ToKeep/Net_(2,1,6,7)_[(5,32),(5,32),(5,32)]_(1,1,1,7)_connect4_defeatRate0.0033d.pth')
    neuralNet6 = ConvolutionStack.Net()
    neuralNet6.Load('/home/sebastien/projects/DeepReinforcementLearning/outputs/ToKeep/Net_(2,1,6,7)_[(5,32),(5,32),(5,32)]_(1,1,1,7)_connect4_defeatRate0.0033e.pth')
    neuralNet7 = ConvolutionStack.Net()
    neuralNet7.Load('/home/sebastien/projects/DeepReinforcementLearning/outputs/ToKeep/Net_(2,1,6,7)_[(5,32),(5,32),(5,32)]_(1,1,1,7)_connect4_defeatRate0.pth')
    neuralNetsList = [neuralNet2, neuralNet3, neuralNet4, neuralNet5, neuralNet6, neuralNet7]

    committee = Committee(neuralNetsList)

    #inputTensor = torch.zeros((2, 1, 6, 7)).unsqueeze(0)
    #outputTensorsList = committee.forward(inputTensor)
    #print ('main(): outputTensorsList = {}'.format(outputTensorsList))

    #medianValuesTensor = committee.MedianValues(torch.zeros((2, 1, 6, 7)).unsqueeze(0))
    #print ("medianValuesTensor = {}".format(medianValuesTensor))

    import connect4
    authority = connect4.Authority()
    playerList = authority.PlayersList()
    epsilon = 0
    maximumDepthOfSemiExhaustiveSearch = 2
    numberOfTopMovesToDevelop = 4
    inputTensor = authority.InitialPosition()

    authority.Display(inputTensor)
    (moveValuesTensor, standardDeviationTensor, legalMovesMask) = expectedMoveValues.SemiExhaustiveMiniMax(
        playerList,
        authority,
        committee,
        inputTensor,
        epsilon,
        maximumDepthOfSemiExhaustiveSearch,
        1,
        numberOfTopMovesToDevelop
    )
    print ("moveValuesTensor = \n{}".format(moveValuesTensor))
    print ("standardDeviationTensor = \n{}".format(standardDeviationTensor))
    print ("legalMovesMask = \n{}".format(legalMovesMask))
def main():
    print("testNetEnsemble.py main()")
    neuralNet2 = ConvolutionStack.Net()
    neuralNet2.Load(
        '/home/sebastien/projects/DeepReinforcementLearning/outputs/ToKeep/Net_(2,1,6,7)_[(5,32),(5,32),(5,32)]_(1,1,1,7)_connect4_defeatRate0.0033.pth'
    )
    neuralNet3 = ConvolutionStack.Net()
    neuralNet3.Load(
        '/home/sebastien/projects/DeepReinforcementLearning/outputs/ToKeep/Net_(2,1,6,7)_[(5,32),(5,32),(5,32)]_(1,1,1,7)_connect4_defeatRate0.0033b.pth'
    )
    neuralNet4 = ConvolutionStack.Net()
    neuralNet4.Load(
        '/home/sebastien/projects/DeepReinforcementLearning/outputs/ToKeep/Net_(2,1,6,7)_[(5,32),(5,32),(5,32)]_(1,1,1,7)_connect4_defeatRate0.0033c.pth'
    )
    neuralNet5 = ConvolutionStack.Net()
    neuralNet5.Load(
        '/home/sebastien/projects/DeepReinforcementLearning/outputs/ToKeep/Net_(2,1,6,7)_[(5,32),(5,32),(5,32)]_(1,1,1,7)_connect4_defeatRate0.0033d.pth'
    )
    neuralNet6 = ConvolutionStack.Net()
    neuralNet6.Load(
        '/home/sebastien/projects/DeepReinforcementLearning/outputs/ToKeep/Net_(2,1,6,7)_[(5,32),(5,32),(5,32)]_(1,1,1,7)_connect4_defeatRate0.0033e.pth'
    )
    neuralNet7 = ConvolutionStack.Net()
    neuralNet7.Load(
        '/home/sebastien/projects/DeepReinforcementLearning/outputs/ToKeep/Net_(2,1,6,7)_[(5,32),(5,32),(5,32)]_(1,1,1,7)_connect4_defeatRate0.pth'
    )
    neuralNetsList = [
        neuralNet2, neuralNet3, neuralNet4, neuralNet5, neuralNet6, neuralNet7
    ]

    committee = netEnsemble.Committee(neuralNetsList)

    import connect4
    authority = connect4.Authority()
    playerList = authority.PlayersList()
    epsilon = 0
    maximumDepthOfSemiExhaustiveSearch = 2
    numberOfTopMovesToDevelop = 7
    inputTensor = authority.InitialPosition()
    inputTensor[0, 0, 5, 1] = 1
    inputTensor[0, 0, 2, 2] = 1
    inputTensor[0, 0, 4, 2] = 1
    inputTensor[0, 0, 5, 3] = 1
    inputTensor[0, 0, 5, 4] = 1
    inputTensor[0, 0, 5, 5] = 1
    inputTensor[1, 0, 3, 2] = 1
    inputTensor[1, 0, 5, 2] = 1
    inputTensor[1, 0, 3, 3] = 1
    inputTensor[1, 0, 4, 3] = 1
    inputTensor[1, 0, 3, 4] = 1
    inputTensor[1, 0, 4, 4] = 1
    inputTensor[1, 0, 4, 6] = 1
    inputTensor[1, 0, 5, 6] = 1
    authority.Display(inputTensor)
    (moveValuesTensor, standardDeviationTensor,
     legalMovesMask) = expectedMoveValues.SemiExhaustiveMiniMax(
         playerList, authority, committee, inputTensor, epsilon,
         maximumDepthOfSemiExhaustiveSearch, 1, numberOfTopMovesToDevelop)
    print("moveValuesTensor = \n{}".format(moveValuesTensor))
    print("standardDeviationTensor = \n{}".format(standardDeviationTensor))
    print("legalMovesMask = \n{}".format(legalMovesMask))