Example #1
    def __init__(self, gameStates):
        equities = {0: {}, 1: {}}

        # The last five board columns hold the community cards; player rows are
        # interleaved per game (even rows seat 0, odd rows seat 1) with the
        # hole cards in the first two columns
        boardCards = gameStates.boards[:, -5:]
        player0HoleCards = gameStates.players[::2, :2]
        player1HoleCards = gameStates.players[1::2, :2]

        equities[0]['preflop'], equities[0]['flop'], equities[0]['turn'], \
            equities[0]['river'] = computeEquities(player0HoleCards, boardCards)
        equities[1]['preflop'], equities[1]['flop'], equities[1]['turn'], \
            equities[1]['river'] = computeEquities(player1HoleCards, boardCards)

        self.equities = equities
        GameDataContainer.__init__(self, len(boardCards))
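The slicing above assumes the players array interleaves the two seats row-wise (even rows seat 0, odd rows seat 1) with hole cards in the first two columns. A minimal sketch of that layout on toy data; the 6-column width is an arbitrary stand-in:

import numpy as np

# Two games, two players each; rows interleaved: game 0 seat 0, game 0 seat 1,
# game 1 seat 0, game 1 seat 1
players = np.array([[11, 12, 0, 0, 0, 0],
                    [21, 22, 0, 0, 0, 0],
                    [13, 14, 0, 0, 0, 0],
                    [23, 24, 0, 0, 0, 0]])

print(players[::2, :2])   # seat 0 hole cards per game: [[11 12] [13 14]]
print(players[1::2, :2])  # seat 1 hole cards per game: [[21 22] [23 24]]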
Example #2
def getOptimizedWinAmounts(gameDataContainer, initStacks, RND_AGENT_IDX,
                           AI_AGENT_IDX, N_RND_PLAYS_PER_HAND):
    winAmountsAi = getWinAmounts(gameDataContainer, initStacks)[:, AI_AGENT_IDX]

    # Each row holds the AI player's win amounts for one hand (the same hand
    # is played N_RND_PLAYS_PER_HAND times)
    idxToOrig = np.arange(len(winAmountsAi)).reshape(
        (-1, N_RND_PLAYS_PER_HAND))
    winAmountsAi = winAmountsAi.reshape((-1, N_RND_PLAYS_PER_HAND))

    # For each hand pick the smallest win amount
    minColumn = np.argmin(winAmountsAi, axis=1)
    winAmountsAi = winAmountsAi[np.arange(len(winAmountsAi)), minColumn]
    idxToOrig = idxToOrig[np.arange(len(winAmountsAi)), minColumn]

    # Pick the hands the AI still wins; their win amounts are reduced below by
    # making the random agent fold at a randomly chosen earlier decision point
    positiveWinAmountsMask = winAmountsAi > 0
    idxToOptimize = idxToOrig[positiveWinAmountsMask]

    # If the AI's win amount is non-positive in every game, there is nothing
    # to optimize
    if np.sum(positiveWinAmountsMask) == 0:
        return winAmountsAi

    # Pick game states to be optimized
    gameData, indexes = gameDataContainer.getData()
    players, boards, availableActions, controlVariables = [], [], [], []
    for i, idx in enumerate(idxToOptimize):
        gameDataIdx = np.array(indexes[idx])

        playersData = gameData['playersData'][gameDataIdx]
        boardsData = gameData['boardsData'][gameDataIdx]
        controlVarsData = gameData['controlVariablesData'][gameDataIdx]
        availActionsData = gameData['availableActionsData'][gameDataIdx]

        # Column 14 holds the acting player index; the last row (the final
        # state) is excluded because no action is taken there
        actingPlayerIdx = playersData[:-1, 14]

        # Pick a random decision point where the random agent was to act
        rndAgentActingIdx = np.nonzero(actingPlayerIdx == RND_AGENT_IDX)[0]
        rndAgentActingIdx = rndAgentActingIdx[np.random.randint(
            len(rndAgentActingIdx))]

        players.append(
            GameDataContainer.unflattenPlayersData(
                playersData[rndAgentActingIdx].reshape((1, -1))))
        boards.append(boardsData[rndAgentActingIdx])
        availableActions.append(availActionsData[rndAgentActingIdx])
        controlVariables.append(controlVarsData[rndAgentActingIdx])

    gameStatesOptimized = GameState(np.vstack(boards), np.vstack(players),
                                    np.vstack(controlVariables),
                                    np.vstack(availableActions))

    # Execute a fold action ([1, -1]) for the selected game states
    foldActions = np.zeros((len(idxToOptimize), 2), dtype=np.int64) - 1
    foldActions[:, 0] = 1
    gameStatesOptimized = executeActions(gameStatesOptimized, foldActions)

    # Compute optimized win amounts
    finalStacks = np.column_stack((gameStatesOptimized.players[::2, 2],
                                   gameStatesOptimized.players[1::2, 2]))
    optimizedWinAmounts = winAmountsAi.copy()
    optimizedWinAmounts[positiveWinAmountsMask] = (
        finalStacks - initStacks[idxToOptimize])[:, AI_AGENT_IDX]
    # Note: smallBlindsForGames is not defined in this function; it relies on
    # a module-level array set up in the evaluation script (one entry per game
    # before repetition)
    optimizedWinAmounts = optimizedWinAmounts / smallBlindsForGames[
        ::N_RND_PLAYS_PER_HAND]

    return optimizedWinAmounts
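The core trick above is reshaping the flat win-amount vector so each row collects the replays of one hand, then taking the per-row minimum together with its index back into the original ordering. A minimal, self-contained sketch of just that step on toy data:

import numpy as np

N_RND_PLAYS_PER_HAND = 3
winAmounts = np.array([5, -2, 7,   # hand 0, three replays
                       4, 9, 1])   # hand 1, three replays

idxToOrig = np.arange(len(winAmounts)).reshape((-1, N_RND_PLAYS_PER_HAND))
perHand = winAmounts.reshape((-1, N_RND_PLAYS_PER_HAND))

minColumn = np.argmin(perHand, axis=1)
worstPerHand = perHand[np.arange(len(perHand)), minColumn]   # [-2  1]
origIndexes = idxToOrig[np.arange(len(perHand)), minColumn]  # [1 5]
print(worstPerHand, origIndexes)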
Example #3

    # %%

    # Create game data for evaluation
    initGameStates, initStacks = initRandomGames(N_HANDS_FOR_EVAL)

    # Repeat every game N_RND_PLAYS_PER_HAND times so the same deal can be
    # replayed with different random actions
    initGameStates.availableActions = np.repeat(
        initGameStates.availableActions, N_RND_PLAYS_PER_HAND, axis=0)
    initGameStates.boards = np.repeat(initGameStates.boards,
                                      N_RND_PLAYS_PER_HAND,
                                      axis=0)
    initGameStates.controlVariables = np.repeat(
        initGameStates.controlVariables, N_RND_PLAYS_PER_HAND, axis=0)
    initGameStates.players = GameDataContainer.unflattenPlayersData(
        np.repeat(GameDataContainer.flattenPlayersData(initGameStates.players),
                  N_RND_PLAYS_PER_HAND,
                  axis=0))
    initGameStates.validMask = np.repeat(initGameStates.validMask,
                                         N_RND_PLAYS_PER_HAND,
                                         axis=0)
    initGameStates.validMaskPlayers = np.repeat(
        initGameStates.validMaskPlayers, N_RND_PLAYS_PER_HAND, axis=0)
    initStacks = np.repeat(initStacks, N_RND_PLAYS_PER_HAND, axis=0)
    smallBlindsForGames = initGameStates.boards[:, 1]

    # -999 acts as a "no action" placeholder value
    mockActions = np.zeros(
        (len(initGameStates.availableActions), 2), dtype=np.int64) - 999
    actionsToExecute = np.zeros(
        (len(initGameStates.availableActions), 2), dtype=np.int64) - 999

    bestIndexes = []
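np.repeat with axis=0 duplicates whole rows in order, which is what makes each hand appear N_RND_PLAYS_PER_HAND consecutive times above. A quick sketch of that behavior:

import numpy as np

N_RND_PLAYS_PER_HAND = 2
boards = np.array([[10, 1],
                   [20, 2]])

# Rows are repeated consecutively, so the replays of one hand stay adjacent:
# [[10  1] [10  1] [20  2] [20  2]]
print(np.repeat(boards, N_RND_PLAYS_PER_HAND, axis=0))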
Example #4
class AiModel(nn.Module):
    def __init__(self, winLen):
        super().__init__()

        self.layers = nn.Sequential(nn.Linear(7 * (winLen + 17), 250),
                                    nn.ReLU(), nn.Linear(250, 10))

        # Get references to weights and biases. These are used when mutating the model.
        self.weights, self.biases = [], []
        for layer in self.layers:
            # Hack: layers without parameters (e.g. nn.ReLU) raise an
            # AttributeError and are simply skipped
            try:
                self.weights.append(layer.weight)
                self.biases.append(layer.bias)
            except AttributeError:
                pass

    def forward(self, x):
        x = self.layers(x)
        return x

    def mutateWeights(self, sigma, ratio=1.0):
        for i in range(len(self.weights)):
            # reshape(-1) returns a view into the contiguous weight tensor, so
            # the in-place addition below mutates the model's parameters
            w = self.weights[i].data.numpy().reshape(-1)
            rndIdx = np.random.choice(len(w),
                                      size=max(1, int(ratio * len(w))),
                                      replace=False)
            w[rndIdx] += np.random.normal(scale=sigma, size=len(rndIdx))

    def mutateBiases(self, sigma, ratio=1.0):
        for i in range(len(self.biases)):
            b = self.biases[i].data.numpy().reshape(-1)
            rndIdx = np.random.choice(len(b),
                                      size=max(1, int(ratio * len(b))),
                                      replace=False)
            b[rndIdx] += np.random.normal(scale=sigma, size=len(rndIdx))

    def mutate(self, sigma, ratio=1.0):
        self.mutateWeights(sigma, ratio=ratio)
        self.mutateBiases(sigma, ratio=ratio)

if __name__ == "__main__":

# %%

# Initialize the agent population

#SEED = 123

    POPULATION_SIZE = 200
    RATIO_BEST_INDIVIDUALS = 0.10
    MUTATION_SIGMA = 1.0e-2
    MUTATION_RATIO = 1.0

    N_HANDS_FOR_EVAL = 25000
    N_HANDS_FOR_RE_EVAL = 100000
    N_RND_PLAYS_PER_HAND = 1

    RND_AGENT_IDX = 0
    AI_AGENT_IDX = 1 - RND_AGENT_IDX  # index of the other seat
    WIN_LEN = 2

    N_CORES = 6

    device = torch.device('cpu')

    models = []
    for i in range(POPULATION_SIZE):
        models.append(AiModel(WIN_LEN).to(device))
    models = np.array(models)

    # %%

    # Create game data for evaluation
    initGameStates, initStacks = initRandomGames(N_HANDS_FOR_EVAL)

    initGameStates.availableActions = np.repeat(
        initGameStates.availableActions, N_RND_PLAYS_PER_HAND, axis=0)
    initGameStates.boards = np.repeat(initGameStates.boards,
                                      N_RND_PLAYS_PER_HAND,
                                      axis=0)
    initGameStates.controlVariables = np.repeat(
        initGameStates.controlVariables, N_RND_PLAYS_PER_HAND, axis=0)
    initGameStates.players = GameDataContainer.unflattenPlayersData(
        np.repeat(GameDataContainer.flattenPlayersData(initGameStates.players),
                  N_RND_PLAYS_PER_HAND,
                  axis=0))
    initGameStates.validMask = np.repeat(initGameStates.validMask,
                                         N_RND_PLAYS_PER_HAND,
                                         axis=0)
    initGameStates.validMaskPlayers = np.repeat(
        initGameStates.validMaskPlayers, N_RND_PLAYS_PER_HAND, axis=0)
    initStacks = np.repeat(initStacks, N_RND_PLAYS_PER_HAND, axis=0)
    #    smallBlindsForGames = initGameStates.boards[:,1]

    populationFitness, bestFitness = [], []

    # %%

    for k in range(50):

        # Replace a random 25 % of the evaluation hands with fresh deals
        states, stacks = initRandomGames(int(N_HANDS_FOR_EVAL * 0.25))
        #        smallBlinds = states.boards[:,1]
        rndIdx = np.random.choice(N_HANDS_FOR_EVAL,
                                  size=len(stacks),
                                  replace=False)

        #        smallBlindsForGames[rndIdx] = smallBlinds
        initStacks[rndIdx] = stacks

        initGameStates.availableActions[rndIdx] = states.availableActions
        initGameStates.boards[rndIdx] = states.boards
        initGameStates.controlVariables[rndIdx] = states.controlVariables
        # The players array has two interleaved rows per game
        rndIdx2 = np.repeat(rndIdx * 2, 2)
        rndIdx2[1::2] = rndIdx * 2 + 1
        initGameStates.players[rndIdx2] = states.players

        # Play games
        finalGameStates = playGamesParallel(initGameStates, models, N_CORES,
                                            WIN_LEN, RND_AGENT_IDX,
                                            AI_AGENT_IDX)
        assert len(finalGameStates) == POPULATION_SIZE

        modelWinAmounts = getWinAmountsForModels(finalGameStates, initStacks,
                                                 AI_AGENT_IDX)
        #        modelWinAmounts = optimizeWinAmounts(modelWinAmounts)

        modelFitness = [np.mean(amounts) for amounts in modelWinAmounts]

        # Keep the top RATIO_BEST_INDIVIDUALS fraction of the population
        sorter = np.argsort(modelFitness)
        bestIdx = sorter[-int(len(sorter) * RATIO_BEST_INDIVIDUALS):]

        # Re-evaluate the best individuals on a larger, independent set of
        # hands to get a less noisy ranking
        replayGameStates, replayStacks = initRandomGames(N_HANDS_FOR_RE_EVAL)
        replayFinalGameStates = playGamesParallel(replayGameStates,
                                                  models[bestIdx], N_CORES,
                                                  WIN_LEN, RND_AGENT_IDX,
                                                  AI_AGENT_IDX)
        assert len(replayFinalGameStates) == len(bestIdx)
        replayModelWinAmounts = getWinAmountsForModels(replayFinalGameStates,
                                                       replayStacks,
                                                       AI_AGENT_IDX)
        #        replayModelWinAmounts = optimizeWinAmounts(replayModelWinAmounts)
        replayModelFitness = [
            np.mean(np.concatenate((amounts, amounts2)))
            for amounts, amounts2 in zip(replayModelWinAmounts, modelWinAmounts)
        ]
        # Rank the retained individuals by their re-evaluated fitness
        bestIdx = bestIdx[np.argsort(replayModelFitness)]

        populationFitness.append(np.mean(modelFitness))
        bestFitness.append(np.max(replayModelFitness))

        print('................................')
        print(k, np.mean(modelFitness), np.max(replayModelFitness))


        # Elitism: carry the three best individuals unchanged to the next
        # generation
        nextGeneration = [models[idx] for idx in bestIdx[-3:]]

        # Fill the rest of the next generation with mutated copies of randomly
        # chosen top individuals
        for i in range(POPULATION_SIZE - len(nextGeneration)):
            idx = bestIdx[np.random.randint(len(bestIdx))]

            model = copy.deepcopy(models[idx])
            model.mutate(MUTATION_SIGMA, ratio=MUTATION_RATIO)

            nextGeneration.append(model)

        models = np.array(nextGeneration)

    # Plot fitness over generations, skipping the first noisy generations
    n = 5
    plt.plot(populationFitness[n:])
    plt.plot(bestFitness[n:])
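The generation loop above is a plain selection-plus-mutation evolution strategy: evaluate the population, keep the top fraction, copy the elite through unchanged, and fill the rest with mutated copies. A minimal, self-contained sketch of that skeleton; the toy parameter vectors and the quadratic fitness function are stand-ins for the models and the poker evaluation:

import copy
import numpy as np

rng = np.random.default_rng(0)
POPULATION_SIZE, RATIO_BEST, SIGMA = 20, 0.10, 1e-2

# Toy "models": parameter vectors scored by a stand-in fitness function
population = [rng.normal(size=5) for _ in range(POPULATION_SIZE)]
fitness = lambda p: -np.sum(p ** 2)  # placeholder for the poker evaluation

for generation in range(10):
    scores = [fitness(p) for p in population]
    sorter = np.argsort(scores)
    bestIdx = sorter[-int(len(sorter) * RATIO_BEST):]

    # Elitism: the best individuals survive unchanged
    nextGeneration = [population[i] for i in bestIdx]

    # The rest are mutated copies of randomly chosen elite members
    while len(nextGeneration) < POPULATION_SIZE:
        parent = copy.deepcopy(population[rng.choice(bestIdx)])
        parent += rng.normal(scale=SIGMA, size=parent.shape)
        nextGeneration.append(parent)
    population = nextGeneration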
Example #5
        
        return createActionsToExecute(actionAmounts), mask



# %%

nGames = 5000
callPlayerIdx = 0
rndPlayerIdx = 1
nRandomSets = 8

initGameStates, initStacks = initRandomGames(nGames, seed=76)
equities = getEquities(initGameStates)

gameCont = GameDataContainer(nGames)

agents = [CallAgent(callPlayerIdx), RndAgent(rndPlayerIdx)]
#agents = [AiAgent(0, computeFeatures, regressor, equities), RndAgent(rndPlayerIdx)]

gameContainers = [playGames(agents, copy.deepcopy(initGameStates), copy.deepcopy(gameCont))
                  for _ in range(nRandomSets)]

# %%

winAmounts = [getWinAmounts(c, initStacks)[:, rndPlayerIdx] for c in gameContainers]
winAmounts = np.column_stack(winAmounts)

# For each game, find the play-through (container) with the highest return for
# the random player
highestReturnGameContainerIdx = np.argmax(winAmounts, axis=1)
gameNums = np.arange(nGames)
gameNumsForGameContainers = [[] for i in range(nRandomSets)]
winAmounts2 = [[] for i in range(nRandomSets)]
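The column_stack/argmax step above picks, for every game, which of the nRandomSets play-throughs gave the random player the highest return. A small sketch of that selection on toy data:

import numpy as np

# Win amounts for 3 games (rows) across 4 random play-throughs (columns)
winAmounts = np.array([[ 5, -1,  3,  0],
                       [-2, -4, -1, -3],
                       [ 0,  7,  7,  1]])

best = np.argmax(winAmounts, axis=1)          # container index per game: [0 2 1]
bestAmounts = winAmounts[np.arange(3), best]  # [ 5 -1  7]
print(best, bestAmounts)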