Example #1
            break

    # Also save the final game state
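    # (-999 appears to act as a "no action" sentinel here, since nothing is executed in the terminal state)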
    actionsToExecute = np.zeros(
        (len(gameStates.availableActions), 2), dtype=np.int64) - 999
    gameDataContainer.addData(gameStates, actionsToExecute)

    return gameDataContainer


# %%
# Run random games to initialize random forest agent

nGames = 10000

gameStates = initRandomGames(nGames)
rfFeatures = RfFeatures(gameStates)

#agents = [RndAgent(0), RfAgent(1, rfFeatures, regressor)]
#agents = [RfAgent(0, rfFeatures, regressor), RfAgent(1, rfFeatures, regressor)]
agents = [RndAgent(0), RndAgent(1)]

rfFeatures = playGames(agents, gameStates, rfFeatures)

features, executedActions, executedActionsNorm, misc = rfFeatures.getFeaturesForAllGameStates()

# Sanity checks
assert np.sum(misc['gameFailedMask']) == 0  # No game failures
assert np.sum(misc['gameFinishedMask']) == nGames  # All games ended successfully
Example #2
    #    actionAmounts = executedActions[:,1].copy()
    #    actionAmounts[np.isclose(actionAmounts,0)] = 0.001
    #    betToWinRatios = winAmountsFeatures / actionAmounts
    #    betToWinRatios = np.clip(betToWinRatios, a_min=-clippingThres, a_max=clippingThres)

    return winAmountsFeatures


# %%
# Run random games to initialize random forest agent

nGames = 100000

gameStates = initRandomGames(nGames)
rfFeatures = RfFeatures(gameStates)

#agents = [RndAgent(0), RfAgent(1, rfFeatures, regressor)]

#agents = [RfAgent(0, rfFeatures, regressor), RfAgent(1, rfFeatures, regressor)]
#agents = [RfAgent(0, rfFeatures, regressorOld), RfAgent(1, rfFeatures, regressor)]
#agents = [RfAgent(0, rfFeatures, regressor0), RfAgent(1, rfFeatures, regressor)]

agents = [RndAgent(0), RndAgent(1)]

while True:
    actionsAgent0, maskAgent0 = agents[0].getActions(gameStates)
    actionsAgent1, maskAgent1 = agents[1].getActions(gameStates)

    actionsToExecute = np.zeros(
        (len(gameStates.availableActions), 2), dtype=np.int64) - 999
Example #3

import copy

import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn

# NOTE: the project-local helpers used below (initRandomGames, GameDataContainer,
# playGamesParallel, getWinAmountsForModels) are assumed importable from the
# surrounding package; the snippet does not show where they are defined.

class AiModel(nn.Module):
    def __init__(self, winLen):
        super(AiModel, self).__init__()

        self.layers = nn.Sequential(nn.Linear(7 * (winLen + 17), 250),
                                    nn.ReLU(), nn.Linear(250, 10))

        # Get references to weights and biases. These are used when mutating the model.
        self.weights, self.biases = [], []
        for layer in self.layers:
            # Hack: layers with no weights (e.g., nn.ReLU) raise an AttributeError and are skipped
            try:
                self.weights.append(layer.weight)
                self.biases.append(layer.bias)
            except AttributeError:
                pass

    def forward(self, x):
        x = self.layers(x)
        return x


    def mutateWeights(self, sigma, ratio=1.0):
        for i in range(len(self.weights)):
            w = self.weights[i].data.numpy().reshape(-1)
            rndIdx = np.random.choice(len(w),
                                      size=max(1, int(ratio * len(w))),
                                      replace=False)
            w[rndIdx] += np.random.normal(scale=sigma, size=len(rndIdx))

    def mutateBiases(self, sigma, ratio=1.0):
        for i in range(len(self.biases)):
            b = self.biases[i].data.numpy().reshape(-1)
            rndIdx = np.random.choice(len(b),
                                      size=max(1, int(ratio * len(b))),
                                      replace=False)
            b[rndIdx] += np.random.normal(scale=sigma, size=len(rndIdx))

    def mutate(self, sigma, ratio=1.0):
        self.mutateWeights(sigma, ratio=ratio)
        self.mutateBiases(sigma, ratio=ratio)
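

# Minimal usage sketch (hypothetical helper, not part of the original run flow):
# the model maps a flat feature vector of width 7 * (winLen + 17) to 10 outputs,
# and mutate() perturbs a random subset of its parameters in place.
def _demoAiModel():
    model = AiModel(winLen=2)
    x = torch.rand(8, 7 * (2 + 17))        # batch of 8 feature vectors
    scores = model(x)                      # -> tensor of shape (8, 10)
    model.mutate(sigma=1e-2, ratio=0.5)    # perturb half of the weights and biases
    return scores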

if __name__ == "__main__":

# %%

# Initialize agent

#SEED = 123

    POPULATION_SIZE = 200
    RATIO_BEST_INDIVIDUALS = 0.10
    MUTATION_SIGMA = 1.0e-2
    MUTATION_RATIO = 1.0

    N_HANDS_FOR_EVAL = 25000
    N_HANDS_FOR_RE_EVAL = 100000
    N_RND_PLAYS_PER_HAND = 1

    RND_AGENT_IDX = 0
    AI_AGENT_IDX = np.abs(RND_AGENT_IDX - 1)
    WIN_LEN = 2

    N_CORES = 6

    device = torch.device('cpu')

    models = []
    for i in range(POPULATION_SIZE):
        #        m = keras.Sequential()
        #        m.add(keras.layers.Dense(50, activation='relu', input_dim=7*(WIN_LEN+17)))
        #        m.add(keras.layers.Dense(10, activation='relu'))

        models.append(AiModel(WIN_LEN).to(device))
    models = np.array(models)
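    # An object array lets the population be fancy-indexed later (e.g., models[bestIdx])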

    #    # Disable gpu
    #    import os
    #    os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
    #    if tf.test.gpu_device_name():
    #        print('GPU found')
    #    else:
    #        print("No GPU found")

    # %%

    # Create game data for evaluation
    initGameStates, initStacks = initRandomGames(N_HANDS_FOR_EVAL)

    initGameStates.availableActions = np.repeat(
        initGameStates.availableActions, N_RND_PLAYS_PER_HAND, axis=0)
    initGameStates.boards = np.repeat(initGameStates.boards,
                                      N_RND_PLAYS_PER_HAND,
                                      axis=0)
    initGameStates.controlVariables = np.repeat(
        initGameStates.controlVariables, N_RND_PLAYS_PER_HAND, axis=0)
    initGameStates.players = GameDataContainer.unflattenPlayersData(
        np.repeat(GameDataContainer.flattenPlayersData(initGameStates.players),
                  N_RND_PLAYS_PER_HAND,
                  axis=0))
    initGameStates.validMask = np.repeat(initGameStates.validMask,
                                         N_RND_PLAYS_PER_HAND,
                                         axis=0)
    initGameStates.validMaskPlayers = np.repeat(
        initGameStates.validMaskPlayers, N_RND_PLAYS_PER_HAND, axis=0)
    initStacks = np.repeat(initStacks, N_RND_PLAYS_PER_HAND, axis=0)
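    # Every hand is repeated N_RND_PLAYS_PER_HAND times, presumably so that results
    # for the same deal can be averaged over several random playouts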
    #    smallBlindsForGames = initGameStates.boards[:,1]

    populationFitness, bestFitness = [], []

    # %%

    for k in range(50):

        states, stacks = initRandomGames(int(N_HANDS_FOR_EVAL * 0.25))
        #        smallBlinds = states.boards[:,1]
        rndIdx = np.random.choice(N_HANDS_FOR_EVAL,
                                  size=len(stacks),
                                  replace=False)

        #        smallBlindsForGames[rndIdx] = smallBlinds
        initStacks[rndIdx] = stacks

        initGameStates.availableActions[rndIdx] = states.availableActions
        initGameStates.boards[rndIdx] = states.boards
        initGameStates.controlVariables[rndIdx] = states.controlVariables
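        # players stores two rows per game, so build interleaved row indices
        # (2*i, 2*i+1) for the games being replaced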
        rndIdx2 = np.repeat(rndIdx * 2, 2)
        rndIdx2[1::2] = rndIdx * 2 + 1
        initGameStates.players[rndIdx2] = states.players

        # Play games
        finalGameStates = playGamesParallel(initGameStates, models, N_CORES,
                                            WIN_LEN, RND_AGENT_IDX,
                                            AI_AGENT_IDX)
        assert len(finalGameStates) == POPULATION_SIZE

        modelWinAmounts = getWinAmountsForModels(finalGameStates, initStacks,
                                                 AI_AGENT_IDX)
        #        modelWinAmounts = optimizeWinAmounts(modelWinAmounts)

        modelFitness = [np.mean(amounts) for amounts in modelWinAmounts]
        #        modelFitness = [np.mean(amounts)/np.std(amounts) for amounts in modelWinAmounts]
        #        modelFitness = [(np.mean(amounts)) + (np.sum(~(np.isclose(amounts,-1) | \
        #                            np.isclose(amounts,-2)))/len(amounts)) for amounts in modelWinAmounts]

        sorter = np.argsort(modelFitness)
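        # argsort is ascending, so the elite (top RATIO_BEST_INDIVIDUALS fraction) sits at the end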
        bestIdx = sorter[-int(len(sorter) * RATIO_BEST_INDIVIDUALS):]

        replayGameStates, replayStacks = initRandomGames(N_HANDS_FOR_RE_EVAL)
        replayFinalGameStates = playGamesParallel(replayGameStates,
                                                  models[bestIdx], N_CORES,
                                                  WIN_LEN, RND_AGENT_IDX,
                                                  AI_AGENT_IDX)
        assert len(replayFinalGameStates) == len(bestIdx)
        replayModelWinAmounts = getWinAmountsForModels(replayFinalGameStates,
                                                       replayStacks,
                                                       AI_AGENT_IDX)
        #        replayModelWinAmounts = optimizeWinAmounts(replayModelWinAmounts)
        #        replayModelFitness = [(np.mean(np.concatenate((amounts,amounts2)))) + \
        #                              (np.sum(~(np.isclose(np.concatenate((amounts,amounts2)),-1) | \
        #                                        np.isclose(np.concatenate((amounts,amounts2)),-2)))/ \
        #                                            len(np.concatenate((amounts,amounts2))))
        replayModelFitness = [np.mean(np.concatenate((amounts, amounts2)))
                              for amounts, amounts2 in zip(replayModelWinAmounts, modelWinAmounts)]
        #        replayModelFitness = [np.mean(np.concatenate((amounts,amounts2))) /
        #                              np.std(np.concatenate((amounts,amounts2)))
        #                              for amounts,amounts2 in zip(replayModelWinAmounts,modelWinAmounts)]
        #        print(np.argsort(replayModelFitness))
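        # Re-rank the elite by replay fitness so the last entries in bestIdx are the best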
        bestIdx = bestIdx[np.argsort(replayModelFitness)]

        populationFitness.append(np.mean(modelFitness))
        bestFitness.append(np.max(replayModelFitness))

        print('................................')
        print(k, np.mean(modelFitness), np.max(replayModelFitness))
        #        print(k, np.mean([np.mean(amounts) for amounts in modelWinAmounts]),
        #              np.max([np.mean(np.concatenate((amounts,amounts2))) \
        #                      for amounts,amounts2 in zip(replayModelWinAmounts,modelWinAmounts)]))
        #        print(np.argsort(replayModelFitness))

        # Save data
        #        [tf.keras.models.save_model(model, 'data/models/'+str(i)) for i,model in enumerate(models)]
        #        np.save('data/'+str(k)+'_win_amounts', modelWinAmounts)
        #    m = tf.keras.models.load_model('aa.aa')    # This is how to load, just a note

        # Elitism: carry the three best individuals into the next generation unchanged
        nextGeneration = [models[idx] for idx in bestIdx[-3:]]

        # Mutate
        for i in range(POPULATION_SIZE - len(nextGeneration)):
            idx = bestIdx[np.random.randint(len(bestIdx))]

            model = copy.deepcopy(models[idx])
            model.mutate(MUTATION_SIGMA, ratio=MUTATION_RATIO)

            #            weights = model.get_weights()
            #            weightsUpdated = [w + np.random.normal(scale=MUTATION_SIGMA, size=w.shape) for w in weights]
            #            model.set_weights(weightsUpdated)

            nextGeneration.append(model)

        models = np.array(nextGeneration)

    n = 5
    plt.plot(populationFitness[n:])
    plt.plot(bestFitness[n:])
Example #4
        ])
        models.append(m)

    ## Disable gpu
    #import os
    #os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
    #
    #if tf.test.gpu_device_name():
    #    print('GPU found')
    #else:
    #    print("No GPU found")

Example #5
        invalidIdx = np.nonzero(~validAmountsMask)[0]
        actionAmounts[~validAmountsMask] = availableActs[invalidIdx, closestIdx[invalidIdx]]
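        # -1 appears to encode a fold in the action-amount representation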
        actionAmounts[foldMask] = -1
        
        return createActionsToExecute(actionAmounts), mask



# %%

nGames = 5000
callPlayerIdx = 0
rndPlayerIdx = 1
nRandomSets = 8

initGameStates, initStacks = initRandomGames(nGames, seed=76)
equities = getEquities(initGameStates)

gameCont = GameDataContainer(nGames)

agents = [CallAgent(callPlayerIdx), RndAgent(rndPlayerIdx)]
#agents = [AiAgent(0, computeFeatures, regressor, equities), RndAgent(rndPlayerIdx)]

gameContainers = [playGames(agents, copy.deepcopy(initGameStates), copy.deepcopy(gameCont)) \
    for i in range(nRandomSets)]

# %%

winAmounts = [getWinAmounts(c, initStacks)[:,rndPlayerIdx] for c in gameContainers]
winAmounts = np.column_stack(winAmounts)
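# Each column now holds the random agent's win amounts for one of the nRandomSets replays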