            break

    # Save also the last game state
    actionsToExecute = np.zeros((len(gameStates.availableActions), 2),
                                dtype=np.int64) - 999
    gameDataContainer.addData(gameStates, actionsToExecute)

    return gameDataContainer


# %%
# Run random games to initialize the random forest agent

nGames = 10000
gameStates = initRandomGames(nGames)
rfFeatures = RfFeatures(gameStates)

#agents = [RndAgent(0), RfAgent(1, rfFeatures, regressor)]
#agents = [RfAgent(0, rfFeatures, regressor), RfAgent(1, rfFeatures, regressor)]
agents = [RndAgent(0), RndAgent(1)]

rfFeatures = playGames(agents, gameStates, rfFeatures)
features, executedActions, executedActionsNorm, misc = \
    rfFeatures.getFeaturesForAllGameStates()

# Sanity checks
assert np.sum(misc['gameFailedMask']) == 0          # No failures
assert np.sum(misc['gameFinishedMask']) == nGames   # All games have ended successfully
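# %%
# Illustrative sketch only: one way the features collected above could be used to
# fit the 'regressor' referenced in the commented-out RfAgent lines. scikit-learn's
# RandomForestRegressor and the placeholder array 'targets' are assumptions here;
# the real training target (e.g. the eventual win amount for each game state) has
# to come from the collected game data.
from sklearn.ensemble import RandomForestRegressor

targets = np.zeros(len(features))   # placeholder target, replace with the real labels
regressorSketch = RandomForestRegressor(n_estimators=100, n_jobs=-1)
regressorSketch.fit(features, targets)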
#    actionAmounts = executedActions[:,1].copy()
#    actionAmounts[np.isclose(actionAmounts,0)] = 0.001
#    betToWinRatios = winAmountsFeatures / actionAmounts
#    betToWinRatios = np.clip(betToWinRatios, a_min=-clippingThres, a_max=clippingThres)

    return winAmountsFeatures


# %%
# Run random games to initialize the random forest agent

# nGames = 100000
gameStates = initRandomGames(nGames)
rfFeatures = RfFeatures(gameStates)

#agents = [RndAgent(0), RfAgent(1, rfFeatures, regressor)]
#agents = [RfAgent(0, rfFeatures, regressor), RfAgent(1, rfFeatures, regressor)]
#agents = [RfAgent(0, rfFeatures, regressorOld), RfAgent(1, rfFeatures, regressor)]
#agents = [RfAgent(0, rfFeatures, regressor0), RfAgent(1, rfFeatures, regressor)]
#agents = [RndAgent(0), RndAgent(1)]

while True:
    actionsAgent0, maskAgent0 = agents[0].getActions(gameStates)
    actionsAgent1, maskAgent1 = agents[1].getActions(gameStates)
    actionsToExecute = np.zeros((len(gameStates.availableActions), 2),
                                dtype=np.int64) - 999
class AiModel(nn.Module):
    def __init__(self, winLen):
        super(AiModel, self).__init__()

        self.layers = nn.Sequential(nn.Linear(7 * (winLen + 17), 250),
                                    nn.ReLU(),
                                    nn.Linear(250, 10))

        # Get references to weights and biases. These are used when mutating the model.
        self.weights, self.biases = [], []
        for layer in self.layers:
            # Hack: layers without parameters (e.g. nn.ReLU) raise an AttributeError here.
            try:
                self.weights.append(layer.weight)
                self.biases.append(layer.bias)
            except AttributeError:
                pass

    def forward(self, x):
        x = self.layers(x)
        return x

#    def mutate(self, sigma):
#        for i in range(len(self.weights)):
#            w = self.weights[i].data.numpy()
#            b = self.biases[i].data.numpy()
#            w += np.random.normal(scale=sigma, size=w.shape)
#            b += np.random.normal(scale=sigma, size=b.shape)

    def mutateWeights(self, sigma, ratio=1.0):
        # Add Gaussian noise to a random subset (ratio) of the weights in each layer
        for i in range(len(self.weights)):
            w = self.weights[i].data.numpy().reshape(-1)
            rndIdx = np.random.choice(len(w), size=max(1, int(ratio * len(w))),
                                      replace=False)
            w[rndIdx] += np.random.normal(scale=sigma, size=len(rndIdx))
#            w = self.weights[i].data.numpy()
#            w += np.random.normal(scale=sigma, size=w.shape)

    def mutateBiases(self, sigma, ratio=1.0):
        # Add Gaussian noise to a random subset (ratio) of the biases in each layer
        for i in range(len(self.biases)):
            b = self.biases[i].data.numpy().reshape(-1)
            rndIdx = np.random.choice(len(b), size=max(1, int(ratio * len(b))),
                                      replace=False)
            b[rndIdx] += np.random.normal(scale=sigma, size=len(rndIdx))
#            b = self.biases[i].data.numpy()
#            b += np.random.normal(scale=sigma, size=b.shape)

    def mutate(self, sigma, ratio=1.0):
        self.mutateWeights(sigma, ratio=ratio)
        self.mutateBiases(sigma, ratio=ratio)


#if __name__ == "__main__":

# %%
# Initialize agent population

#SEED = 123

POPULATION_SIZE = 200
RATIO_BEST_INDIVIDUALS = 0.10
MUTATION_SIGMA = 1.0e-2
MUTATION_RATIO = 1.0
N_HANDS_FOR_EVAL = 25000
N_HANDS_FOR_RE_EVAL = 100000
N_RND_PLAYS_PER_HAND = 1
RND_AGENT_IDX = 0
AI_AGENT_IDX = np.abs(RND_AGENT_IDX - 1)
WIN_LEN = 2
N_CORES = 6

device = torch.device('cpu')

models = []
for i in range(POPULATION_SIZE):
#    m = keras.Sequential()
#    m.add(keras.layers.Dense(50, activation='relu', input_dim=7*(WIN_LEN+17)))
#    m.add(keras.layers.Dense(10, activation='relu'))
    models.append(AiModel(WIN_LEN).to(device))
models = np.array(models)

# # Disable gpu
# import os
# os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
# if tf.test.gpu_device_name():
#     print('GPU found')
# else:
#     print("No GPU found")

# %%
# Create game data for evaluation

initGameStates, initStacks = initRandomGames(N_HANDS_FOR_EVAL)
initGameStates.availableActions = np.repeat(initGameStates.availableActions,
                                            N_RND_PLAYS_PER_HAND, axis=0)
initGameStates.boards = np.repeat(initGameStates.boards, N_RND_PLAYS_PER_HAND, axis=0)
initGameStates.controlVariables = np.repeat(initGameStates.controlVariables,
                                            N_RND_PLAYS_PER_HAND, axis=0)
initGameStates.players = GameDataContainer.unflattenPlayersData(
    np.repeat(GameDataContainer.flattenPlayersData(initGameStates.players),
              N_RND_PLAYS_PER_HAND, axis=0))
initGameStates.validMask = np.repeat(initGameStates.validMask,
                                     N_RND_PLAYS_PER_HAND, axis=0)
initGameStates.validMaskPlayers = np.repeat(initGameStates.validMaskPlayers,
                                            N_RND_PLAYS_PER_HAND, axis=0)
initStacks = np.repeat(initStacks, N_RND_PLAYS_PER_HAND, axis=0)
# smallBlindsForGames = initGameStates.boards[:,1]

populationFitness, bestFitness = [], []

# %%
# Evolve the population

for k in range(50):
    # Replace a random quarter of the evaluation hands with fresh ones
    states, stacks = initRandomGames(int(N_HANDS_FOR_EVAL * 0.25))
#    smallBlinds = states.boards[:,1]
    rndIdx = np.random.choice(N_HANDS_FOR_EVAL, size=len(stacks), replace=False)
#    smallBlindsForGames[rndIdx] = smallBlinds
    initStacks[rndIdx] = stacks
    initGameStates.availableActions[rndIdx] = states.availableActions
    initGameStates.boards[rndIdx] = states.boards
    initGameStates.controlVariables[rndIdx] = states.controlVariables
    rndIdx2 = np.repeat(rndIdx * 2, 2)
    rndIdx2[1::2] = rndIdx * 2 + 1
    initGameStates.players[rndIdx2] = states.players

    # Play games
    finalGameStates = playGamesParallel(initGameStates, models, N_CORES, WIN_LEN,
                                        RND_AGENT_IDX, AI_AGENT_IDX)
    assert len(finalGameStates) == POPULATION_SIZE

    modelWinAmounts = getWinAmountsForModels(finalGameStates, initStacks, AI_AGENT_IDX)
#    modelWinAmounts = optimizeWinAmounts(modelWinAmounts)
    modelFitness = [np.mean(amounts) for amounts in modelWinAmounts]
#    modelFitness = [np.mean(amounts)/np.std(amounts) for amounts in modelWinAmounts]
#    modelFitness = [(np.mean(amounts)) + (np.sum(~(np.isclose(amounts,-1) | \
#        np.isclose(amounts,-2)))/len(amounts)) for amounts in modelWinAmounts]

    # Re-evaluate the best individuals on a fresh, larger set of hands
    sorter = np.argsort(modelFitness)
    bestIdx = sorter[-int(len(sorter) * RATIO_BEST_INDIVIDUALS):]

    replayGameStates, replayStacks = initRandomGames(N_HANDS_FOR_RE_EVAL)
    replayFinalGameStates = playGamesParallel(replayGameStates, models[bestIdx], N_CORES,
                                              WIN_LEN, RND_AGENT_IDX, AI_AGENT_IDX)
    assert len(replayFinalGameStates) == len(bestIdx)

    replayModelWinAmounts = getWinAmountsForModels(replayFinalGameStates, replayStacks,
                                                   AI_AGENT_IDX)
#    replayModelWinAmounts = optimizeWinAmounts(replayModelWinAmounts)

#    replayModelFitness = [(np.mean(np.concatenate((amounts,amounts2)))) + \
#        (np.sum(~(np.isclose(np.concatenate((amounts,amounts2)),-1) | \
#        np.isclose(np.concatenate((amounts,amounts2)),-2)))/ \
#        len(np.concatenate((amounts,amounts2)))) \
#        for amounts,amounts2 in zip(replayModelWinAmounts,modelWinAmounts)]
#    replayModelFitness = [np.mean(np.concatenate((amounts,amounts2)))/ \
#        np.std(np.concatenate((amounts,amounts2))) \
#        for amounts,amounts2 in zip(replayModelWinAmounts,modelWinAmounts)]
    replayModelFitness = [np.mean(np.concatenate((amounts, amounts2)))
                          for amounts, amounts2 in zip(replayModelWinAmounts,
                                                       modelWinAmounts)]
#    print(np.argsort(replayModelFitness))

    # Sort the best individuals by their re-evaluation fitness (ascending)
    bestIdx = bestIdx[np.argsort(replayModelFitness)]

    populationFitness.append(np.mean(modelFitness))
    bestFitness.append(np.max(replayModelFitness))

    print('................................')
    print(k, np.mean(modelFitness), np.max(replayModelFitness))
#    print(k, np.mean([np.mean(amounts) for amounts in modelWinAmounts]),
#          np.max([np.mean(np.concatenate((amounts,amounts2))) \
#                  for amounts,amounts2 in zip(replayModelWinAmounts,modelWinAmounts)]))
#    print(np.argsort(replayModelFitness))

    # Save data
#    [tf.keras.models.save_model(model, 'data/models/'+str(i)) for i,model in enumerate(models)]
#    np.save('data/'+str(k)+'_win_amounts', modelWinAmounts)
#    m = tf.keras.models.load_model('aa.aa')    # This is how to load, just a note

    # Pass the best individuals to the next generation without mutation
    nextGeneration = [models[idx] for idx in bestIdx[-3:]]

    # Fill the rest of the population with mutated copies of the best individuals
    for i in range(POPULATION_SIZE - len(nextGeneration)):
        idx = bestIdx[np.random.randint(len(bestIdx))]
        model = copy.deepcopy(models[idx])
        model.mutate(MUTATION_SIGMA, ratio=MUTATION_RATIO)
#        weights = model.get_weights()
#        weightsUpdated = [w + np.random.normal(scale=MUTATION_SIGMA, size=w.shape) for w in weights]
#        model.set_weights(weightsUpdated)
        nextGeneration.append(model)

    models = np.array(nextGeneration)

# Plot fitness over the generations, skipping the first few noisy ones
n = 5
plt.plot(populationFitness[n:])
plt.plot(bestFitness[n:])
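# %%
# Sanity-check sketch (not part of the training loop above): confirm that
# AiModel.mutate perturbs roughly the requested fraction of parameters.
# All names below are local to this cell and purely illustrative.
checkModel = AiModel(WIN_LEN)
referenceModel = copy.deepcopy(checkModel)
checkModel.mutate(MUTATION_SIGMA, ratio=0.1)

changed, total = 0, 0
for pNew, pOld in zip(checkModel.parameters(), referenceModel.parameters()):
    diff = (pNew.data - pOld.data).numpy().reshape(-1)
    changed += np.sum(~np.isclose(diff, 0))
    total += diff.size
print('fraction of parameters changed:', changed / total)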
    invalidIdx = np.nonzero(~validAmountsMask)[0]
    actionAmounts[~validAmountsMask] = availableActs[invalidIdx, closestIdx[invalidIdx]]
    actionAmounts[foldMask] = -1

    return createActionsToExecute(actionAmounts), mask


# %%
nGames = 5000
callPlayerIdx = 0
rndPlayerIdx = 1
nRandomSets = 8

initGameStates, initStacks = initRandomGames(nGames, seed=76)
equities = getEquities(initGameStates)
gameCont = GameDataContainer(nGames)

agents = [CallAgent(callPlayerIdx), RndAgent(rndPlayerIdx)]
#agents = [AiAgent(0, computeFeatures, regressor, equities), RndAgent(rndPlayerIdx)]

gameContainers = [playGames(agents, copy.deepcopy(initGameStates), copy.deepcopy(gameCont))
                  for i in range(nRandomSets)]

# %%
winAmounts = [getWinAmounts(c, initStacks)[:, rndPlayerIdx] for c in gameContainers]
winAmounts = np.column_stack(winAmounts)
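# %%
# Quick summary of the random player's results. A sketch only: assumes winAmounts is
# an (nGames, nRandomSets) array of chip differences for the random player.
meanWinPerHand = winAmounts.mean(axis=1)   # average over the random action sets of each hand
print('mean win amount per hand:', np.mean(meanWinPerHand))
print('fraction of hands won on average:', np.mean(meanWinPerHand > 0))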