Example #1
import neat

import game


def eval_against_all(genomes, config):
    for genome_id, genome in genomes:
        model = neat.nn.FeedForwardNetwork.create(genome, config)
        genome.fitness = 0.0
        sizeX = 6
        sizeY = 6
        # opponents: (constructor, strength); RandomPlayer ignores strength
        for opp in [(game.RandomPlayer, 0), (game.AILook1Player, 1),
                    (game.AILook1Player, 5)]:
            for episode in range(0, 50):
                board = game.Board(sizeX, sizeY)
                if opp[0].__name__ == "RandomPlayer":
                    board.setPlayers(
                        game.AITrainingReduFeatWOPlayer("1", board, model, 2),
                        opp[0]("2"))
                else:
                    board.setPlayers(
                        game.AITrainingReduFeatWOPlayer("1", board, model, 2),
                        opp[0]("2", board, opp[1]))
                board.startGameWithPseudoRandomStartPositions()
                result = board.play(False)
                # result 1: the evolved network won; result 2: the opponent won
                if result == 1:
                    genome.fitness += 1.0
                elif result == 2:
                    genome.fitness -= 1.0
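
A minimal sketch of how a fitness function with this signature is typically handed to neat-python's training loop. The config path and generation count below are placeholders, not values taken from these examples; the path mirrors the config file mentioned in the commented-out lines of Examples #3 and #4.

import neat


def run_neat(config_path='training/config-neat-RF3.txt', generations=100):
    # standard neat-python setup; eval_against_all is called once per generation
    config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                         neat.DefaultSpeciesSet, neat.DefaultStagnation,
                         config_path)
    population = neat.Population(config)
    population.add_reporter(neat.StdOutReporter(True))
    return population.run(eval_against_all, generations)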
Example #2
import random

import numpy as np
from keras.layers import Dense
from keras.models import Sequential, clone_model
from keras.optimizers import RMSprop

import game


def trainModelWithDMC(p_boardSizeX,
                      p_boardSizeY,
                      p_episodes,
                      savePath,
                      p_temperature=20.0,
                      p_gamma=0.9,
                      howFar=2,
                      rewardFunc=game.rewardFunc2,
                      selfPlayFixOpp=False,
                      startingModel=None,
                      opponentConstr=None,
                      opponentStrength=None):

    model = startingModel

    if startingModel is None:
        model = Sequential()
        model.add(
            Dense(70,
                  kernel_initializer='lecun_uniform',
                  activation='relu',
                  input_shape=(2 * (4 + (2 * howFar + 1) *
                                    (2 * howFar + 1)), )))

        model.add(
            Dense(35, kernel_initializer='lecun_uniform', activation='relu'))

        model.add(
            Dense(3, kernel_initializer='lecun_uniform', activation='linear'))

        model.compile(loss='mean_squared_error', optimizer=RMSprop())

    episodes = p_episodes
    gamma = p_gamma
    startingTemperature = p_temperature
    temperature = p_temperature
    batchSize = 1
    buffer = 1
    replay = []
    h = 0
    updateStep = 0
    for i in range(episodes):

        rewardList = []
        stateList = []
        actionList = []

        #init board
        board = game.Board(p_boardSizeX, p_boardSizeY)

        #set opponent
        if (opponentConstr is None):
            if (selfPlayFixOpp == True):
                oppModel = clone_model(model)
                oppModel.set_weights(model.get_weights())
                board.setPlayers(
                    game.AITrainingReduFeatWOPlayer("1", board, model, howFar),
                    game.AITrainingReduFeatWOPlayer("2", board, oppModel,
                                                    howFar))
            else:
                board.setPlayers(
                    game.AITrainingReduFeatWOPlayer("1", board, model, howFar),
                    game.AITrainingReduFeatWOPlayer("2", board, model, howFar))
        else:
            try:
                opp = opponentConstr("2", board, opponentStrength)
            except TypeError:
                opp = opponentConstr("2")
            board.setPlayers(
                game.AITrainingReduFeatWOPlayer("1", board, model, howFar),
                opp)

        #set start strategy
        board.startGameWithPseudoRandomStartPositions()
        while (board.checkGameStatus() == 0):

            # Boltzmann (softmax) action selection over the predicted Q-values
            board.player1.getDirection()
            Qprobs = game.softmax(board.player1.vals / temperature)
            action_value = np.random.choice(Qprobs[0], p=Qprobs[0])
            # index of the sampled value, shifted to the action range -1, 0, 1
            action = np.argmax(Qprobs[0] == action_value) - 1
            actionList.append(action)
            # record the current state, take the action, then observe the reward

            state = board.to01ReducedFeaturesWithOpponent(
                board.player1, board.player2,
                howFar).reshape(1,
                                2 * (4 + (2 * howFar + 1) * (2 * howFar + 1)))
            stateList.append(state)

            board.movePlayers(action, board.player2.getDirection())

            gameStatus = board.checkGameStatus()

            #Observe reward
            reward = rewardFunc(gameStatus)

            rewardList.append(reward)

        dAC = discountedAccRewards(rewardList, gamma)
        for stepNum in range(len(rewardList)):

            if (len(replay) < buffer):
                replay.append(
                    (stateList[stepNum], actionList[stepNum], dAC[stepNum]))
            else:
                if (h < (buffer - 1)):
                    h += 1
                else:
                    h = 0
                replay[h] = (stateList[stepNum], actionList[stepNum],
                             dAC[stepNum])
                #randomly sample our experience replay memory
                minibatch = random.sample(replay, batchSize)
                X_train = []
                y_train = []

                for memory in minibatch:
                    state, action, accReward = memory
                    Qvals = model.predict(state, batch_size=1)
                    y = np.zeros((1, 3))
                    y[:] = Qvals[:]
                    y[0][action +
                         1] = accReward  #action + 1 because actions are -1,0,1
                    X_train.append(
                        state.reshape(
                            2 * (4 + (2 * howFar + 1) * (2 * howFar + 1)), ))
                    y_train.append(y.reshape(3, ))

                X_train = np.array(X_train)
                y_train = np.array(y_train)
                print("Game #: %s" % (i, ))
                model.fit(X_train,
                          y_train,
                          batch_size=batchSize,
                          epochs=1,
                          verbose=1)
                updateStep += 1
        if i % 10000 == 0:  # periodic checkpoint
            model.save(savePath)
        # linearly anneal the Boltzmann temperature towards 1.0
        if temperature > 1.0:
            temperature -= (startingTemperature / episodes)
        else:
            temperature = 1.0
    model.save(savePath)
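
The helper discountedAccRewards is referenced above but not shown. A minimal sketch of what it presumably computes, the discounted return G_t = r_t + gamma * G_{t+1} for every step of the episode, is given below; the exact original implementation may differ.

import numpy as np


def discountedAccRewards(rewardList, gamma):
    # assumed helper: per-step discounted returns, built by one backward pass
    discounted = np.zeros(len(rewardList))
    running = 0.0
    for t in reversed(range(len(rewardList))):
        running = rewardList[t] + gamma * running
        discounted[t] = running
    return discounted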
Example #3
from keras.models import load_model

import game
from util import loadNEATmodel

# counters keyed by the result code returned by board.play()
gameStats = {1: 0, 2: 0, 3: 0}

for episode in range(0, 60):
    board = game.Board(6, 6)
    #board.setPlayers(game.AITrainingReduFeatPlayer("1",board,loadNEATmodel('training/resultsPSR/NEAT/RF3-PSR','training/config-neat-RF3.txt'),3),game.AILook1Player("2",board,5))
    board.setPlayers(
        game.AITrainingReduFeatWOPlayer(
            "1", board,
            load_model('training/resultsPSR/opp/modelSelfPlayBig2'), 2),
        game.AILook1Player("2", board, 5))  # game.RandomPlayer("2")
    board.startGameWithPseudoRandomStartPositions()
    result = board.play(False)
    print("Ends with: ", result)
    gameStats[result] += 1
    print("Game: ", episode)
print(gameStats)
print('5')
Example #4
import game
from util import loadNEATmodel
from keras.models import load_model

# use the AITrainingPlayer variant that matches your own model

board = game.Board(6, 6)
#board.setPlayers(game.AITrainingPlayer("1",board,loadNEATmodel('training/resultsPSR/NEAT/RF3-PSR','training/config-neat-RF3.txt')),game.AILook1Player("2",board,1))
board.setPlayers(
    game.AITrainingReduFeatWOPlayer(
        "1", board, load_model('training/resultsPSR/techn/modelDQLF'), 2),
    game.HumanPlayer("2"))
#board.setPlayers(game.HumanPlayer("1"),game.HumanPlayer("2"))
board.startGameWithPseudoRandomStartPositions()
print("Game ends with:", board.play(True))
Example #5
import random

import numpy as np
from keras.layers import Dense
from keras.models import Sequential, clone_model
from keras.optimizers import RMSprop

import game


def trainModelWithDQL(p_boardSizeX,
                      p_boardSizeY,
                      p_episodes,
                      savePath,
                      p_temperature=20.0,
                      p_gamma=0.9,
                      fixedTargetSteps=500,
                      howFar=2,
                      rewardFunc=game.rewardFunc2,
                      selfPlayFixOpp=False,
                      startingModel=None,
                      opponentConstr=None,
                      opponentStrength=None):

    model = startingModel

    if startingModel is None:
        model = Sequential()
        model.add(
            Dense(70,
                  kernel_initializer='lecun_uniform',
                  activation='relu',
                  input_shape=(2 * (4 + (2 * howFar + 1) *
                                    (2 * howFar + 1)), )))

        model.add(
            Dense(35, kernel_initializer='lecun_uniform', activation='relu'))

        model.add(
            Dense(3, kernel_initializer='lecun_uniform', activation='linear'))

        model.compile(loss='mean_squared_error', optimizer=RMSprop())

    episodes = p_episodes
    gamma = p_gamma
    startingTemperature = p_temperature
    temperature = p_temperature
    batchSize = 50
    buffer = 500
    replay = []
    h = 0
    targetModel = None
    updateStep = 0
    for i in range(episodes):

        #init board
        board = game.Board(p_boardSizeX, p_boardSizeY)

        #set opponent
        if (opponentConstr is None):
            if (selfPlayFixOpp == True):
                oppModel = clone_model(model)
                oppModel.set_weights(model.get_weights())
                board.setPlayers(
                    game.AITrainingReduFeatWOPlayer("1", board, model, howFar),
                    game.AITrainingReduFeatWOPlayer("2", board, oppModel,
                                                    howFar))
            else:
                board.setPlayers(
                    game.AITrainingReduFeatWOPlayer("1", board, model, howFar),
                    game.AITrainingReduFeatWOPlayer("2", board, model, howFar))
        else:
            try:
                opp = opponentConstr("2", board, opponentStrength)
            except TypeError:
                opp = opponentConstr("2")
            board.setPlayers(
                game.AITrainingReduFeatWOPlayer("1", board, model, howFar),
                opp)

        #set start strategy
        board.startGameWithPseudoRandomStartPositions()
        while (board.checkGameStatus() == 0):

            # Boltzmann (softmax) action selection over the predicted Q-values
            board.player1.getDirection()
            Qprobs = game.softmax(board.player1.vals / temperature)
            action_value = np.random.choice(Qprobs[0], p=Qprobs[0])
            # index of the sampled value, shifted to the action range -1, 0, 1
            action = np.argmax(Qprobs[0] == action_value) - 1

            #Take action, observe new state S'

            oldState = board.to01ReducedFeaturesWithOpponent(
                board.player1, board.player2,
                howFar).reshape(1,
                                2 * (4 + (2 * howFar + 1) * (2 * howFar + 1)))
            board.movePlayers(action, board.player2.getDirection())
            newState = board.to01ReducedFeaturesWithOpponent(
                board.player1, board.player2,
                howFar).reshape(1,
                                2 * (4 + (2 * howFar + 1) * (2 * howFar + 1)))

            gameStatus = board.checkGameStatus()

            #Observe reward
            reward = rewardFunc(gameStatus)

            if (len(replay) < buffer):  #if buffer not filled, add to it
                replay.append((oldState, action, reward, gameStatus, newState))
            else:
                if (h < (buffer - 1)):
                    h += 1
                else:
                    h = 0
                replay[h] = (oldState, action, reward, gameStatus, newState)
                #randomly sample our experience replay memory
                minibatch = random.sample(replay, batchSize)
                X_train = []
                y_train = []

                # refresh the target network every fixedTargetSteps updates
                if targetModel is None or updateStep % fixedTargetSteps == 0:
                    targetModel = clone_model(model)
                    targetModel.set_weights(model.get_weights())

                for memory in minibatch:
                    oldState, action, reward, gameStatus, newState = memory
                    oldQvals = model.predict(oldState, batch_size=1)
                    newQvals = targetModel.predict(newState, batch_size=1)
                    maxQval = np.max(newQvals)
                    y = np.zeros((1, 3))
                    y[:] = oldQvals[:]
                    if gameStatus == 0:  #non-terminal state
                        update = (reward + (gamma * maxQval))
                    else:  #terminal state
                        update = reward
                    y[0][action +
                         1] = update  #action + 1 because actions are -1,0,1
                    X_train.append(
                        oldState.reshape(
                            2 * (4 + (2 * howFar + 1) * (2 * howFar + 1)), ))
                    y_train.append(y.reshape(3, ))

                X_train = np.array(X_train)
                y_train = np.array(y_train)
                print("Game #: %s" % (i, ))
                model.fit(X_train,
                          y_train,
                          batch_size=batchSize,
                          epochs=1,
                          verbose=1)
                updateStep += 1
        if i % 10000 == 0:  # periodic checkpoint
            model.save(savePath)
        # linearly anneal the Boltzmann temperature towards 1.0
        if temperature > 1.0:
            temperature -= (startingTemperature / episodes)
        else:
            temperature = 1.0
    model.save(savePath)
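
A hedged usage sketch for the function above: training against a fixed heuristic opponent by passing its constructor, which matches how opponentConstr and opponentStrength are consumed in the setup code. The episode count and save path are placeholders; game.AILook1Player with strength 5 mirrors the opponent used in the evaluation examples.

import game

# placeholders only; adjust board size, episodes and savePath to your setup
trainModelWithDQL(6, 6,
                  p_episodes=50000,
                  savePath='training/modelDQL-new',
                  fixedTargetSteps=500,
                  howFar=2,
                  opponentConstr=game.AILook1Player,
                  opponentStrength=5)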