Example #1
def main():

    # initialize the trainer; the actions are the agent's predictions:
    # action 1 -> right box will fall off the edge
    # action 0 -> right box will not fall off
    trainer = TpgTrainer(actions=[0, 1], teamPopSize=50)

    _min, _max, _avg = [], [], []  # hold values for every generation

    for gen in range(GENERATIONS):  # generation loop
        print("Generation: ", gen + 1, "/", GENERATIONS)
        curScores = []  # new list per gen

        while True:  # loop to go through agents
            agent = trainer.getNextAgent()
            if agent is None:
                break  # no more agents, so proceed to next gen

            # evaluate the agent
            score = 0
            for i in range(EVALUATIONS):
                score += evaluateAgent(agent)
            agent.reward(score)

            curScores.append(score)

        print("Min:", min(curScores), "   Max:", max(curScores), "   Avg:",
              sum(curScores) / len(curScores),
              "(out of " + str(EVALUATIONS) + ")\n")

        _min.append(min(curScores))
        _max.append(max(curScores))
        _avg.append(sum(curScores) / len(curScores))

        trainer.evolve()

    # get the best agent after all generations
    best_agent, best_score = getBestAgent(trainer)

    print("Best agent's score:", best_score, "/", EVALUATIONS)

    for run in range(FINAL_RUNS):
        print("Final run: ", run + 1, "/", FINAL_RUNS, end='\r')
        evaluateAgent(best_agent, graphics=True)

    # plotting progress over the generations
    generations = range(1, GENERATIONS + 1)

    axes = plt.gca()
    axes.set_ylim([0, EVALUATIONS])

    plt.plot(generations, _min, label="min")
    plt.plot(generations, _max, label="max")
    plt.plot(generations, _avg, label="avg")

    plt.xlabel("generation")
    plt.ylabel("score")
    plt.legend()
    plt.show()
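Example #1 relies on helpers and constants that are not shown: GENERATIONS, EVALUATIONS, FINAL_RUNS, evaluateAgent and getBestAgent. A minimal sketch of what they could look like, using a toy stand-in for the two-box scene; the constant values, state encoding and labeling rule here are assumptions, and only trainer/agent calls already used above are reused:

import random

GENERATIONS = 100  # assumed values
EVALUATIONS = 10
FINAL_RUNS = 5

def evaluateAgent(agent, graphics=False):
    # Toy stand-in for the two-box scene: a random feature vector whose
    # "falls off the edge" label is derived from one feature (hypothetical
    # labeling rule). Returns 1 for a correct prediction, 0 otherwise, so
    # a sum over EVALUATIONS episodes is a score out of EVALUATIONS.
    # The graphics flag is accepted but ignored in this toy version.
    state = [random.random() for _ in range(8)]
    truth = 1 if state[0] > 0.5 else 0
    prediction = agent.act(state)
    return 1 if prediction == truth else 0

def getBestAgent(trainer):
    # Re-evaluate every remaining agent and return the best
    # (agent, score) pair.
    best_agent, best_score = None, -1
    while True:
        agent = trainer.getNextAgent()
        if agent is None:
            break
        score = sum(evaluateAgent(agent) for _ in range(EVALUATIONS))
        if score > best_score:
            best_agent, best_score = agent, score
    return best_agent, best_score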
Example #2
def runTpg():

    tmpEnv = gym.make(options.envName)
    # create TPG
    trainer = TpgTrainer(actions=range(tmpEnv.action_space.n),
                         teamPopSize=options.popSize,
                         rTeamPopSize=options.popSize,
                         maxProgramSize=128)

    tmpEnv.close()

    print('Playing Game: ' + options.envName)

    # train until the configured number of generations
    while trainer.populations[None].curGen < options.generations:
        print('TPG Gen: ' + str(trainer.populations[None].curGen))

        scoreList = man.list()

        # run all agents on env
        pool.map(runAgent, [(agent, options.envName, scoreList,
                             options.trainEps, options.trainFrames, None)
                            for agent in trainer.getAllAgents(
                                skipTasks=[options.envName], noRef=True)])

        trainer.applyScores(scoreList)

        # do evolution after all envs played
        trainer.evolve(tasks=[options.envName], elitistTasks=[options.envName])

        # report generational fitness results
        bestTeam = trainer.getBestTeams(tasks=[options.envName])[0]
        with open(logFileTpg, 'a') as f:
            f.write(
                str(trainer.populations[None].curGen) + ',' +
                str((time.time() - tstart) / 3600) + ',' + options.envName +
                ',' + str(trainer.populations[None].scoreStats['min']) + ',' +
                str(trainer.populations[None].scoreStats['max']) + ',' +
                str(trainer.populations[None].scoreStats['average']) + ',' +
                str(len(bestTeam.getRootTeamGraph()[0])) + ',' +
                str(bestTeam.uid) + ',' +
                str(len(trainer.populations[None].teams)) + ',' +
                str(len(trainer.populations[None].rootTeams)) + '\n')
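runTpg assumes a multiprocessing setup (man, pool) and a runAgent worker defined elsewhere, receiving the tuples built above. A minimal sketch under those assumptions; the task-tagged reward call and the scoreList record shape depend on the PyTPG version and are assumptions:

import gym
import multiprocessing as mp

def runAgent(args):
    # Unpack the tuple built in runTpg; the last slot is unused here.
    agent, envName, scoreList, numEpisodes, numFrames, _ = args
    env = gym.make(envName)
    total = 0
    for _ in range(numEpisodes):
        state = env.reset()
        for _ in range(numFrames):
            state, reward, isDone, _ = env.step(agent.act(state))
            total += reward
            if isDone:
                break
    env.close()
    agent.reward(total / numEpisodes, envName)  # assumed (score, task) signature
    scoreList.append((agent.getAgentNum(), total / numEpisodes))  # assumed record shape

man = mp.Manager()
pool = mp.Pool(processes=4)  # worker count is an assumption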
Example #3
        [
            (agent, game, scoreQueue)  # (agent, env name, score queue)
            for agent in trainer.getAllAgents(skipTasks=[])
        ])

    scores = []  # convert scores into list
    while not scoreQueue.empty():
        scores.append(scoreQueue.get())

    # save model before every evolve in case of a crash
    with open('gvgai-model-1be.pkl', 'wb') as f:
        pickle.dump(trainer, f)

    # apply scores
    trainer.applyScores(scores)
    trainer.evolve()  # go into next gen

    # save model after every gen
    with open('gvgai-model-1ae.pkl', 'wb') as f:
        pickle.dump(trainer, f)

    # at end of generation, make summary of scores
    summaryScores.append((trainer.scoreStats['min'], trainer.scoreStats['max'],
                          trainer.scoreStats['average']))  # min, max, avg
    print(chr(27) + "[2J")  # ANSI escape: clear the terminal
    print('Time Taken (Seconds): ' + str(time.time() - tStart))
    print('Results so far: ' + str(summaryScores))

print(chr(27) + "[2J")
print('Time Taken (Seconds): ' + str(time.time() - tStart))
print('Results: ' + str(summaryScores))
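The pickle checkpoints written before and after each evolve can be loaded back to resume an interrupted run; a minimal sketch using the same file:

import pickle

with open('gvgai-model-1ae.pkl', 'rb') as f:
    trainer = pickle.load(f)  # resume from the last completed generation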
Example #4
                act = agent.act(getState(state),
                                valActs=valActs)  # get action from agent

                # feedback from env
                state, reward, isDone, debug = env.step(act)
                score += reward  # accumulate reward in score
                if isDone:
                    break  # end early if losing state

        agent.reward(score)  # the agent must always be rewarded, even if it never scored

        print('Agent #' + str(agent.getAgentNum()) + ' finished with score ' +
              str(score))
        curScores.append(score)  # store score

    trainer.evolve()

    # save model after every gen
    with open('gvgai-model-1.pkl', 'wb') as f:
        pickle.dump(trainer, f)

    # at end of generation, make summary of scores
    summaryScores.append((min(curScores), max(curScores),
                          sum(curScores) / len(curScores)))  # min, max, avg

    print(chr(27) + "[2J")
    print('Time Taken (Seconds): ' + str(time.time() - tStart))
    print('Results so far: ' + str(summaryScores))

#clear_output(wait=True)
print('Time Taken (Seconds): ' + str(time.time() - tStart))
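Example #4 preprocesses the raw observation with a getState helper that is not shown. A common approach in TPG experiments on pixel inputs is to pack each pixel's RGB channels into a single integer and flatten the frame; a sketch of that idea (the exact encoding in the original is an assumption):

import numpy as np

def getState(state):
    # Pack R, G, B into one int per pixel, then flatten to the 1-D
    # feature vector that TPG programs index into.
    state = np.asarray(state, dtype=np.int64)
    packed = (state[..., 0] << 16) | (state[..., 1] << 8) | state[..., 2]
    return packed.flatten()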
Example #5
        # run all agents on env
        pool.map(runAgent,
                 [(agent, envName, scoreList, 3, 18000, None)
                  for agent in trainer.getAllAgents(
                      skipTasks=[envName], noRef=True, popName=envName)])

        trainer.applyScores(scoreList, popName=envName)

        # report curEnv results to log
        scoreStats = trainer.getTaskScores(envName, popName=envName)
        bestTeam = trainer.getBestTeams(tasks=[envName], popName=envName)[0]
        with open(logName, 'a') as f:
            f.write(
                str(trainer.populations[envName].curGen) + ',' +
                str((time.time() - tstart) / 3600) + ',' + envName + ',' +
                str(scoreStats['min']) + ',' + str(scoreStats['max']) + ',' +
                str(scoreStats['average']) + ',' +
                str(len(bestTeam.getRootTeamGraph()[0])) + ',' +
                str(bestTeam.uid) + '\n')

        # do evolution on each env played
        trainer.evolve(fitMethod='single',
                       tasks=[envName],
                       elitistTasks=[envName],
                       popName=envName)

    # update most recent model results
    with open('trainer-8-pops.pkl', 'wb') as f:
        pickle.dump(trainer, f)
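Each log line written above is one comma-separated record, so it can be read back for analysis with the standard csv module; the column names below are inferred from the write call:

import csv

with open(logName) as f:
    rows = list(csv.reader(f))
# columns: curGen, hoursElapsed, envName, min, max, average,
#          teamsInBestGraph, bestTeamUid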
Example #6
        frames = 1000
    elif curGen == 25:
        frames = 5000
    elif curGen == 50:
        frames = 18000
    pool.map(runAgent,
             [(agent, envName, scoreList, 1, frames) for agent in agents])

    # apply scores
    trainer.applyScores(scoreList)

    tasks = [envName + '-' + str(frames)]
    scoreStats = trainer.generateScoreStats(tasks=tasks)
    allScores.append(
        (envName, scoreStats['min'], scoreStats['max'], scoreStats['average']))
    trainer.evolve(tasks=tasks, fitShare=False)  # go into next gen

    # save model after every gen
    with open('saved-model-sgp.pkl', 'wb') as f:
        pickle.dump(trainer, f)
    # save best agent after every gen
    #with open('best-agent-sgp.pkl','wb') as f:
    #    pickle.dump(trainer.getBestAgents(tasks=tasks)[0],f)

    print('Time Taken (Seconds): ' + str(time.time() - tStart))
    print('On Generation: ' + str(curGen))
    #print('Results: ', str(allScores))

    with open(logFileName, 'a') as f:
        f.write(
            str(curGen) + ' | ' + str(envName) + ' | ' +
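The truncated block at the top of Example #6 implements a frame-limit curriculum: episodes get a larger frame budget as generations pass. The same schedule as one helper; the thresholds at 25 and 50 come from the visible branches, while the initial 1000-frame budget is an assumption about the truncated first branch:

def framesForGen(curGen):
    # Frame budget per episode, loosened as the population improves.
    if curGen >= 50:
        return 18000
    elif curGen >= 25:
        return 5000
    return 1000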
Example #7
allScores = []  # track all scores each generation

tStart = time.time()

logFileName = 'train-log-' + datetime.datetime.now().strftime(
    "%Y-%m-%d-%H-%M") + '.txt'

while True:  # run generations indefinitely
    scoreList = man.list()

    pool.map(runAgent, [(agent, scoreList, 0.05)
                        for agent in trainer.getAllAgents(noRef=True)])

    # apply scores
    trainer.applyScores(scoreList)
    trainer.evolve(fitShare=False, tasks=[])
    scoreStats = trainer.scoreStats
    allScores.append((trainer.curGen, scoreStats['min'], scoreStats['max'],
                      scoreStats['average']))

    # save model after every gen
    with open('saved-model-1.pkl', 'wb') as f:
        pickle.dump(trainer, f)

    # save best agent each generation
    #with open('best-agent.pkl','wb') as f:
    #    pickle.dump(trainer.getBestAgent(tasks=[]), f)

    print(chr(27) + "[2J")
    print('Time So Far (Seconds): ' + str(time.time() - tStart))
    print('Results: ', str(allScores))
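allScores here accumulates (generation, min, max, average) tuples, so progress can be plotted the same way as in Example #1; a quick sketch, assuming matplotlib:

import matplotlib.pyplot as plt

gens, mins, maxs, avgs = zip(*allScores)
plt.plot(gens, mins, label="min")
plt.plot(gens, maxs, label="max")
plt.plot(gens, avgs, label="avg")
plt.xlabel("generation")
plt.ylabel("score")
plt.legend()
plt.show()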