def runTpg():

    tmpEnv = gym.make(options.envName)
    # create TPG
    trainer = TpgTrainer(actions=range(tmpEnv.action_space.n),
                         teamPopSize=options.popSize,
                         rTeamPopSize=options.popSize,
                         maxProgramSize=128)

    tmpEnv.close()

    print('Playing Game: ' + options.envName)

    while trainer.populations[
            None].curGen < options.generations:  # train until options.generations
        print('TPG Gen: ' + str(trainer.populations[None].curGen))

        scoreList = man.list()

        # run all agents on env
        pool.map(runAgent, [(agent, options.envName, scoreList,
                             options.trainEps, options.trainFrames, None)
                            for agent in trainer.getAllAgents(
                                skipTasks=[options.envName], noRef=True)])

        trainer.applyScores(scoreList)

        # do evolution after all envs played
        trainer.evolve(tasks=[options.envName], elitistTasks=[options.envName])

        # report generational fitness results
        bestTeam = trainer.getBestTeams(tasks=[options.envName])[0]
        with open(logFileTpg, 'a') as f:
            f.write(
                str(trainer.populations[None].curGen) + ',' +
                str((time.time() - tstart) / 3600) + ',' + options.envName +
                ',' + str(trainer.populations[None].scoreStats['min']) + ',' +
                str(trainer.populations[None].scoreStats['max']) + ',' +
                str(trainer.populations[None].scoreStats['average']) + ',' +
                str(len(bestTeam.getRootTeamGraph()[0])) + ',' +
                str(bestTeam.uid) + ',' +
                str(len(trainer.populations[None].teams)) + ',' +
                str(len(trainer.populations[None].rootTeams)) + '\n')
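
runTpg relies on several module-level names that the snippet does not show: options, pool, man, tstart, and logFileTpg. A minimal setup sketch, assuming argparse-style options and a standard multiprocessing pool (flag names, defaults, and the log path are assumptions, not part of the original script):

import time
import argparse
import multiprocessing as mp
import gym
from tpg.trainer import TpgTrainer  # import path may differ by PyTPG version

parser = argparse.ArgumentParser()
parser.add_argument('--envName', default='Boxing-v0')
parser.add_argument('--popSize', type=int, default=360)
parser.add_argument('--generations', type=int, default=100)
parser.add_argument('--trainEps', type=int, default=3)
parser.add_argument('--trainFrames', type=int, default=18000)
options = parser.parse_args()

pool = mp.Pool(processes=mp.cpu_count())
man = mp.Manager()          # provides the shared scoreList
tstart = time.time()
logFileTpg = 'tpg-log.csv'  # hypothetical log path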
Example #2
    pool.map(  # snippet starts mid-call in the source; the pool.map( opener is assumed
        runAgent,
        [
            (agent, game, scoreQueue)  # (agent, envQueue, scoreQueue)
            for agent in trainer.getAllAgents(skipTasks=[])
        ])

    scores = []  # drain the score queue into a plain list
    while not scoreQueue.empty():
        scores.append(scoreQueue.get())

    # save the model before each evolve, in case something fails mid-generation
    with open('gvgai-model-1be.pkl', 'wb') as f:
        pickle.dump(trainer, f)

    # apply scores
    trainer.applyScores(scores)
    trainer.evolve()  # go into next gen

    # save model after every gen
    with open('gvgai-model-1ae.pkl', 'wb') as f:
        pickle.dump(trainer, f)

    # at end of generation, make summary of scores
    summaryScores.append((trainer.scoreStats['min'], trainer.scoreStats['max'],
                          trainer.scoreStats['average']))  # min, max, avg
    print(chr(27) + "[2J")  # ANSI escape: clear the terminal
    print('Time Taken (Seconds): ' + str(time.time() - tStart))
    print('Results so far: ' + str(summaryScores))

print(chr(27) + "[2J")
print('Time Taken (Seconds): ' + str(time.time() - tStart))
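
Every example here fans episodes out through pool.map(runAgent, ...), but runAgent itself is never shown. A plausible sketch, assuming the six-tuple argument form from Example #1 and a TpgAgent-style act/reward API; the real helper almost certainly does more observation preprocessing:

import gym

def runAgent(args):
    # unpack the argument tuple; other snippets pass a shorter tuple,
    # or a queue in place of the shared list
    agent, envName, scoreList, numEpisodes, numFrames, _ = args

    env = gym.make(envName)
    scoreTotal = 0
    for _ in range(numEpisodes):
        state = env.reset()
        for _ in range(numFrames):
            # flattening the raw observation is an assumption; the original
            # likely downsamples Atari frames before feeding the agent
            action = agent.act(state.flatten())
            state, reward, isDone, _ = env.step(action)
            scoreTotal += reward
            if isDone:
                break
    env.close()

    agent.reward(scoreTotal / numEpisodes, envName)  # tag the score with the task name
    # payload is an assumption; applyScores presumably matches teams back by uid
    scoreList.append((agent.team.uid, agent.team.outcomes))

Example #3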
def ggpTrainAllAtOnce():

    # create TPG
    trainer = TpgTrainer(actions=range(18),
                         teamPopSize=options.popSize,
                         rTeamPopSize=options.popSize,
                         maxProgramSize=128)

    envNamesSrt = sorted(list(allEnvNames))  # for reporting envs played

    fitnessEnvs = []

    while True:  # train indefinitely
        print('TPG Gen: ' + str(trainer.populations[None].curGen))

        if 0 < options.fitGamesNum < options.numGames:
            fitnessEnvs.append(
                random.choice(list(set(allEnvNames) - set(fitnessEnvs))))
            if len(fitnessEnvs) > options.fitGamesNum:
                fitnessEnvs.pop(0)
        else:
            fitnessEnvs = list(allEnvNames)

        for envName in fitnessEnvs:
            print('Playing Game: ' + envName)

            scoreList = man.list()

            # run all agents on env
            pool.map(runAgent,
                     [(agent, envName, scoreList, options.trainEps,
                       options.trainFrames, None)
                      for agent in trainer.getAllAgents(skipTasks=[envName],
                                                        noRef=True)])

            trainer.applyScores(scoreList)

            # report curEnv results to log
            scoreStats = trainer.getTaskScores(envName)
            bestTeam = trainer.getBestTeams(tasks=[envName])[0]
            with open(logFileGameScoresName, 'a') as f:
                f.write(
                    str(trainer.populations[None].curGen) + ',' +
                    str((time.time() - tstart) / 3600) + ',' + envName + ',' +
                    str(scoreStats['min']) + ',' + str(scoreStats['max']) +
                    ',' + str(scoreStats['average']) + ',' +
                    str(len(bestTeam.getRootTeamGraph()[0])) + ',' +
                    str(bestTeam.uid) + '\n')

        # do evolution after all envs played
        trainer.multiEvolve(tasks=[fitnessEnvs] + [[en] for en in fitnessEnvs],
                            weights=[0.5] +
                            [0.5 / len(fitnessEnvs) for _ in fitnessEnvs],
                            fitMethod='min',
                            elitistTasks=allEnvNames)

        # report generational fitness results
        bestTeam = trainer.getBestTeams(tasks=fitnessEnvs)[0]
        with open(logFileFitnessName, 'a') as f:
            f.write(
                str(trainer.populations[None].curGen) + ',' +
                str((time.time() - tstart) / 3600) + ',' +
                '/'.join(sorted(list(fitnessEnvs))) + ',' +
                str(trainer.populations[None].scoreStats['min']) + ',' +
                str(trainer.populations[None].scoreStats['max']) + ',' +
                str(trainer.populations[None].scoreStats['average']) + ',' +
                str(len(bestTeam.getRootTeamGraph()[0])) + ',' +
                str(bestTeam.uid) + ',' +
                str(len(trainer.populations[None].teams)) + ',' +
                str(len(trainer.populations[None].rootTeams)) + '\n')

        # save model after every gen
        with open(trainerFileName, 'wb') as f:
            pickle.dump(trainer, f)

        # every 10 generations save all agent outcomes
        if trainer.populations[None].curGen % 10 == 0:
            for rt in trainer.populations[None].rootTeams:
                with open(logFileOutcomesName, 'a') as f:
                    f.write(
                        str(trainer.populations[None].curGen) + ',' +
                        str((time.time() - tstart) / 3600) + ',' + str(rt.uid))
                    for envName in allEnvNames:
                        f.write(',' + str(rt.outcomes.get(envName, '-999999')))
                    f.write('\n')

        # every options.champEvalGen generations evaluate top agents on all games
        if trainer.populations[None].curGen % options.champEvalGen == 0:
            champEval(envNamesSrt,
                      trainer,
                      logFileChampionsName,
                      pool,
                      man,
                      tstart,
                      frames=options.testFrames,
                      eps=options.testEps)
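
The multiEvolve call above splits fitness half-and-half: the joint task (with fitMethod='min', the minimum score across the current fitness games) carries weight 0.5, and the remaining 0.5 is spread evenly over the individual games. A worked illustration with three hypothetical games:

fitnessEnvs = ['Pong-v0', 'Breakout-v0', 'Boxing-v0']  # hypothetical
tasks = [fitnessEnvs] + [[en] for en in fitnessEnvs]
# tasks   == [['Pong-v0', 'Breakout-v0', 'Boxing-v0'],
#             ['Pong-v0'], ['Breakout-v0'], ['Boxing-v0']]
weights = [0.5] + [0.5 / len(fitnessEnvs) for _ in fitnessEnvs]
# weights == [0.5, 0.1667, 0.1667, 0.1667]  (sums to 1.0)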
Example #4
while trainer.populations[
        allEnvNames[0]].curGen < 300:  # 300 generations at each game
    print('TPG Gen: ' + str(trainer.populations[allEnvNames[0]].curGen))
    for envName in allEnvNames:  # train on each env
        print('Playing Game: ' + envName)

        scoreList = man.list()

        # run all agents on env
        pool.map(runAgent,
                 [(agent, envName, scoreList, 3, 18000, None)
                  for agent in trainer.getAllAgents(
                      skipTasks=[envName], noRef=True, popName=envName)])

        trainer.applyScores(scoreList, popName=envName)

        # report curEnv results to log
        scoreStats = trainer.getTaskScores(envName, popName=envName)
        bestTeam = trainer.getBestTeams(tasks=[envName], popName=envName)[0]
        with open(logName, 'a') as f:
            f.write(
                str(trainer.populations[envName].curGen) + ',' +
                str((time.time() - tstart) / 3600) + ',' + envName + ',' +
                str(scoreStats['min']) + ',' + str(scoreStats['max']) + ',' +
                str(scoreStats['average']) + ',' +
                str(len(bestTeam.getRootTeamGraph()[0])) + ',' +
                str(bestTeam.uid) + '\n')

        # do evolution on each env played
        trainer.evolve(fitMethod='single',
                       tasks=[envName],
                       popName=envName)  # remaining args assumed; the snippet is truncated here
Example #5
    agents = trainer.getAllAgents(
        skipTasks=[envName + '-' + str(frames)],
        noRef=True)  # swap out agents only at start of generation

    if curGen == 1:
        frames = 1000
    elif curGen == 25:
        frames = 5000
    elif curGen == 50:
        frames = 18000
    pool.map(runAgent,
             [(agent, envName, scoreList, 1, frames) for agent in agents])

    # apply scores
    trainer.applyScores(scoreList)

    tasks = [envName + '-' + str(frames)]
    scoreStats = trainer.generateScoreStats(tasks=tasks)
    allScores.append(
        (envName, scoreStats['min'], scoreStats['max'], scoreStats['average']))
    trainer.evolve(tasks=tasks, fitShare=False)  # go into next gen

    # save model after every gen
    with open('saved-model-sgp.pkl', 'wb') as f:
        pickle.dump(trainer, f)
    # save best agent after every gen
    #with open('best-agent-sgp.pkl','wb') as f:
    #    pickle.dump(trainer.getBestAgents(tasks=tasks)[0],f)

    print('Time Taken (Seconds): ' + str(time.time() - tStart))
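
Because the trainer is pickled after every generation, a crashed run can be resumed by loading the saved model instead of building a fresh TpgTrainer. A minimal sketch (file name taken from the snippet above):

import pickle

with open('saved-model-sgp.pkl', 'rb') as f:
    trainer = pickle.load(f)  # resumes from the last completed generation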