def runTpg():
    tmpEnv = gym.make(options.envName)

    # create TPG
    trainer = TpgTrainer(actions=range(tmpEnv.action_space.n),
                         teamPopSize=options.popSize,
                         rTeamPopSize=options.popSize,
                         maxProgramSize=128)
    tmpEnv.close()

    print('Playing Game: ' + options.envName)

    # train for the configured number of generations
    while trainer.populations[None].curGen < options.generations:
        print('TPG Gen: ' + str(trainer.populations[None].curGen))
        scoreList = man.list()

        # run all agents on env
        pool.map(runAgent,
                 [(agent, options.envName, scoreList, options.trainEps,
                   options.trainFrames, None)
                  for agent in trainer.getAllAgents(
                      skipTasks=[options.envName], noRef=True)])

        trainer.applyScores(scoreList)

        # do evolution after the env is played
        trainer.evolve(tasks=[options.envName],
                       elitistTasks=[options.envName])

        # report generational fitness results
        bestTeam = trainer.getBestTeams(tasks=[options.envName])[0]
        with open(logFileTpg, 'a') as f:
            f.write(str(trainer.populations[None].curGen) + ','
                    + str((time.time() - tstart) / 3600) + ','
                    + options.envName + ','
                    + str(trainer.populations[None].scoreStats['min']) + ','
                    + str(trainer.populations[None].scoreStats['max']) + ','
                    + str(trainer.populations[None].scoreStats['average']) + ','
                    + str(len(bestTeam.getRootTeamGraph()[0])) + ','
                    + str(bestTeam.uid) + ','
                    + str(len(trainer.populations[None].teams)) + ','
                    + str(len(trainer.populations[None].rootTeams)) + '\n')
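# All of the training loops in this file hand runAgent to pool.map, but its
# definition is not shown in this section. The sketch below is a minimal
# reconstruction, assuming a classic Gym episode loop and a PyTPG-style agent
# API: agent.act, agent.reward, the observation flattening, and the record
# format appended to scoreList are assumptions, not confirmed by the source.
import gym
import numpy as np

def runAgent(args):
    # tuple layout assumed from the call sites above:
    # (agent, env name, shared score list, episodes, frame cap, unused)
    agent, envName, scoreList, numEpisodes, numFrames, _ = args

    env = gym.make(envName)
    scoreTotal = 0

    for _ in range(numEpisodes):
        state = env.reset()
        for _ in range(numFrames):
            # flatten the raw observation into the program's input registers
            act = agent.act(np.array(state).flatten())
            state, reward, isDone, _ = env.step(act)
            scoreTotal += reward
            if isDone:
                break

    env.close()
    scoreTotal /= numEpisodes  # average episode score
    agent.reward(scoreTotal, envName)  # tag the outcome with the task name
    # the exact record format trainer.applyScores expects is an assumption
    scoreList.append((agent, scoreTotal))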
        # the head of this call was cut off in the source; pool.map(...) is
        # reconstructed from the identical pattern used elsewhere in this file
        pool.map(runAgent,
                 [(agent, game, scoreQueue)  # (agent, envQueue, scoreQueue)
                  for agent in trainer.getAllAgents(skipTasks=[])])

        # convert scores into a list
        scores = []
        while not scoreQueue.empty():
            scores.append(scoreQueue.get())

        # save model before every evolve in case of an issue
        with open('gvgai-model-1be.pkl', 'wb') as f:
            pickle.dump(trainer, f)

        # apply scores
        trainer.applyScores(scores)
        trainer.evolve()  # go into next gen

        # save model after every gen
        with open('gvgai-model-1ae.pkl', 'wb') as f:
            pickle.dump(trainer, f)

        # at end of generation, make summary of scores (min, max, avg)
        summaryScores.append((trainer.scoreStats['min'],
                              trainer.scoreStats['max'],
                              trainer.scoreStats['average']))

        print(chr(27) + "[2J")  # clear the terminal
        print('Time Taken (Seconds): ' + str(time.time() - tStart))
        print('Results so far: ' + str(summaryScores))

    # final report once the training loop ends
    print(chr(27) + "[2J")
    print('Time Taken (Seconds): ' + str(time.time() - tStart))
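# The fragment above assumes setup that is not shown in this section: a
# process pool, a managed score queue, and the bookkeeping variables. A
# plausible reconstruction under those assumptions (the worker count and any
# name not used in the fragment are hypothetical):
import multiprocessing as mp
import pickle
import time

man = mp.Manager()
pool = mp.Pool(processes=4)   # worker count is an assumption
scoreQueue = man.Queue()      # shared queue the runAgent workers fill
summaryScores = []            # one (min, max, avg) tuple per generation
tStart = time.time()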
def ggpTrainAllAtOnce():
    # create TPG
    trainer = TpgTrainer(actions=range(18),
                         teamPopSize=options.popSize,
                         rTeamPopSize=options.popSize,
                         maxProgramSize=128)

    envNamesSrt = sorted(list(allEnvNames))  # for reporting envs played
    fitnessEnvs = []

    while True:  # train indefinitely
        print('TPG Gen: ' + str(trainer.populations[None].curGen))

        # rotate one new game into the fitness set each generation,
        # dropping the oldest once the window exceeds fitGamesNum
        if 0 < options.fitGamesNum < options.numGames:
            fitnessEnvs.append(
                random.choice(list(set(allEnvNames) - set(fitnessEnvs))))
            if len(fitnessEnvs) > options.fitGamesNum:
                fitnessEnvs.pop(0)
        else:
            fitnessEnvs = list(allEnvNames)

        for envName in fitnessEnvs:
            print('Playing Game: ' + envName)
            scoreList = man.list()

            # run all agents on env
            pool.map(runAgent,
                     [(agent, envName, scoreList, options.trainEps,
                       options.trainFrames, None)
                      for agent in trainer.getAllAgents(skipTasks=[envName],
                                                        noRef=True)])

            trainer.applyScores(scoreList)

            # report curEnv results to log
            scoreStats = trainer.getTaskScores(envName)
            bestTeam = trainer.getBestTeams(tasks=[envName])[0]
            with open(logFileGameScoresName, 'a') as f:
                f.write(str(trainer.populations[None].curGen) + ','
                        + str((time.time() - tstart) / 3600) + ','
                        + envName + ','
                        + str(scoreStats['min']) + ','
                        + str(scoreStats['max']) + ','
                        + str(scoreStats['average']) + ','
                        + str(len(bestTeam.getRootTeamGraph()[0])) + ','
                        + str(bestTeam.uid) + '\n')

        # do evolution after all envs played
        trainer.multiEvolve(
            tasks=[fitnessEnvs] + [[en] for en in fitnessEnvs],
            weights=[0.5] + [0.5 / len(fitnessEnvs) for _ in fitnessEnvs],
            fitMethod='min',
            elitistTasks=allEnvNames)

        # report generational fitness results
        bestTeam = trainer.getBestTeams(tasks=fitnessEnvs)[0]
        with open(logFileFitnessName, 'a') as f:
            f.write(str(trainer.populations[None].curGen) + ','
                    + str((time.time() - tstart) / 3600) + ','
                    + '/'.join(sorted(list(fitnessEnvs))) + ','
                    + str(trainer.populations[None].scoreStats['min']) + ','
                    + str(trainer.populations[None].scoreStats['max']) + ','
                    + str(trainer.populations[None].scoreStats['average']) + ','
                    + str(len(bestTeam.getRootTeamGraph()[0])) + ','
                    + str(bestTeam.uid) + ','
                    + str(len(trainer.populations[None].teams)) + ','
                    + str(len(trainer.populations[None].rootTeams)) + '\n')

        # save model after every gen
        with open(trainerFileName, 'wb') as f:
            pickle.dump(trainer, f)

        # every 10 generations save all agent outcomes
        if trainer.populations[None].curGen % 10 == 0:
            for rt in trainer.populations[None].rootTeams:
                with open(logFileOutcomesName, 'a') as f:
                    f.write(str(trainer.populations[None].curGen) + ','
                            + str((time.time() - tstart) / 3600) + ','
                            + str(rt.uid))
                    for envName in allEnvNames:
                        f.write(',' + str(rt.outcomes.get(envName, '-999999')))
                    f.write('\n')

        # every champEvalGen generations evaluate top agents on all games
        if trainer.populations[None].curGen % options.champEvalGen == 0:
            champEval(envNamesSrt, trainer, logFileChampionsName, pool, man,
                      tstart, frames=options.testFrames, eps=options.testEps)
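# Standalone demo of the fitness-game rotation used in ggpTrainAllAtOnce:
# each generation adds one game not currently in the window and drops the
# oldest once the window exceeds fitGamesNum. The game names and window
# size below are example values, not taken from the source.
import random

allEnvNames = ['Assault-v0', 'Asteroids-v0', 'Boxing-v0', 'Breakout-v0']
fitGamesNum = 2
fitnessEnvs = []
for gen in range(5):
    fitnessEnvs.append(random.choice(list(set(allEnvNames) - set(fitnessEnvs))))
    if len(fitnessEnvs) > fitGamesNum:
        fitnessEnvs.pop(0)
    print(gen, fitnessEnvs)  # sliding window of at most fitGamesNum games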
    # 300 generations at each game
    while trainer.populations[allEnvNames[0]].curGen < 300:
        # envName is not defined yet at this point in the source; report
        # the shared generation counter via the first population instead
        print('TPG Gen: ' + str(trainer.populations[allEnvNames[0]].curGen))

        for envName in allEnvNames:  # train on each env
            print('Playing Game: ' + envName)
            scoreList = man.list()

            # run all agents on env
            pool.map(runAgent,
                     [(agent, envName, scoreList, 3, 18000, None)
                      for agent in trainer.getAllAgents(
                          skipTasks=[envName], noRef=True, popName=envName)])

            trainer.applyScores(scoreList, popName=envName)

            # report curEnv results to log
            scoreStats = trainer.getTaskScores(envName, popName=envName)
            bestTeam = trainer.getBestTeams(tasks=[envName],
                                            popName=envName)[0]
            with open(logName, 'a') as f:
                f.write(str(trainer.populations[envName].curGen) + ','
                        + str((time.time() - tstart) / 3600) + ','
                        + envName + ','
                        + str(scoreStats['min']) + ','
                        + str(scoreStats['max']) + ','
                        + str(scoreStats['average']) + ','
                        + str(len(bestTeam.getRootTeamGraph()[0])) + ','
                        + str(bestTeam.uid) + '\n')

            # do evolution on each env played; the source call was cut off
            # after fitMethod, so the remaining arguments are reconstructed
            # to match the per-population pattern used above
            trainer.evolve(fitMethod='single', tasks=[envName],
                           elitistTasks=[envName], popName=envName)
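# Assumed one-time header for the per-game log written above; the column
# order mirrors the f.write call, but the actual header line (and whether
# one is written at all) is not shown in this section.
with open(logName, 'w') as f:
    f.write('gen,hoursElapsed,envName,scoreMin,scoreMax,scoreAvg,'
            'rootTeamGraphSize,bestTeamUid\n')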
        agents = trainer.getAllAgents(
            skipTasks=[envName + '-' + str(frames)], noRef=True)

        # swap out agents only at start of generation
        if curGen == 1:
            frames = 1000
        elif curGen == 25:
            frames = 5000
        elif curGen == 50:
            frames = 18000

        pool.map(runAgent,
                 [(agent, envName, scoreList, 1, frames) for agent in agents])

        # apply scores
        trainer.applyScores(scoreList)

        tasks = [envName + '-' + str(frames)]
        scoreStats = trainer.generateScoreStats(tasks=tasks)
        allScores.append((envName, scoreStats['min'], scoreStats['max'],
                          scoreStats['average']))

        trainer.evolve(tasks=tasks, fitShare=False)  # go into next gen

        # save model after every gen
        with open('saved-model-sgp.pkl', 'wb') as f:
            pickle.dump(trainer, f)

        # save best agent after every gen
        # with open('best-agent-sgp.pkl', 'wb') as f:
        #     pickle.dump(trainer.getBestAgents(tasks=tasks)[0], f)

    print('Time Taken (Seconds): ' + str(time.time() - tStart))
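# The frame curriculum above, expressed as a lookup for readability: agents
# get short 1000-frame episodes early and the full 18000-frame budget from
# generation 50 on. Illustrative only; the loop keeps the inline if/elif form.
def frameLimit(curGen):
    if curGen >= 50:
        return 18000
    if curGen >= 25:
        return 5000
    return 1000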