def main():
    # initialize the trainer; the actions are the agent's predictions:
    # action 1 -> right box will fall off the edge
    # action 0 -> right box will not fall off
    trainer = TpgTrainer(actions=[0, 1], teamPopSize=50)

    _min, _max, _avg = [], [], []  # hold values for every generation

    for gen in range(GENERATIONS):  # generation loop
        print("Generation: ", gen + 1, "/", GENERATIONS)
        curScores = []  # new list per gen

        while True:  # loop to go through agents
            agent = trainer.getNextAgent()
            if agent is None:
                break  # no more agents, so proceed to next gen

            # evaluating the agent
            score = 0
            for i in range(EVALUATIONS):
                score += evaluateAgent(agent)

            agent.reward(score)
            curScores.append(score)

        print("Min:", min(curScores),
              " Max:", max(curScores),
              " Avg:", sum(curScores) / len(curScores),
              "(out of " + str(EVALUATIONS) + ")\n")
        _min.append(min(curScores))
        _max.append(max(curScores))
        _avg.append(sum(curScores) / len(curScores))

        trainer.evolve()

    # getting the best agent after all the generations
    best_agent, best_score = getBestAgent(trainer)
    print("Best agent's score:", best_score, "/", EVALUATIONS)
    for run in range(FINAL_RUNS):
        print("Final run: ", run + 1, "/", FINAL_RUNS, end='\r')
        evaluateAgent(best_agent, graphics=True)

    # plotting progress over the generations
    generations = range(1, GENERATIONS + 1)
    axes = plt.gca()
    axes.set_ylim([0, EVALUATIONS])
    plt.plot(generations, _min, label="min")
    plt.plot(generations, _max, label="max")
    plt.plot(generations, _avg, label="avg")
    plt.xlabel("generation")
    plt.ylabel("score")
    plt.legend()
    plt.show()
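# main() relies on two helpers that are not shown in this snippet. The
# following is a minimal sketch of what they could look like; simulateBoxes
# is a hypothetical stand-in for the physics step that returns the
# observation vector and whether the right box actually fell off the edge.
# None of these names come from the original source.

def evaluateAgent(agent, graphics=False):
    # one episode: 1 point if the agent's prediction matches the outcome
    state, boxFell = simulateBoxes(render=graphics)  # hypothetical helper
    prediction = agent.act(state)  # 1 -> "will fall", 0 -> "will not"
    return 1 if prediction == int(boxFell) else 0

def getBestAgent(trainer):
    # re-evaluate every root-team agent and keep the highest scorer
    best_agent, best_score = None, float('-inf')
    for agent in trainer.getAllAgents(skipTasks=[]):
        score = sum(evaluateAgent(agent) for _ in range(EVALUATIONS))
        if score > best_score:
            best_agent, best_score = agent, score
    return best_agent, best_score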
def runTpg():
    tmpEnv = gym.make(options.envName)

    # create TPG
    trainer = TpgTrainer(actions=range(tmpEnv.action_space.n),
                         teamPopSize=options.popSize,
                         rTeamPopSize=options.popSize,
                         maxProgramSize=128)

    tmpEnv.close()

    print('Playing Game: ' + options.envName)

    while trainer.populations[None].curGen < options.generations:  # train up to the generation limit
        print('TPG Gen: ' + str(trainer.populations[None].curGen))

        scoreList = man.list()

        # run all agents on env
        pool.map(runAgent,
                 [(agent, options.envName, scoreList,
                   options.trainEps, options.trainFrames, None)
                  for agent in trainer.getAllAgents(
                      skipTasks=[options.envName], noRef=True)])

        trainer.applyScores(scoreList)

        # do evolution after all envs played
        trainer.evolve(tasks=[options.envName],
                       elitistTasks=[options.envName])

        # report generational fitness results
        bestTeam = trainer.getBestTeams(tasks=[options.envName])[0]
        with open(logFileTpg, 'a') as f:
            f.write(str(trainer.populations[None].curGen) + ',' +
                    str((time.time() - tstart) / 3600) + ',' +
                    options.envName + ',' +
                    str(trainer.populations[None].scoreStats['min']) + ',' +
                    str(trainer.populations[None].scoreStats['max']) + ',' +
                    str(trainer.populations[None].scoreStats['average']) + ',' +
                    str(len(bestTeam.getRootTeamGraph()[0])) + ',' +
                    str(bestTeam.uid) + ',' +
                    str(len(trainer.populations[None].teams)) + ',' +
                    str(len(trainer.populations[None].rootTeams)) + '\n')
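# The runAgent worker dispatched by pool.map above is not defined in this
# snippet. Below is a minimal sketch assuming the tuple layout used above
# (agent, envName, scoreList, numEpisodes, numFrames, popName), that
# agent.reward accepts a task name, and that applyScores consumes the
# (uid, outcomes) pairs appended to scoreList; getState is the observation
# preprocessor sketched further down. Treat all of these as assumptions.

def runAgent(args):
    agent, envName, scoreList, numEpisodes, numFrames, popName = args
    env = gym.make(envName)
    valActs = range(env.action_space.n)  # valid actions in this env

    scoreTotal = 0
    for ep in range(numEpisodes):
        state = env.reset()
        score = 0
        for frame in range(numFrames):
            act = agent.act(getState(state), valActs=valActs)
            state, reward, isDone, debug = env.step(act)
            score += reward
            if isDone:
                break  # end early on terminal state
        scoreTotal += score
    env.close()

    agent.reward(scoreTotal / numEpisodes, envName)  # average over episodes
    scoreList.append((agent.team.uid, agent.team.outcomes))  # assumed format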
             [(agent, game, scoreQueue)  # (agent, envQueue, scoreQueue)
              for agent in trainer.getAllAgents(skipTasks=[])])

    scores = []  # convert scores into a list
    while not scoreQueue.empty():
        scores.append(scoreQueue.get())

    # save model before every evolve in case of an issue
    with open('gvgai-model-1be.pkl', 'wb') as f:
        pickle.dump(trainer, f)

    # apply scores
    trainer.applyScores(scores)
    trainer.evolve()  # go into next gen

    # save model after every gen
    with open('gvgai-model-1ae.pkl', 'wb') as f:
        pickle.dump(trainer, f)

    # at end of generation, make summary of scores
    summaryScores.append((trainer.scoreStats['min'],
                          trainer.scoreStats['max'],
                          trainer.scoreStats['average']))  # min, max, avg

    print(chr(27) + "[2J")  # clear terminal
    print('Time Taken (Seconds): ' + str(time.time() - tStart))
    print('Results so far: ' + str(summaryScores))

print(chr(27) + "[2J")
print('Time Taken (Seconds): ' + str(time.time() - tStart))
print('Results: ' + str(summaryScores))
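# A trainer checkpoint like the ones written above can be restored to resume
# training after a crash; a minimal sketch:

import pickle

with open('gvgai-model-1ae.pkl', 'rb') as f:
    trainer = pickle.load(f)  # continue calling applyScores/evolve from here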
            act = agent.act(getState(state), valActs=valActs)  # get action from agent

            # feedback from env
            state, reward, isDone, debug = env.step(act)
            score += reward  # accumulate reward in score
            if isDone:
                break  # end early if losing state

        agent.reward(score)  # must reward the agent (if it didn't already score)
        print('Agent #' + str(agent.getAgentNum()) +
              ' finished with score ' + str(score))
        curScores.append(score)  # store score

    trainer.evolve()

    # save model after every gen
    with open('gvgai-model-1.pkl', 'wb') as f:
        pickle.dump(trainer, f)

    # at end of generation, make summary of scores
    summaryScores.append((min(curScores), max(curScores),
                          sum(curScores) / len(curScores)))  # min, max, avg

    print(chr(27) + "[2J")  # clear terminal
    print('Time Taken (Seconds): ' + str(time.time() - tStart))
    print('Results so far: ' + str(summaryScores))
    #clear_output(wait=True)

print('Time Taken (Seconds): ' + str(time.time() - tStart))
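# getState, used above, is not defined in this snippet. With pixel
# environments, one common approach is to downsample/flatten the RGB frame
# into the 1-D array that TPG programs index into. A minimal sketch, assuming
# an 8-bit HxWx3 observation; the real preprocessor may differ.

import numpy as np

def getState(obs):
    gray = obs.mean(axis=2).astype(np.uint8)  # average channels to grayscale
    return gray.flatten()  # 1-D feature vector for agent.act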
        # run all agents on env
        pool.map(runAgent,
                 [(agent, envName, scoreList, 3, 18000, None)
                  for agent in trainer.getAllAgents(
                      skipTasks=[envName], noRef=True, popName=envName)])

        trainer.applyScores(scoreList, popName=envName)

        # report curEnv results to log
        scoreStats = trainer.getTaskScores(envName, popName=envName)
        bestTeam = trainer.getBestTeams(tasks=[envName], popName=envName)[0]
        with open(logName, 'a') as f:
            f.write(str(trainer.populations[envName].curGen) + ',' +
                    str((time.time() - tstart) / 3600) + ',' +
                    envName + ',' +
                    str(scoreStats['min']) + ',' +
                    str(scoreStats['max']) + ',' +
                    str(scoreStats['average']) + ',' +
                    str(len(bestTeam.getRootTeamGraph()[0])) + ',' +
                    str(bestTeam.uid) + '\n')

        # do evolution on each env played
        trainer.evolve(fitMethod='single', tasks=[envName],
                       elitistTasks=[envName], popName=envName)

        # update most recent model results
        with open('trainer-8-pops.pkl', 'wb') as f:
            pickle.dump(trainer, f)
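# This block is the per-environment body of a multi-population run (the
# 'trainer-8-pops.pkl' filename suggests one population per environment,
# selected via popName). A sketch of the surrounding loop, where envNames is
# an assumed list of Gym environment ids and runGeneration is a hypothetical
# wrapper around the body above:

while True:  # one pass over every environment per outer iteration
    for envName in envNames:
        runGeneration(trainer, envName)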
            frames = 1000
        elif curGen == 25:
            frames = 5000
        elif curGen == 50:
            frames = 18000

        pool.map(runAgent,
                 [(agent, envName, scoreList, 1, frames)
                  for agent in agents])

        # apply scores
        trainer.applyScores(scoreList)

        tasks = [envName + '-' + str(frames)]
        scoreStats = trainer.generateScoreStats(tasks=tasks)
        allScores.append((envName, scoreStats['min'],
                          scoreStats['max'], scoreStats['average']))

        trainer.evolve(tasks=tasks, fitShare=False)  # go into next gen

        # save model after every gen
        with open('saved-model-sgp.pkl', 'wb') as f:
            pickle.dump(trainer, f)

        # save best agent after every gen
        #with open('best-agent-sgp.pkl', 'wb') as f:
        #    pickle.dump(trainer.getBestAgents(tasks=tasks)[0], f)

        print('Time Taken (Seconds): ' + str(time.time() - tStart))
        print('On Generation: ' + str(curGen))
        #print('Results: ', str(allScores))

        with open(logFileName, 'a') as f:
            f.write(str(curGen) + ' | ' + str(envName) + ' | ' +
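# The if/elif chain above (its opening branch is cut off in this excerpt)
# steps the per-episode frame budget up as evolution progresses, a simple
# curriculum. The same schedule as an illustrative helper, assuming the
# budget persists once each generation threshold is passed:

def framesForGen(curGen):
    # budget rises at generations 25 and 50 and then stays there
    if curGen >= 50:
        return 18000
    elif curGen >= 25:
        return 5000
    return 1000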
allScores = []  # track all scores each generation

tStart = time.time()

logFileName = ('train-log-' +
               datetime.datetime.now().strftime("%Y-%m-%d-%H-%M") + '.txt')

while True:  # do generations with no end
    scoreList = man.list()

    pool.map(runAgent,
             [(agent, scoreList, 0.05)
              for agent in trainer.getAllAgents(noRef=True)])

    # apply scores
    trainer.applyScores(scoreList)
    trainer.evolve(fitShare=False, tasks=[])
    scoreStats = trainer.scoreStats
    allScores.append((trainer.curGen, scoreStats['min'],
                      scoreStats['max'], scoreStats['average']))

    # save model after every gen
    with open('saved-model-1.pkl', 'wb') as f:
        pickle.dump(trainer, f)

    # save best agent each generation
    #with open('best-agent.pkl', 'wb') as f:
    #    pickle.dump(trainer.getBestAgent(tasks=[]), f)

    print(chr(27) + "[2J")  # clear terminal
    print('Time So Far (Seconds): ' + str(time.time() - tStart))
    print('Results: ', str(allScores))
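# The man.list() / pool.map pattern used throughout these scripts assumes a
# multiprocessing setup along these lines (a sketch, not from the original):

import multiprocessing as mp

man = mp.Manager()                        # man.list() is process-shared
pool = mp.Pool(processes=mp.cpu_count())  # worker pool for runAgent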