def runPopulation(envName="Boxing-v0", gens=1000, popSize=360, reps=3, frames=18000, nRandFrames=30):
    """Serially train a TPG population on a single Gym environment.

    Args:
        envName: Gym environment id to train on.
        gens: number of generations to evolve.
        popSize: team population size passed to the Trainer.
        reps: episodes per agent; fitness is the mean episode score.
        frames: maximum frames per episode.
        nRandFrames: leading frames played with random actions so that
            individual runs differ (stochasticity).

    Returns:
        (trainer, lastStats) where lastStats is the (min, max, avg)
        fitness tuple of the final generation.
    """
    # Probe the environment once to learn the size of its action space.
    env = gym.make(envName)
    acts = env.action_space.n

    trainer = Trainer(actions=range(acts), teamPopSize=popSize)

    tStart = time.time()
    allScores = []  # one (min, max, avg) fitness tuple per generation

    for gen in range(gens):
        # Swap out the agent list only at the start of each generation.
        agents = trainer.getAgents()

        while agents:
            agent = agents.pop()
            # Skip agents whose fitness on this task is already recorded.
            if agent.taskDone(envName):
                continue

            score = 0
            for rep in range(reps):  # average fitness over several episodes
                state = env.reset()
                for frame in range(frames):
                    # Opening frames are random; their reward is not counted.
                    if frame < nRandFrames:
                        state, reward, isDone, debug = env.step(env.action_space.sample())
                        continue

                    act = agent.act(getStateALE(np.array(state, dtype=np.int32)))
                    state, reward, isDone, debug = env.step(act)
                    score += reward  # accumulate reward across all reps
                    if isDone:
                        break  # end the episode early (e.g. losing state)

            agent.reward(score/reps, envName)
            print('Agent #' + str(agent.agentNum) + ' | Score: ' + str(score/reps))

        # Current generation fully evaluated: evolve and record stats.
        trainer.evolve(tasks=[envName])
        scoreStats = trainer.fitnessStats
        allScores.append((scoreStats['min'], scoreStats['max'], scoreStats['average']))

        print('Time Taken (Hours): ' + str((time.time() - tStart)/3600))
        print('Gen: ' + str(gen))
        print('Results so far: ' + str(allScores))

    print('Time Taken (Hours): ' + str((time.time() - tStart)/3600))
    print('Results:\nMin, Max, Avg')
    for score in allScores:
        print(score[0], score[1], score[2])

    return trainer, allScores[-1]
def train():
    """Train a TPG population on the VizDoom 'deadly_corridor' scenario.

    A throwaway DoomGame is constructed once up front so a bad config fails
    fast; the multiprocessing workers (runAgent) build their own instances.
    Runs 100 generations, evaluating agents across a process pool, then
    saves the trainer to 'trainer.tn'.
    """
    tStart = time.time()
    # stack_size=4

    envName = 'deadly_corridor.cfg'

    # Validate the scenario config with a temporary game instance.
    game = DoomGame()
    game.load_config(envName)
    game.set_sound_enabled(False)
    game.set_screen_resolution(ScreenResolution.RES_640X480)
    game.set_window_visible(False)
    game.init()
    # acts = game.get_available_buttons_size()
    del game

    trainer = Trainer(actions=range(4), teamPopSize=30, rTeamPopSize=30)
    # trainer = loadTrainer('trainer.tn')

    processes = 7
    man = mp.Manager()
    pool = mp.Pool(processes=processes, maxtasksperchild=1)
    try:
        allScores = []  # one (min, max, avg) fitness tuple per generation

        for gen in range(100):  # do 100 generations of training
            scoreList = man.list()  # shared list the workers append scores to

            # Swap out agents only at the start of each generation; workers
            # hold no reference back to the trainer.
            agents = trainer.getAgents()

            # Evaluate the whole generation in parallel.
            pool.map(runAgent, [(agent, envName, scoreList, 1, 2000) for agent in agents])

            # Because workers can't touch the trainer, scores must be applied
            # back explicitly after the parallel evaluation.
            trainer.applyScores(scoreList)

            # Task name must match the one recorded in runAgent().
            trainer.evolve(tasks=[envName])  # go into next gen

            scoreStats = trainer.fitnessStats
            allScores.append((scoreStats['min'], scoreStats['max'], scoreStats['average']))

            print('Time Taken (Hours): ' + str((time.time() - tStart)/3600))
            print('Gen: ' + str(gen))
            print('Results so far: ' + str(allScores))
    finally:
        # BUG FIX: the pool and manager were never shut down, leaking the
        # worker and manager processes on every call.
        pool.close()
        pool.join()
        man.shutdown()

    print('Time Taken (Hours): ' + str((time.time() - tStart)/3600))
    print('Results:\nMin, Max, Avg')
    for score in allScores:
        print(score[0], score[1], score[2])

    trainer.saveToFile('trainer.tn')
def runPopulationParallel(envName="Boxing-v0", gens=1000, popSize=360, reps=3, frames=18000, processes=4, nRandFrames=30):
    """Train a TPG population on a Gym environment using a process pool.

    Args:
        envName: Gym environment id to train on.
        gens: number of generations to evolve.
        popSize: team population size passed to the Trainer.
        reps: episodes per agent (averaged by the worker).
        frames: maximum frames per episode.
        processes: number of worker processes in the pool.
        nRandFrames: leading random frames per episode for stochasticity.

    Returns:
        (trainer, lastStats) where lastStats is the (min, max, avg)
        fitness tuple of the final generation.
    """
    tStart = time.time()

    # Probe the environment once to learn the action-space size, then drop it;
    # each worker creates its own environment.
    env = gym.make(envName)
    acts = env.action_space.n
    del env

    trainer = Trainer(actions=range(acts), teamPopSize=popSize)

    man = mp.Manager()
    pool = mp.Pool(processes=processes, maxtasksperchild=1)
    try:
        allScores = []  # one (min, max, avg) fitness tuple per generation

        for gen in range(gens):  # do generations of training
            scoreList = man.list()  # shared list the workers append scores to

            # Swap out agents only at the start of each generation.
            agents = trainer.getAgents()

            # Evaluate the whole generation in parallel.
            pool.map(runAgentParallel,
                     [(agent, envName, scoreList, reps, frames, nRandFrames) for agent in agents])

            # Apply worker scores back to the trainer, then evolve.
            trainer.applyScores(scoreList)
            trainer.evolve(tasks=[envName])  # go into next gen

            scoreStats = trainer.fitnessStats
            allScores.append((scoreStats['min'], scoreStats['max'], scoreStats['average']))

            print(f"Gen: {gen}, Best Score: {scoreStats['max']}, Time: {str((time.time() - tStart)/3600)}")
    finally:
        # BUG FIX: the pool and manager were never shut down, leaking the
        # worker and manager processes on every call.
        pool.close()
        pool.join()
        man.shutdown()

    print('Time Taken (Hours): ' + str((time.time() - tStart) / 3600))
    print('Results:\nMin, Max, Avg')
    for score in allScores:
        print(score[0], score[1], score[2])

    return trainer, allScores[-1]
# --- script setup: load an existing population from disk, or create one ---
print("Comment 1")

if path.exists("Tzeentch") and path.exists("Magnus"):
    # Resume: a saved trainer and a saved agent both exist on disk.
    trainer = Trainer.loadTrainer("Tzeentch")
    agents = trainer.getAgents()
    agent = Agent.loadAgent("Magnus")
    print("yo as far as I understand the load was successful?")
else:
    # Fresh start: build a new population and checkpoint it immediately.
    IQ = 0
    trainer = Trainer(actions=range(30),
                      teamPopSize=opts.popSize,
                      rTeamPopSize=opts.popSize,
                      sourceRange=310)
    agents = trainer.getAgents()
    agent = agents.pop()
    agent.saveToFile("Magnus")
    #psykerLevel = 0
    trainer.saveToFile("Tzeentch")

agentScores = []
curGen = 0
#psykerLevel += agent.psykerLevel
lastState = None

# create a timestamped log file name for this run
global logName
timestamp = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M")
logName = "log-{}.txt".format(timestamp)
def runPopulationParallel(envName="Boxing-v0", gens=1000, popSize=360, reps=3,
                          frames=18000, processes=4, nRandFrames=30,
                          rootBasedPop=True, memType=None, operationSet="full",
                          rampancy=(5, 5, 5), traversal="team", do_real=False):
    """Train a TPG population on an Atari environment with multiprocessing.

    Extended variant: exposes the Trainer's structural knobs (root-based
    population, memory type, operation set, rampancy, traversal) and an
    optional real-valued action mode.

    Args:
        envName: Gym environment id to train on.
        gens: number of generations to evolve.
        popSize: team population size passed to the Trainer.
        reps: episodes per agent (averaged by the worker).
        frames: maximum frames per episode.
        processes: number of worker processes in the pool.
        nRandFrames: leading random frames per episode for stochasticity.
        rootBasedPop, memType, operationSet, rampancy, traversal: forwarded
            to the Trainer constructor unchanged.
        do_real: if True, use two real-valued action registers instead of the
            environment's discrete action set.

    Returns:
        (trainer, lastStats) where lastStats is the (min, max, avg)
        fitness tuple of the final generation.
    """
    tStart = time.time()

    # Python really is something special... sometimes it just deadlocks...
    # https://pythonspeed.com/articles/python-multiprocessing/
    # NOTE(review): set_start_method() raises RuntimeError if the start method
    # has already been set, so this function can only be called once per
    # process — confirm that is the intended usage.
    set_start_method("spawn")

    print("creating atari environment")
    # Probe the environment once for its action-space size, then drop it;
    # each worker creates its own environment.
    env = gym.make(envName)
    acts = env.action_space.n
    del env

    print("creating trainer")
    if do_real:
        # Real-valued action mode: two action registers instead of a
        # discrete action count.
        trainer = Trainer(actions=[1, 1], teamPopSize=popSize, rootBasedPop=rootBasedPop,
                          memType=memType, operationSet=operationSet,
                          rampancy=rampancy, traversal=traversal)
    else:
        trainer = Trainer(actions=acts, teamPopSize=popSize, rootBasedPop=rootBasedPop,
                          memType=memType, operationSet=operationSet,
                          rampancy=rampancy, traversal=traversal)
    trainer.configFunctions()

    man = mp.Manager()
    pool = mp.Pool(processes=processes, maxtasksperchild=1)
    try:
        allScores = []  # one (min, max, avg) fitness tuple per generation

        print("running generations")
        for gen in range(gens):  # do generations of training
            print("doing generation {}".format(gen))
            scoreList = man.list()  # shared list the workers append scores to

            # Swap out agents only at the start of each generation.
            # (BUG FIX: removed dead leftover `agent = agents[0]`.)
            agents = trainer.getAgents()

            try:
                # Evaluate the whole generation in parallel.
                pool.map(runAgentParallel, [
                    (agent, envName, scoreList, reps, frames, nRandFrames, do_real)
                    for agent in agents
                ])
            except Exception as mpException:
                print("Exception occured while running multiprocessing via pool.map!")
                print(mpException)
                raise mpException

            print("Applying gen {} scores to agents".format(gen))
            trainer.applyScores(scoreList)

            print("Getting champion")
            champ = trainer.getAgents(sortTasks=[envName])[0].team

            print("Evolving population")
            trainer.evolve(tasks=[envName])  # go into next gen

            scoreStats = trainer.fitnessStats
            allScores.append((scoreStats['min'], scoreStats['max'], scoreStats['average']))

            print(
                "teams: {}, rTeams: {}, learners: {}, Champ Teams: {}, Champ Learners: {}, Champ Instructions: {}."
                .format(len(trainer.teams), len(trainer.rootTeams), len(trainer.learners),
                        len(getTeams(champ)), len(getLearners(champ)),
                        learnerInstructionStats(getLearners(champ), trainer.operations)))
            #print(actionInstructionStats(getLearners(champ), trainer.operations))
            print(
                f"Gen: {gen}, Best Score: {scoreStats['max']}, Avg Score: {scoreStats['average']}, Time: {str((time.time() - tStart)/3600)}"
            )
            print(pathDepths(champ))
    finally:
        # BUG FIX: the pool and manager were never shut down, leaking the
        # spawned worker and manager processes on every call.
        pool.close()
        pool.join()
        man.shutdown()

    print('Time Taken (Hours): ' + str((time.time() - tStart) / 3600))
    print('Results:\nMin, Max, Avg')
    for score in allScores:
        print(score[0], score[1], score[2])

    return trainer, allScores[-1]