def run(self):
    learner = False
    #only load weights from file when testing a saved model
    load = self.args.load and self.args.mode == 'test'
    neural_networks = gen_neural_networks(self.args,
                                          self.netdata,
                                          self.args.tsc,
                                          self.netdata['inter'].keys(),
                                          learner,
                                          load,
                                          self.args.n_hidden)

    print('sim proc ' + str(self.idx) + ' waiting at barrier ---------')
    write_to_log(' ACTOR #' + str(self.idx) + ' WAITING AT SYNC WEIGHTS BARRIER...')
    #barrier
    self.barrier.wait()
    write_to_log(' ACTOR #' + str(self.idx) + ' BROKEN SYNC BARRIER...')

    #grab weights from learner or load from file
    if self.args.l > 0 and self.args.mode == 'train':
        neural_networks = self.sync_nn_weights(neural_networks)

    if self.args.mode == 'train':
        while not self.finished_updates():
            self.run_sim(neural_networks)
            if self.eps == 1.0 or self.eps < 0.02:
                self.write_to_csv(self.sim.sim_stats())
            #self.write_travel_times()
            self.sim.close()
    elif self.args.mode == 'test':
        print(str(self.idx) + ' test waiting at offset ------------- ' + str(self.offset))
        print(str(self.idx) + ' test broken offset =================== ' + str(self.offset))
        self.initial = False
        #just run one sim for stats
        self.run_sim(neural_networks)
        if self.eps == 1.0 or self.eps < 0.02:
            self.write_to_csv(self.sim.sim_stats())
            with open(str(self.eps) + '.csv', 'a+') as f:
                f.write('-----------------\n')
        self.write_sim_tsc_metrics()
        #self.write_travel_times()
        self.sim.close()

    print('------------------\nFinished on sim process ' + str(self.idx) + ' Closing\n---------------')
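#The training loop above polls self.finished_updates() between sims. A minimal
#sketch of that check is given below; it assumes the learner increments
#rl_stats[tsc]['updates'] in shared memory and that self.args.updates is the
#target update count. The body is an assumption for illustration, not the
#project's actual implementation.
def finished_updates(self):
    #keep generating experience until every intersection's network
    #has received the requested number of batch updates
    return all(self.rl_stats[tsc]['updates'] >= self.args.updates
               for tsc in self.netdata['inter'].keys())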
def run_sim(self, neural_networks): start_t = time.time() self.sim.gen_sim() if self.initial is True: #if the initial sim, run until the offset time reached self.initial = False self.sim.run_offset(self.offset) print( str(self.idx) + ' train waiting at offset ------------- ' + str(self.offset) + ' at ' + str(get_time_now())) write_to_log(' ACTOR #' + str(self.idx) + ' FINISHED RUNNING OFFSET ' + str(self.offset) + ' to time ' + str(self.sim.t) + ' , WAITING FOR OTHER OFFSETS...') self.barrier.wait() print( str(self.idx) + ' train broken offset =================== ' + str(self.offset) + ' at ' + str(get_time_now())) write_to_log(' ACTOR #' + str(self.idx) + ' BROKEN OFFSET BARRIER...') self.sim.create_tsc(self.rl_stats, self.exp_replays, self.eps, neural_networks) write_to_log('ACTOR #' + str(self.idx) + ' START RUN SIM...') self.sim.run() print('sim finished in ' + str(time.time() - start_t) + ' on proc ' + str(self.idx)) write_to_log('ACTOR #' + str(self.idx) + ' FINISHED SIM...')
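#Before training, the actor pulls the learner's freshly initialised weights via
#self.sync_nn_weights(). A minimal sketch, assuming the learner has published
#per-agent weights into the shared rl_stats dict under an 'online' key and that
#each network exposes a set_weights() helper; both the key and the helper are
#hypothetical names introduced here for illustration only.
def sync_nn_weights(self, neural_networks):
    for tsc in self.netdata['inter'].keys():
        #copy the learner's published parameters into this actor's local copy
        weights = self.rl_stats[tsc]['online']
        neural_networks[tsc].set_weights(weights, 'online')
    return neural_networks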
def run(self):
    #gen neural networks
    learner = True
    neural_networks = gen_neural_networks(self.args,
                                          self.netdata,
                                          self.args.tsc,
                                          self.agent_ids,
                                          learner,
                                          self.args.load,
                                          self.args.n_hidden)

    print('learner proc trying to send weights------------')
    write_to_log(' LEARNER #' + str(self.idx) + ' SENDING WEIGHTS...')
    neural_networks = self.distribute_weights(neural_networks)

    #wait for all procs to sync weights
    print('learner waiting at barrier ------------')
    write_to_log(' LEARNER #' + str(self.idx) + ' FINISHED SENDING WEIGHTS, WAITING AT BARRIER...')
    self.barrier.wait()
    write_to_log(' LEARNER #' + str(self.idx) + ' GENERATING AGENTS...')

    if self.args.load_replay:
        self.load_replays()

    #create agents
    agents = self.gen_agents(neural_networks)

    print('learner proc ' + str(self.idx) + ' waiting at offset barrier------------')
    write_to_log(' LEARNER #' + str(self.idx) + ' FINISHED GEN AGENTS, WAITING AT OFFSET BARRIER...')
    self.barrier.wait()
    write_to_log(' LEARNER #' + str(self.idx) + ' BROKEN OFFSET BARRIER...')
    print('learner proc ' + str(self.idx) + ' broken offset barrier ------------')

    self.save_t = time.time()
    othert = time.time()

    #keep looping until all agents have achieved sufficient batch updates
    while not self.finished_learning(self.agent_ids):
        for tsc in self.agent_ids:
            #wait until exp replay buffer is full
            if len(self.exp_replay[tsc]) >= self.args.nreplay:
                #reset the number of experiences once, when the
                #exp replay is filled for the first time
                if self.rl_stats[tsc]['updates'] == 0:
                    if self.args.save:
                        self.save_replays()
                    print(tsc + ' exp replay full, beginning batch updates********')
                    #write_to_log(' LEARNER #' + str(self.idx) + ' START LEARNING ' + str(tsc))
                    self.rl_stats[tsc]['n_exp'] = len(self.exp_replay[tsc])
                if self.rl_stats[tsc]['updates'] < self.args.updates and self.rl_stats[tsc]['n_exp'] > 0:
                    for i in range(min(self.rl_stats[tsc]['n_exp'], 4)):
                        agents[tsc].train_batch(self.args.target_freq)
                    agents[tsc].clip_exp_replay()

        #periodically log replay/update progress for all agents
        t = time.time()
        if t - othert > 90:
            othert = t
            n_replay = [str(len(self.exp_replay[i])) for i in self.agent_ids]
            updates = [str(self.rl_stats[i]['updates']) for i in self.agent_ids]
            nexp = [str(self.rl_stats[i]['n_exp']) for i in self.agent_ids]
            write_to_log(' LEARNER #' + str(self.idx) + '\n' + str(self.agent_ids) + '\n' + str(nexp) + '\n' + str(n_replay) + '\n' + str(updates))

        #save weights periodically
        if self.args.save:
            if self.time_to_save():
                self.save_weights(neural_networks)
                #write agent training progress, only on one learner
                if self.idx == 0:
                    self.write_progress()

    write_to_log(' LEARNER #' + str(self.idx) + ' FINISHED TRAINING LOOP ===========')

    if self.idx == 0:
        #if other agents aren't finished learning, keep updating progress
        while not self.finished_learning(self.tsc_ids):
            if self.time_to_save():
                self.write_progress()
                if self.args.save:
                    self.save_weights(neural_networks)

    print('finished learning for all agents on learner proc ' + str(self.idx))
    n_replay = [str(len(self.exp_replay[i])) for i in self.agent_ids]
    write_to_log(' LEARNER #' + str(self.idx) + ' FINISHED REPLAY ' + str(n_replay))
    updates = [str(self.rl_stats[i]['updates']) for i in self.agent_ids]
    write_to_log(' LEARNER #' + str(self.idx) + ' FINISHED UPDATES ' + str(updates))
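#self.time_to_save() gates both the periodic checkpointing and the progress
#writes in the loops above. A minimal sketch, assuming self.save_t holds the
#time of the last save and a fixed interval; the 300 s default and the
#'save_freq' argument name are assumptions, not the project's actual options.
def time_to_save(self):
    #return True and reset the timer once the save interval has elapsed
    if time.time() - self.save_t > getattr(self.args, 'save_freq', 300):
        self.save_t = time.time()
        return True
    return False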