Exemplo n.º 1
0
    def run(self):
        """Entry point of a simulation (actor) process.

        Steps, in order:

        1. Build the neural networks with ``gen_neural_networks``. The
           ``learner`` flag is always ``False`` here, and ``load`` is only
           ``True`` when ``self.args.load == True`` and the mode is
           ``'test'``.
        2. Wait on ``self.barrier``; afterwards, when ``self.args.l > 0`` and
           the mode is ``'train'``, replace the networks with the result of
           ``self.sync_nn_weights``.
        3. ``'train'`` mode: run simulations until ``self.finished_updates()``
           is true, closing the simulation after each run.
           ``'test'`` mode: run a single simulation and write its metrics.

        Simulation stats are written to CSV only when ``self.eps`` equals
        ``1.0`` or is below ``0.02``.
        """
        learner = False
        # bool() keeps ``load`` a strict True/False like the original branch.
        load = bool(self.args.load == True and self.args.mode == 'test')

        tsc_ids = self.netdata['inter'].keys()
        neural_networks = gen_neural_networks(self.args, self.netdata,
                                              self.args.tsc, tsc_ids,
                                              learner, load,
                                              self.args.n_hidden)

        proc_id = str(self.idx)
        actor_tag = ' ACTOR #' + proc_id

        # Rendezvous with the other processes before touching any weights.
        print('sim proc ' + proc_id + ' waiting at barrier ---------')
        write_to_log(actor_tag + ' WAITING AT SYNC WEIGHTS BARRIER...')
        self.barrier.wait()
        write_to_log(actor_tag + '  BROKEN SYNC BARRIER...')

        if self.args.l > 0 and self.args.mode == 'train':
            neural_networks = self.sync_nn_weights(neural_networks)

        if self.args.mode == 'train':
            # Keep simulating until the required updates have been made.
            while not self.finished_updates():
                self.run_sim(neural_networks)
                eps = self.eps
                if eps == 1.0 or eps < 0.02:
                    self.write_to_csv(self.sim.sim_stats())
                self.sim.close()

        elif self.args.mode == 'test':
            offset = str(self.offset)
            print(proc_id + ' test  waiting at offset ------------- ' + offset)
            print(proc_id + ' test broken offset =================== ' +
                  offset)
            # Clearing the flag makes run_sim skip its offset/barrier phase.
            self.initial = False
            # A single simulation is enough to collect test statistics.
            self.run_sim(neural_networks)
            eps = self.eps
            stats_wanted = eps == 1.0 or eps < 0.02
            # The mode re-check mirrors the original condition exactly.
            if stats_wanted and self.args.mode == 'test':
                self.write_to_csv(self.sim.sim_stats())
                # Append a separator line to the per-epsilon CSV file.
                with open(str(eps) + '.csv', 'a+') as f:
                    f.write('-----------------\n')
            self.write_sim_tsc_metrics()
            self.sim.close()

        print('------------------\nFinished on sim process ' + proc_id +
              ' Closing\n---------------')
Exemplo n.º 2
0
    def run_sim(self, neural_networks):
        """Generate and run one simulation, reporting its wall-clock time.

        While ``self.initial`` is ``True`` (i.e. on the first call, unless
        the flag was cleared beforehand) the simulation is first advanced via
        ``self.sim.run_offset(self.offset)`` and this process then waits on
        ``self.barrier`` before continuing. The flag is cleared so later
        calls skip that phase.

        Args:
            neural_networks: forwarded unchanged to ``self.sim.create_tsc``.
        """
        t_begin = time.time()
        self.sim.gen_sim()

        if self.initial is True:
            # First simulation only: run up to the offset, then rendezvous.
            self.initial = False
            self.sim.run_offset(self.offset)
            print('%s train  waiting at offset ------------- %s at %s' %
                  (self.idx, self.offset, get_time_now()))
            write_to_log(
                ' ACTOR #%s FINISHED RUNNING OFFSET %s to time %s'
                ' , WAITING FOR OTHER OFFSETS...' %
                (self.idx, self.offset, self.sim.t))
            self.barrier.wait()
            print('%s train  broken offset =================== %s at %s' %
                  (self.idx, self.offset, get_time_now()))
            write_to_log(' ACTOR #%s  BROKEN OFFSET BARRIER...' % (self.idx,))

        self.sim.create_tsc(self.rl_stats, self.exp_replays, self.eps,
                            neural_networks)
        write_to_log('ACTOR #%s  START RUN SIM...' % (self.idx,))
        self.sim.run()
        # Elapsed time covers generation, the optional offset phase and the run.
        print('sim finished in %s on proc %s' %
              (time.time() - t_begin, self.idx))
        write_to_log('ACTOR #%s  FINISHED SIM...' % (self.idx,))
Exemplo n.º 3
0
    def run(self):
        """Entry point of a learner process.

        Steps, in order:

        1. Build the neural networks for ``self.agent_ids`` (with the
           ``learner`` flag set) and pass them through
           ``self.distribute_weights``.
        2. Wait on ``self.barrier``, optionally load replays
           (``self.args.load_replay``), create the agents, then wait on the
           barrier a second time.
        3. Loop until ``self.finished_learning(self.agent_ids)``: every agent
           whose replay buffer holds at least ``self.args.nreplay``
           experiences gets up to four ``train_batch`` calls per pass while
           it is below ``self.args.updates`` updates. Progress is logged
           roughly every 90 seconds and, when ``self.args.save`` is set,
           weights are saved whenever ``self.time_to_save()`` is true.
        4. Learner 0 keeps writing progress until
           ``self.finished_learning(self.tsc_ids)`` (presumably the agents of
           all learners -- confirm against the class definition).
        5. Save the final weights (if enabled) and log the closing stats.
        """
        learner = True
        log_tag = ' LEARNER #' + str(self.idx)
        proc_id = str(self.idx)

        neural_networks = gen_neural_networks(self.args,
                                              self.netdata,
                                              self.args.tsc,
                                              self.agent_ids,
                                              learner,
                                              self.args.load,
                                              self.args.n_hidden)

        print('learner proc trying to send weights------------')
        write_to_log(log_tag + ' SENDING WEIGHTS...')
        neural_networks = self.distribute_weights(neural_networks)

        # First rendezvous: every process has its weights in sync.
        print('learner waiting at barrier ------------')
        write_to_log(log_tag +
                     ' FINISHED SENDING WEIGHTS, WAITING AT BARRIER...')
        self.barrier.wait()
        write_to_log(log_tag + ' GENERATING AGENTS...')

        if self.args.load_replay:
            self.load_replays()

        agents = self.gen_agents(neural_networks)

        # Second rendezvous: the offset barrier.
        print('learner proc ' + proc_id +
              ' waiting at offset barrier------------')
        write_to_log(log_tag +
                     ' FINISHED GEN AGENTS, WAITING AT OFFSET BARRIER...')
        self.barrier.wait()
        write_to_log(log_tag + ' BROKEN OFFSET BARRIER...')
        print('learner proc ' + proc_id + ' broken offset barrier ------------')

        self.save_t = time.time()
        last_report_t = time.time()

        # Train until every agent of this learner has done enough updates.
        while not self.finished_learning(self.agent_ids):
            for tsc in self.agent_ids:
                # Nothing to do until this agent's replay buffer is full.
                if len(self.exp_replay[tsc]) < self.args.nreplay:
                    continue

                if self.rl_stats[tsc]['updates'] == 0:
                    # No updates done yet: (re)set the experience counter
                    # to the current buffer size.
                    if self.args.save:
                        self.save_replays()
                    print(tsc+' exp replay full, beginning batch updates********')
                    self.rl_stats[tsc]['n_exp'] = len(self.exp_replay[tsc])

                if (self.rl_stats[tsc]['updates'] < self.args.updates
                        and self.rl_stats[tsc]['n_exp'] > 0):
                    # At most four batch updates per agent per pass.
                    for _ in range(min(self.rl_stats[tsc]['n_exp'], 4)):
                        agents[tsc].train_batch(self.args.target_freq)
                    agents[tsc].clip_exp_replay()

            # Periodic status report to the log (every 90+ seconds).
            now = time.time()
            if now - last_report_t > 90:
                last_report_t = now
                n_replay = [str(len(self.exp_replay[a])) for a in self.agent_ids]
                updates = [str(self.rl_stats[a]['updates']) for a in self.agent_ids]
                nexp = [str(self.rl_stats[a]['n_exp']) for a in self.agent_ids]
                report = [str(self.agent_ids), str(nexp),
                          str(n_replay), str(updates)]
                write_to_log(log_tag + '\n' + '\n'.join(report))

            # Periodic checkpoint; only learner 0 writes training progress.
            if self.args.save and self.time_to_save():
                self.save_weights(neural_networks)
                if self.idx == 0:
                    self.write_progress()

        write_to_log(log_tag + ' FINISHED TRAINING LOOP ===========')

        if self.idx == 0:
            # Other agents may still be learning: keep reporting progress.
            while not self.finished_learning(self.tsc_ids):
                if self.time_to_save():
                    self.write_progress()

        if self.args.save:
            self.save_weights(neural_networks)

        print('finished learning for all agents on learner proc ' + proc_id)
        n_replay = [str(len(self.exp_replay[a])) for a in self.agent_ids]
        write_to_log(log_tag + ' FINISHED REPLAY ' + str(n_replay))
        updates = [str(self.rl_stats[a]['updates']) for a in self.agent_ids]
        write_to_log(log_tag + ' FINISHED UPDATES' + str(updates))