def work(self, COORD, render=False):
    """Main worker loop: run episodes in the environment and use the
    collected experience to update the main Actor-Critic network.

    Args:
        COORD: tf.train.Coordinator shared by all workers; the loop exits
            when it signals a stop or the global step reaches
            ``settings["MaxEpisodes"]``.
        render: If True, render the environment after every step.
    """
    while not COORD.should_stop() and self.sess.run(
            self.global_step) < self.settings["MaxEpisodes"]:
        self.sess.run(self.global_step_next)
        # Read the shared global step ONCE per episode. Other workers
        # increment it concurrently, so repeated sess.run() calls could
        # return different values within a single iteration — which made
        # the log/save flags and the recorded step inconsistent. A single
        # read is also cheaper than five session round-trips.
        step = self.sess.run(self.global_step)
        logging = interval_flag(step, self.settings["LogFreq"], 'log')
        saving = interval_flag(step, self.settings["SaveFreq"], 'save')

        s0 = self.env.reset()
        # +1 so the loop can run exactly MaxEpisodeSteps steps and the
        # final iteration still triggers the terminal-update branch below.
        for j in range(self.settings["MaxEpisodeSteps"] + 1):
            a, networkData = self.net.GetAction(state=s0)
            s1, r, done, _ = self.env.step(a)
            if render:
                self.env.render()

            self.net.AddToTrajectory([s0, a, r, s1, done] + networkData)
            s0 = s1
            if done or j == self.settings["MaxEpisodeSteps"]:
                # Episode finished (or truncated): push the experience to
                # the global network and sync the local copy.
                self.net.Update(self.settings["NetworkHPs"], step)
                break

        self.progbar.update(step)
        if saving:
            self.saver.save(self.sess,
                            self.MODEL_PATH + '/ctf_policy.ckpt',
                            global_step=step)
        if logging:
            loggingDict = self.env.getLogging()
            # Renamed from ``dict`` — the original shadowed the builtin.
            netStats = self.net.GetStatistics()
            loggingDict.update(netStats)
            Record(loggingDict, self.writer, step)
def work(self, COORD, render=False):
    """Trainer loop: repeatedly update the main Actor-Critic network and
    periodically record its training statistics.

    Args:
        COORD: tf.train.Coordinator shared by all workers; the loop exits
            when it signals a stop or the global step reaches
            ``settings["MaxEpisodes"]``.
        render: Unused here; kept for interface parity with other workers.
    """
    def _step():
        # Current value of the shared global step counter.
        return self.sess.run(self.global_step)

    while not COORD.should_stop() and _step() < self.settings["MaxEpisodes"]:
        should_log = interval_flag(_step(), self.settings["LogFreq"],
                                   'logNet')
        self.net.Update(self.settings["NetworkHPs"], _step())
        if should_log:
            Record(self.net.GetStatistics(), self.writer, _step())
def work(self, COORD, render=False): """Main function of the Workers. This runs the environment and the experience is used to update the main Actor Critic Network. """ #Allowing access to the global variables. while not COORD.should_stop() and self.sess.run( self.global_step) < self.settings["MaxEpisodes"]: self.sess.run(self.global_step_next) logging = interval_flag(self.sess.run(self.global_step), self.settings["LogFreq"], 'log') saving = interval_flag(self.sess.run(self.global_step), self.settings["SaveFreq"], 'save') #Initializing environment and storage variables: s0 = self.env.reset() a_past = [0] r_i_past = [0.0] r_e_past = [0.0] for j in range(self.settings["MaxEpisodeSteps"] + 1): a, networkData = self.net.GetAction(state=s0, episode=self.sess.run( self.global_step), step=j, a_past=a_past, r_i_past=r_i_past, r_e_past=r_e_past) #networkData is expected to be [betaVal,betaOH] s1, r, done, _ = self.env.step(a) if render: self.env.render() #Calculating Intrinsic Reward of the state: r_intrinsic = self.net.GetIntrinsicReward(s0, s1) r_total = r + networkData[0] * r_intrinsic #Adding to the trajectory self.net.AddToTrajectory( [s0, a, r_total, s1, done, a_past, r_i_past, r_e_past] + networkData) #Updating the storage variables. s0 = s1 a_past = a r_i_past = [r_intrinsic] r_e_past = r #Pushing entire trajectory to the buffer if done or j == self.settings["MaxEpisodeSteps"]: self.net.PushToBuffer() break self.progbar.update(self.sess.run(self.global_step)) if logging: loggingDict = self.env.getLogging() Record(loggingDict, self.writer, self.sess.run(self.global_step)) if saving: self.saver.save(self.sess, self.MODEL_PATH + '/ctf_policy.ckpt', global_step=self.sess.run(self.global_step))
writer = tf.summary.FileWriter(LOG_PATH,graph=sess.graph) saver = tf.train.Saver(max_to_keep=3, var_list=net.getVars+[global_step]) net.InitializeVariablesFromFile(saver,MODEL_PATH_) InitializeVariables(sess) #Included to catch if there are any uninitalized variables. progbar = tf.keras.utils.Progbar(None, unit_name='Training',stateful_metrics=["Reward"]) loggingFunctions=[] for loggingFunc in settings["LoggingFunctions"]: func = GetFunction(loggingFunc) loggingFunctions.append(func(env,net,IMAGE_PATH)) for i in range(settings["MAX_EP"]): sess.run(global_step_next) logging = interval_flag(sess.run(global_step), settings["LogFreq"], 'log') saving = interval_flag(sess.run(global_step), settings["SaveFreq"], 'save') s0 = env.reset() for j in range(settings["MAX_EP_STEPS"]+1): updating = interval_flag(j, settings['UPDATE_GLOBAL_ITER'], 'update') a_hier, networkData = net.GetAction(state=s0,episode=sess.run(global_step),step=j) a = UseSubpolicy(s0,a_hier) s1,r,done,_ = env.step(action=a) net.AddToTrajectory([s0,a_hier,r,s1,done]+networkData) s0 = s1 if updating: # update global and assign to local net
InitializeVariables( sess) #Included to catch if there are any uninitalized variables. progbar = tf.keras.utils.Progbar(None, unit_name='Training', stateful_metrics=["Reward"]) if "LoggingFunctions" in settings: loggingFunctions = [] for loggingFunc in settings["LoggingFunctions"]: func = GetFunction(loggingFunc) loggingFunctions.append(func(env, net, IMAGE_PATH)) for i in range(settings["MaxEpisodes"]): sess.run(global_step_next) logging = interval_flag(sess.run(global_step), settings["LogFreq"], 'log') saving = interval_flag(sess.run(global_step), settings["SaveFreq"], 'save') s0 = env.reset(next_config=net.next_task()) for j in range(settings["MaxEpisodeSteps"] + 1): a, networkData = net.GetAction(state=s0, episode=sess.run(global_step), step=j) s1, r, done, info = env.step(action=a) net.AddToTrajectory([s0, a, r, s1, done] + networkData) if args.render: env.render() s0 = s1