class TorchDriverMaster:
    """Central TD3 trainer actor for the driving task.

    On game start it publishes itself in the module-global ``master`` so
    worker actors can locate it, then collects their replay data via
    :meth:`transfer_buffer` and runs one TD3 update per engine tick.
    Hyperparameters (``lr``, ``state_dim``, ``batch_size``, ...) and the
    checkpoint paths (``directory``, ``filename``, ``loadfilename``) are
    read from module-level globals defined elsewhere in this file.
    """

    # Leftover debug attribute; retained so the class interface is unchanged.
    tester = "hello"

    # this is called on game start
    def begin_play(self):
        """Engine start callback: build the replay buffer, policy and logger."""
        global master
        master = self  # make this instance discoverable by worker actors
        self.replay_buffer = ReplayBuffer(max_size=50000)
        # NOTE(review): log text says "TorchActor" although the class is
        # TorchDriverMaster — looks stale, but it is a runtime string so it
        # is reproduced unchanged here.
        ue.log('Begin Play on TorchActor class')
        ue.log("Has CUDA: {}".format(torch.cuda.is_available()))
        self.policy = TD3(lr, state_dim, action_dim, max_action)
        self.frame = 0
        # NOTE(review): unlike TorchWalkerMaster this load is unconditional
        # (no ``loadpol`` guard) — confirm that is intentional.
        self.policy.load(directory, loadfilename)
        self.episode = 0
        self.worker_id = 0
        self.writer = SummaryWriter(os.path.join(directory, filename))

    def get_next_ep(self):
        """Advance the shared episode counter and return the new value."""
        self.episode += 1
        return self.episode

    def get_id(self):
        """Hand out the next unique worker id (0, 1, 2, ...)."""
        assigned = self.worker_id
        self.worker_id += 1
        return assigned

    def write_data(self, ep_reward, ep_reward_avg):
        """Record one finished episode to TensorBoard and bump the counter."""
        ep = self.episode
        self.writer.add_scalar('ep_reward', ep_reward, ep)
        self.writer.add_scalar('ep_avg_reward', ep_reward_avg, ep)
        print("finished ep {}, avgscore: {}".format(ep, ep_reward_avg))
        self.episode += 1

    def transfer_buffer(self, buffer):
        """Merge a worker's replay buffer into the master buffer."""
        self.replay_buffer.mergein(buffer)
        print("buffer merged, length: {}".format(self.replay_buffer.size))

    def tick(self, delta_time):
        """Per-frame callback: one TD3 update, periodic logging and saving."""
        self.frame += 1
        if self.replay_buffer.size:
            # Single update step per frame; unpacks actor/critic losses.
            al, c1l, c2l, prl = self.policy.update(
                self.replay_buffer, 1, batch_size, gamma, polyak,
                policy_noise, noise_clip, policy_delay)
            if self.frame % 60 == 0:  # roughly once per second at 60 fps
                print("aloss:{}".format(al))
        if self.frame % 600 == 0:  # periodic checkpoint
            self.policy.save(directory, filename)
class TorchWalkerMaster:
    """Central TD3 trainer actor for the walker task.

    Publishes itself as the module-global ``master`` on game start, merges
    replay data handed over by worker actors, and — once the buffer holds at
    least 10000 samples — trains on a background thread so the game thread
    is not blocked. ``can_thread`` acts as a gate ensuring only one training
    thread runs at a time. Hyperparameters and checkpoint paths come from
    module-level globals defined elsewhere in this file.
    """

    # this is called on game start
    def begin_play(self):
        """Engine start callback: build the replay buffer, policy and logger."""
        global master
        master = self  # make this instance discoverable by worker actors
        self.replay_buffer = ReplayBuffer(max_size=200000)
        ue.log('Begin Play on TorchWalkerMaster class')
        ue.log("Has CUDA: {}".format(torch.cuda.is_available()))
        self.policy = TD3(lr, state_dim, action_dim, max_action)
        self.frame = 0
        if loadpol:  # optionally resume from a saved checkpoint
            self.policy.load(directory, loadfilename)
        self.episode = 0
        self.worker_id = 0
        self.writer = SummaryWriter(os.path.join(directory, filename))
        self.can_thread = True  # True = no training thread currently running

    def get_next_ep(self):
        """Advance the shared episode counter and return the new value."""
        self.episode += 1
        return self.episode

    def get_id(self):
        """Hand out the next unique worker id (0, 1, 2, ...)."""
        assigned = self.worker_id
        self.worker_id += 1
        return assigned

    def write_data(self, ep_reward, ep_reward_avg, ep_frame):
        """Record one finished episode to TensorBoard and bump the counter."""
        ep = self.episode
        self.writer.add_scalar('ep_reward', ep_reward, ep)
        self.writer.add_scalar('ep_avg_reward', ep_reward_avg, ep)
        self.writer.add_scalar('ep_frame', ep_frame, ep)
        self.episode += 1

    def transfer_buffer(self, buffer):
        """Merge a worker's replay buffer into the master buffer."""
        self.replay_buffer.mergein(buffer)

    def thread_func(self):
        """Background training pass: 200 TD3 updates, then re-arm the gate."""
        if self.replay_buffer.size:
            al, c1l, c2l, prl = self.policy.update(
                self.replay_buffer, 200, batch_size, gamma, polyak,
                policy_noise, noise_clip, policy_delay)
            print("aloss:{}, frame:{}, mem:{}".format(al, self.frame, self.replay_buffer.size))
            self.writer.add_scalar('actor_loss', al, self.frame)
            self.writer.add_scalar('c1_loss', c1l, self.frame)
            self.writer.add_scalar('c2_loss', c2l, self.frame)
        else:
            print("skipping")
            time.sleep(0.01)
        # Re-open the gate so tick() may spawn the next training thread.
        self.can_thread = True

    def thread_func_crit(self):
        """Duplicate of thread_func, kept for a disabled warm-up code path.

        NOTE(review): this body is identical to :meth:`thread_func`; it was
        presumably meant to update only the critics during buffer warm-up.
        Retained unchanged for interface compatibility.
        """
        if self.replay_buffer.size:
            al, c1l, c2l, prl = self.policy.update(
                self.replay_buffer, 200, batch_size, gamma, polyak,
                policy_noise, noise_clip, policy_delay)
            print("aloss:{}, frame:{}, mem:{}".format(al, self.frame, self.replay_buffer.size))
            self.writer.add_scalar('actor_loss', al, self.frame)
            self.writer.add_scalar('c1_loss', c1l, self.frame)
            self.writer.add_scalar('c2_loss', c2l, self.frame)
        else:
            print("skipping")
            time.sleep(0.01)
        self.can_thread = True

    def tick(self, delta_time):
        """Per-frame callback: spawn a training thread, save periodically."""
        self.frame += 1
        # Warm-up: do nothing until the buffer holds enough samples.
        if self.replay_buffer.size < 10000:
            return
        if self.can_thread:
            trainer = threading.Thread(target=self.thread_func)
            trainer.start()
            self.can_thread = False  # closed until thread_func re-arms it
        if self.frame % 600 == 0:  # periodic checkpoint
            self.policy.save(directory, filename)