def create_episodes(self, gamma=0.95, gae_lambda=1.0, device='cpu'):
    """Sample one batch of trajectories and return it with advantages computed.

    Args:
        gamma: Forwarded to ``BatchEpisodes`` (presumably the discount
            factor -- confirm against ``BatchEpisodes``).
        gae_lambda: Forwarded to ``BatchEpisodes.compute_advantages``.
        device: Forwarded to ``BatchEpisodes``.

    Returns:
        The populated ``BatchEpisodes`` container.
    """
    # Container that stores the complete trajectory data for this batch.
    batch = BatchEpisodes(
        batch_size=self.batch_size,
        gamma=gamma,
        device=device,
    )
    created_at = datetime.now(timezone.utc)
    batch.log('_createdAt', created_at)

    # Store everything yielded by sample_trajectories() in the container,
    # timing the sampling loop with wall-clock time.
    started = time.time()
    for transition in self.sample_trajectories():
        batch.append(*transition)
    elapsed = time.time() - started
    batch.log('duration', elapsed)

    # The baseline is fitted on this batch before it is used to compute
    # the advantages.
    baseline = self.baseline
    baseline.fit(batch)
    batch.compute_advantages(baseline, gae_lambda=gae_lambda, normalize=True)
    return batch
def create_episodes(self, params=None, gamma=0.95, gae_lambda=1.0, device='cpu'):
    """Sample one batch of trajectories and return it with advantages computed.

    Args:
        params: Forwarded unchanged to ``self.sample_trajectories``
            (presumably policy parameters -- confirm against that method).
        gamma: Forwarded to ``BatchEpisodes`` (presumably the discount
            factor -- confirm against ``BatchEpisodes``).
        gae_lambda: Forwarded to ``BatchEpisodes.compute_advantages``.
        device: Forwarded to ``BatchEpisodes``.

    Returns:
        The populated ``BatchEpisodes`` container.
    """
    batch = BatchEpisodes(
        batch_size=self.batch_size,
        gamma=gamma,
        device=device,
    )

    # Metadata recorded before sampling starts.
    created_at = datetime.now(timezone.utc)
    batch.log('_createdAt', created_at)
    batch.log('process_name', self.name)

    # Time only the sampling loop (wall-clock seconds).
    started = time.time()
    for transition in self.sample_trajectories(params=params):
        batch.append(*transition)
    elapsed = time.time() - started
    batch.log('duration', elapsed)

    # The baseline is fitted on this batch before it is used to compute
    # the advantages.
    baseline = self.baseline
    baseline.fit(batch)
    batch.compute_advantages(baseline, gae_lambda=gae_lambda, normalize=True)
    return batch