def calc_df_row(self, env):
    '''
    Build one metrics row suitable for appending to train_df or eval_df.

    Frame, wall time, episode and optimization-step counters are read from
    the clock of self.env, while 't' and the total reward are read from the
    given env.

    Side effect: when net_util.to_check_train_step() is truthy,
    self.mean_grad_norm is refreshed from the algorithm's gradient norms
    before the row is built.

    @param env: environment providing clock.t and total_reward
    @returns pd.Series of dtype float32 keyed by metric name
    @raises AssertionError if any row key is not a column of self.train_df
    '''
    clock = self.env.clock
    frame = clock.frame
    wall_t = clock.wall_t
    # avoid dividing by zero before any wall time has elapsed
    if wall_t == 0:
        fps = 0
    else:
        fps = frame / wall_t

    # np.nanmean guards for vec env; silence the warning it may emit
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore')
        total_reward = np.nanmean(env.total_reward)

    # update debugging variables
    if net_util.to_check_train_step():
        norms = net_util.get_grad_norms(self.agent.algorithm)
        if ps.is_empty(norms):
            self.mean_grad_norm = np.nan
        else:
            self.mean_grad_norm = np.mean(norms)

    metrics = {
        # epi and frame are always measured from the training env
        'epi': self.env.clock.epi,
        # t and reward are measured from the given env or eval_env
        't': env.clock.t,
        'wall_t': wall_t,
        'opt_step': self.env.clock.opt_step,
        'frame': frame,
        'fps': fps,
        'total_reward': total_reward,
        # placeholder; the moving average is filled in outside this method
        'total_reward_ma': np.nan,
        'loss': self.loss,
        'lr': self.get_mean_lr(),
        'explore_var': self.explore_var,
        # falls back to NaN when no entropy_coef attribute is set
        'entropy_coef': getattr(self, 'entropy_coef', np.nan),
        'entropy': self.mean_entropy,
        'grad_norm': self.mean_grad_norm,
    }
    row = pd.Series(metrics, dtype=np.float32)

    # sanity check: every key of the row must be a known dataframe column
    unknown_keys = [key for key in row.index if key not in self.train_df.columns]
    assert not unknown_keys, f'Mismatched row keys: {row.index} vs df columns {self.train_df.columns}'
    return row
def flush(self):
    '''
    Refresh the summary statistics, then clear the per-step buffers.

    Updates self.mean_entropy and self.mean_log_prob from the collected
    self.entropies and self.log_probs, and self.mean_grad_norm from the
    algorithm's gradient norms (NaN when none are available). Afterwards
    the action tensor, action distribution and both buffers are reset.
    '''
    # update: averages over the values collected since the last flush
    entropy_tensor = torch.tensor(self.entropies)
    self.mean_entropy = entropy_tensor.mean().item()
    log_prob_tensor = torch.tensor(self.log_probs)
    self.mean_log_prob = log_prob_tensor.mean().item()

    # net.grad_norms is only available in dev mode for efficiency
    norms = net_util.get_grad_norms(self.agent.algorithm)
    if ps.is_empty(norms):
        self.mean_grad_norm = np.nan
    else:
        self.mean_grad_norm = np.mean(norms)

    # flush: drop references and start with empty buffers
    self.action_tensor = self.action_pd = None
    self.entropies, self.log_probs = [], []