Пример #1
0
    def calc_df_row(self, env):
        '''
        Assemble one metrics row (a float32 pd.Series) for train_df or eval_df.

        `env` supplies the `t` and `total_reward` entries; every other clock-based entry
        (epi, frame, wall_t, opt_step) is read from self.env.clock.
        As a side effect, self.mean_grad_norm is refreshed when net_util.to_check_train_step() is truthy.
        '''
        clock = self.env.clock
        frame = clock.frame
        wall_t = clock.wall_t
        # avoid dividing by zero when no wall time has been recorded yet
        if wall_t == 0:
            fps = 0
        else:
            fps = frame / wall_t

        # silence the warning np.nanmean may emit; nanmean is used as a guard for vec env
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore')
            total_reward = np.nanmean(env.total_reward)

        # refresh the debugging variable only on steps selected by net_util
        if net_util.to_check_train_step():
            grad_norms = net_util.get_grad_norms(self.agent.algorithm)
            if ps.is_empty(grad_norms):
                self.mean_grad_norm = np.nan
            else:
                self.mean_grad_norm = np.mean(grad_norms)

        metrics = {
            # epi, opt_step and frame come from self.env.clock regardless of the env passed in
            'epi': clock.epi,
            # t and total_reward come from the env passed in
            't': env.clock.t,
            'wall_t': wall_t,
            'opt_step': clock.opt_step,
            'frame': frame,
            'fps': fps,
            'total_reward': total_reward,
            # placeholder; the original notes this is updated outside this method
            'total_reward_ma': np.nan,
            'loss': self.loss,
            'lr': self.get_mean_lr(),
            'explore_var': self.explore_var,
            # falls back to NaN when the attribute is not defined on this object
            'entropy_coef': getattr(self, 'entropy_coef', np.nan),
            'entropy': self.mean_entropy,
            'grad_norm': self.mean_grad_norm,
        }
        row = pd.Series(metrics, dtype=np.float32)

        # every key of the row must already be a column of train_df
        df_columns = self.train_df.columns
        assert all(key in df_columns for key in row.index), f'Mismatched row keys: {row.index} vs df columns {self.train_df.columns}'
        return row
Пример #2
0
    def flush(self):
        '''
        Refresh the cached mean statistics, then clear the accumulated buffers.

        Recomputes mean_entropy, mean_log_prob and mean_grad_norm, and afterwards resets
        action_tensor, action_pd, entropies and log_probs.
        '''
        # update: collapse the accumulated entropies / log probs into scalar means
        entropy_tensor = torch.tensor(self.entropies)
        self.mean_entropy = entropy_tensor.mean().item()
        log_prob_tensor = torch.tensor(self.log_probs)
        self.mean_log_prob = log_prob_tensor.mean().item()

        # NOTE(review): the original comment says net.grad_norms is only available in dev mode
        # for efficiency - confirm in net_util; an empty result is recorded as NaN
        grad_norms = net_util.get_grad_norms(self.agent.algorithm)
        if ps.is_empty(grad_norms):
            self.mean_grad_norm = np.nan
        else:
            self.mean_grad_norm = np.mean(grad_norms)

        # flush: drop the cached action data and start fresh, separate lists
        self.action_tensor = self.action_pd = None
        self.entropies, self.log_probs = [], []