Example #1
 def _after_step(self, rollout, data, outs):
     logger.log(
         "========================|  Timestep: {}  |========================"
         .format(self.t))
     logger.logkv('serial timesteps', self.t / self.nenv)
     logger.logkv('mean episode length',
                  np.mean(self.runner.get_episode_lengths()))
     logger.logkv('mean episode reward',
                  np.mean(self.runner.get_episode_rewards()))
     logger.logkv(
         'fps',
         int((self.t - self._t_start) / (time.time() - self._time_start)))
     logger.logkv('time_elapsed', time.time() - self._time_start)
     logger.logkv('time spent exploring', self.actor.eps)
     logger.dumpkvs()
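
These hooks use an OpenAI-baselines-style logger: `logger.log` prints a message immediately, while `logger.logkv` buffers key/value pairs that `logger.dumpkvs` then flushes as a single diagnostics row. A minimal self-contained sketch of that pattern, assuming the baselines logger API (the nlimb project may ship its own logger with the same interface); the metric values are illustrative, not taken from the examples:

    import time
    from baselines import logger

    t_start, time_start = 0, time.time()
    t = 10000  # pretend we have run 10k environment steps
    logger.logkv('timesteps', t)
    logger.logkv('fps', int((t - t_start) / max(time.time() - time_start, 1e-8)))
    logger.dumpkvs()  # flush the buffered pairs as one table row
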
Example #2
 def _update_model(self, data):
     if self.rn is not None:
         self._update_running_norm(data['obs'].reshape(
             -1, *data['obs'].shape[2:]))
     dataset, _ = util.make_dataset(data)
     batch = dataset.data_map
     for _ in range(self.args.epochs):
         losses = []
         for b in dataset.iterate_once(self.batch_size * self.nenv):
             out = self._update(b)
             losses.append([out[k] for k in self._loss_keys])
         meanlosses = np.array(losses).mean(axis=0)
         s = 'Losses:  '
         for i, ln in enumerate(self._loss_names):
             s += ln + ': {:08f}  '.format(meanlosses[i])
         logger.log(s)
     return meanlosses
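
Here `dataset.iterate_once` yields shuffled minibatches that together cover the rollout exactly once per epoch. A minimal reimplementation sketch of that pattern, with the dict-of-arrays layout assumed from how `dataset.data_map` is used above:

    import numpy as np

    def iterate_once(data_map, batch_size):
        """Yield shuffled minibatches (dicts of arrays) covering the data once."""
        n = len(next(iter(data_map.values())))
        order = np.random.permutation(n)
        for start in range(0, n, batch_size):
            idx = order[start:start + batch_size]
            yield {key: val[idx] for key, val in data_map.items()}
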
Example #3
File: ppo.py  Project: neuroph12/nlimb
    def _update_model(self, data):
        if self.rn is not None:
            self._update_running_norm(data['obs'].reshape(-1, *data['obs'].shape[2:]))
        dataset, state_init = util.make_dataset(data, self.loss.is_recurrent)
        for _ in range(self.args.epochs_per_iter):
            losses = []
            state = state_init
            for batch in dataset.iterate_once(self.batch_size * self.nenv):
                out = self._update(batch, state)
                state = out['state_out']
                losses.append([out['out'], out['p_loss'], out['v_loss'], out['ent_loss']])

            meanlosses = np.array(losses).mean(axis=0)
            s = 'Losses:  '
            for i, ln in enumerate(['Total', 'Policy', 'Value', 'Entropy']):
                s += ln + ': {:08f}  '.format(meanlosses[i])
            logger.log(s)
        return meanlosses
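
Unlike the feed-forward variant in Example #2, this recurrent version threads the network state through the epoch: each minibatch update consumes the previous `state_out`, and the state is reset to `state_init` at the top of every epoch so the hidden state stays aligned with the start of the rollout.
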
Example #4
File: a2c.py  Project: neuroph12/nlimb
    def _after_step(self, rollout, data, losses):
        self.losses.append([losses['out'], losses['p_loss'], losses['v_loss'], losses['ent_loss']])
        self.vtargs.extend(list(np.array(data['vtarg']).flatten()))
        self.vpreds.extend(list(np.array(data['vpreds']).flatten()))

        self.nsteps += 1
        if self.nsteps % 100 == 0 and self.nsteps > 0:
            logger.log("========================|  Timestep: {}  |========================".format(self.t))
            meanlosses = np.mean(np.array(self.losses), axis=0)
            # Logging stats...
            for i, s in enumerate(['Total Loss', 'Policy Loss', 'Value Loss', 'Entropy']):
                logger.logkv(s, meanlosses[i])
            logger.logkv('timesteps', self.t)
            logger.logkv('serial timesteps', self.t / self.nenv)
            logger.logkv('mean episode length', np.mean(self.runner.get_episode_lengths()))
            logger.logkv('mean episode reward', np.mean(self.runner.get_episode_rewards()))
            logger.logkv('explained var. of vtarg', util.explained_variance(np.array(self.vpreds), np.array(self.vtargs)))
            logger.logkv('fps', int((self.t - self._t_start) / (time.time() - self._time_start)))
            logger.logkv('time_elapsed', time.time() - self._time_start)
            logger.dumpkvs()
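
`util.explained_variance` compares the value predictions with the empirical value targets. Its definition is not shown on this page; a common implementation, matching the OpenAI-baselines helper of the same name, is:

    import numpy as np

    def explained_variance(ypred, y):
        """1 - Var(y - ypred) / Var(y): 1 means a perfect value fit,
        0 is no better than predicting the mean, negative is worse."""
        vary = np.var(y)
        return np.nan if vary == 0 else 1.0 - np.var(y - ypred) / vary
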
Example #5
 def _after_step(self, rollout, data, outs):
     self.nsteps += 1
     if self.nsteps % 100 == 0:
         logger.log(
             "========================|  Timestep: {}  |========================"
             .format(self.t))
         meanloss = np.mean(np.array(self.losses), axis=0)
         # Logging stats...
         logger.logkv('Loss', meanloss)
         logger.logkv('timesteps', self.t)
         logger.logkv('serial timesteps', self.t / self.nenv)
         logger.logkv('mean episode length',
                      np.mean(self.runner.get_episode_lengths()))
         logger.logkv('mean episode reward',
                      np.mean(self.runner.get_episode_rewards()))
         logger.logkv(
             'fps',
             int((self.t - self._t_start) /
                 (time.time() - self._time_start)))
         logger.logkv('time_elapsed', time.time() - self._time_start)
         logger.logkv('time spent exploring', self.actor.eps)
         logger.dumpkvs()
Example #6
 def _after_step(self, rollout, data, update_outs):
     logger.log("After Step")
Example #7
 def _before_step(self):
     logger.log("Before Step")
Example #8
File: ppo.py  Project: neuroph12/nlimb
 def _before_step(self):
     logger.log("========================|  Iteration: {}  |========================".format(self.t // self.timesteps_per_step))