Пример #1
0
    def evaluate(self):
        """Evaluate model."""
        self.pi.eval()
        misc.set_env_to_eval_mode(self.env)

        # Eval policy
        os.makedirs(os.path.join(self.logdir, 'eval'), exist_ok=True)
        outfile = os.path.join(self.logdir, 'eval',
                               self.ckptr.format.format(self.t) + '.json')
        stats = rl_evaluate(self.env, self.pi, self.eval_num_episodes, outfile,
                            self.device)
        logger.add_scalar('eval/mean_episode_reward', stats['mean_reward'],
                          self.t, time.time())
        logger.add_scalar('eval/mean_episode_length', stats['mean_length'],
                          self.t, time.time())

        # Record policy
        # os.makedirs(os.path.join(self.logdir, 'video'), exist_ok=True)
        # outfile = os.path.join(self.logdir, 'video',
        #                        self.ckptr.format.format(self.t) + '.mp4')
        # rl_record(self.env, self.pi, self.record_num_episodes, outfile,
        #           self.device)

        self.pi.train()
        misc.set_env_to_train_mode(self.env)
Пример #2
0
    def evaluate(self):
        """Evaluate."""
        eval_env = VecEpsilonGreedy(VecFrameStack(self.env, self.frame_stack),
                                    self.eval_eps)
        self.qf.eval()
        misc.set_env_to_eval_mode(eval_env)

        # Eval policy
        os.makedirs(os.path.join(self.logdir, 'eval'), exist_ok=True)
        outfile = os.path.join(self.logdir, 'eval',
                               self.ckptr.format.format(self.t) + '.json')
        stats = rl_evaluate(eval_env, self.qf, self.eval_num_episodes, outfile,
                            self.device)
        logger.add_scalar('eval/mean_episode_reward', stats['mean_reward'],
                          self.t, time.time())
        logger.add_scalar('eval/mean_episode_length', stats['mean_length'],
                          self.t, time.time())

        # Record policy
        os.makedirs(os.path.join(self.logdir, 'video'), exist_ok=True)
        outfile = os.path.join(self.logdir, 'video',
                               self.ckptr.format.format(self.t) + '.mp4')
        rl_record(eval_env, self.qf, self.record_num_episodes, outfile,
                  self.device)

        self.qf.train()
        misc.set_env_to_train_mode(self.env)
        self.data_manager.manual_reset()