def evaluate(self): """Evaluate model.""" self.pi.eval() misc.set_env_to_eval_mode(self.env) # Eval policy os.makedirs(os.path.join(self.logdir, 'eval'), exist_ok=True) outfile = os.path.join(self.logdir, 'eval', self.ckptr.format.format(self.t) + '.json') stats = rl_evaluate(self.env, self.pi, self.eval_num_episodes, outfile, self.device) logger.add_scalar('eval/mean_episode_reward', stats['mean_reward'], self.t, time.time()) logger.add_scalar('eval/mean_episode_length', stats['mean_length'], self.t, time.time()) # Record policy # os.makedirs(os.path.join(self.logdir, 'video'), exist_ok=True) # outfile = os.path.join(self.logdir, 'video', # self.ckptr.format.format(self.t) + '.mp4') # rl_record(self.env, self.pi, self.record_num_episodes, outfile, # self.device) self.pi.train() misc.set_env_to_train_mode(self.env)
def evaluate(self): """Evaluate.""" eval_env = VecEpsilonGreedy(VecFrameStack(self.env, self.frame_stack), self.eval_eps) self.qf.eval() misc.set_env_to_eval_mode(eval_env) # Eval policy os.makedirs(os.path.join(self.logdir, 'eval'), exist_ok=True) outfile = os.path.join(self.logdir, 'eval', self.ckptr.format.format(self.t) + '.json') stats = rl_evaluate(eval_env, self.qf, self.eval_num_episodes, outfile, self.device) logger.add_scalar('eval/mean_episode_reward', stats['mean_reward'], self.t, time.time()) logger.add_scalar('eval/mean_episode_length', stats['mean_length'], self.t, time.time()) # Record policy os.makedirs(os.path.join(self.logdir, 'video'), exist_ok=True) outfile = os.path.join(self.logdir, 'video', self.ckptr.format.format(self.t) + '.mp4') rl_record(eval_env, self.qf, self.record_num_episodes, outfile, self.device) self.qf.train() misc.set_env_to_train_mode(self.env) self.data_manager.manual_reset()