def _evaluation_paths(self, policy, evaluation_env):
    """Roll out evaluation episodes and optionally save them as videos.

    Args:
        policy: Policy to evaluate. The rollouts run inside its
            ``evaluation_mode()`` context manager.
        evaluation_env: Environment the evaluation episodes are run in.

    Returns:
        An empty tuple when ``self._eval_n_episodes < 1``; otherwise the
        paths returned by ``rollouts``. On epochs where videos are saved,
        the ``'images'`` entry is popped from every returned path.

    Raises:
        KeyError: If videos should be saved on this epoch but a path has
            no ``'images'`` entry.
    """
    if self._eval_n_episodes < 1:
        return ()

    # TODO(hartikainen): I don't like this way of handling evaluation mode
    # for the policies. We should instead have two separate policies for
    # training and evaluation.
    with policy.evaluation_mode():
        paths = rollouts(
            self._eval_n_episodes,
            evaluation_env,
            policy,
            self.sampler._max_path_length,
            render_kwargs=self._eval_render_kwargs)

    # Videos are written on the first epoch and then on every
    # `_video_save_frequency`-th epoch; a frequency <= 0 disables them.
    should_save_video = (
        self._video_save_frequency > 0
        and (self._epoch == 0
             or (self._epoch + 1) % self._video_save_frequency == 0))

    if should_save_video:
        dt = getattr(self._training_environment, 'dt', 1 / 30)
        # `1 // dt` floors *after* floating-point rounding, so e.g.
        # dt=0.1 yields 9 rather than 10. Round the reciprocal instead,
        # and clamp to 1 so that dt > 1 cannot produce a 0 fps video.
        fps = max(1, int(round(1 / dt)))
        video_dir = os.path.join(os.getcwd(), 'videos')
        for i, path in enumerate(paths):
            # Pop the frames so they are not carried along in the
            # returned paths.
            video_frames = path.pop('images')
            video_file_name = f'evaluation_path_{self._epoch}_{i}.mp4'
            video_file_path = os.path.join(video_dir, video_file_name)
            save_video(video_frames, video_file_path, fps=fps)

    return paths
def _evaluation_paths(self, policy, evaluation_env):
    """Roll out evaluation episodes and optionally save them as videos.

    Args:
        policy: Policy to evaluate. The rollouts run inside its
            ``set_deterministic(self._eval_deterministic)`` context
            manager.
        evaluation_env: Environment the evaluation episodes are run in.

    Returns:
        An empty tuple when ``self._eval_n_episodes < 1``; otherwise the
        paths returned by ``rollouts``. On epochs where videos are saved,
        the ``'images'`` entry is popped from every returned path.

    Raises:
        KeyError: If videos should be saved on this epoch but a path has
            no ``'images'`` entry.
    """
    if self._eval_n_episodes < 1:
        return ()

    with policy.set_deterministic(self._eval_deterministic):
        paths = rollouts(
            self._eval_n_episodes,
            evaluation_env,
            policy,
            self.sampler._max_path_length,
            render_kwargs=self._eval_render_kwargs)

    # Videos are written on the first epoch and then on every
    # `_video_save_frequency`-th epoch; a frequency <= 0 disables them.
    should_save_video = (
        self._video_save_frequency > 0
        and (self._epoch == 0
             or (self._epoch + 1) % self._video_save_frequency == 0))

    if should_save_video:
        dt = getattr(self._training_environment, 'dt', 1 / 30)
        # `1 // dt` floors *after* floating-point rounding, so e.g.
        # dt=0.1 yields 9 rather than 10. Round the reciprocal instead,
        # and clamp to 1 so that dt > 1 cannot produce a 0 fps video.
        fps = max(1, int(round(1 / dt)))
        video_dir = os.path.join(os.getcwd(), 'videos')
        for i, path in enumerate(paths):
            # Pop the frames so they are not carried along in the
            # returned paths.
            video_frames = path.pop('images')
            video_file_name = f'evaluation_path_{self._epoch}_{i}.mp4'
            video_file_path = os.path.join(video_dir, video_file_name)
            save_video(video_frames, video_file_path, fps=fps)

    return paths
def simulate_policy(checkpoint_path,
                    num_rollouts,
                    max_path_length,
                    render_kwargs,
                    video_save_path=None,
                    evaluation_environment_params=None):
    """Load a policy from a checkpoint, roll it out, and optionally save videos.

    Args:
        checkpoint_path: Path to the checkpoint directory. A trailing
            ``/`` is stripped and the path is made absolute.
        num_rollouts: Number of episodes passed to ``rollouts``.
        max_path_length: Passed to ``rollouts`` as ``path_length``.
        render_kwargs: Overrides merged on top of
            ``DEFAULT_RENDER_KWARGS``. ``None`` is treated as no overrides.
        video_save_path: Directory that receives one ``episode_{i}.mp4``
            per rollout. Videos are only written when this is truthy and
            the merged render kwargs have ``mode == 'rgb_array'``.
        evaluation_environment_params: Accepted for interface
            compatibility; not used by this function.

    Returns:
        The paths returned by ``rollouts``.
    """
    checkpoint_path = os.path.abspath(checkpoint_path.rstrip('/'))
    variant, _progress, _metadata = load_variant_progress_metadata(
        checkpoint_path)
    environment = load_environment(variant)
    policy = load_policy(checkpoint_path, variant, environment)
    render_kwargs = {**DEFAULT_RENDER_KWARGS, **(render_kwargs or {})}

    paths = rollouts(
        num_rollouts,
        environment,
        policy,
        path_length=max_path_length,
        render_kwargs=render_kwargs)

    if video_save_path and render_kwargs.get('mode') == 'rgb_array':
        dt = getattr(environment, 'dt', 1 / 30)
        # `1 // dt` floors *after* floating-point rounding, so e.g.
        # dt=0.1 yields 9 rather than 10. Round the reciprocal instead,
        # and clamp to 1 so that dt > 1 cannot produce a 0 fps video.
        fps = max(1, int(round(1 / dt)))
        # Honor the caller-provided destination rather than overwriting
        # it with a fixed temporary directory.
        video_save_dir = os.path.expanduser(video_save_path)
        for i, path in enumerate(paths):
            episode_video_path = os.path.join(
                video_save_dir, f'episode_{i}.mp4')
            save_video(path['images'], episode_video_path, fps=fps)

    return paths