Пример #1
0
    def _evaluation_paths(self, policy, evaluation_env):
        """Roll out evaluation episodes and optionally save them as videos.

        Args:
            policy: Policy to evaluate. The rollouts are collected inside
                its `evaluation_mode()` context manager.
            evaluation_env: Environment handed to `rollouts`.

        Returns:
            An empty tuple when `self._eval_n_episodes < 1`; otherwise the
            value returned by `rollouts`. When a video is saved for the
            current epoch, the 'images' entry has been popped from every
            path before returning.
        """
        if self._eval_n_episodes < 1:
            return ()

        # TODO(hartikainen): I don't like this way of handling evaluation mode
        # for the policies. We should instead have two separate policies for
        # training and evaluation.
        with policy.evaluation_mode():
            paths = rollouts(self._eval_n_episodes,
                             evaluation_env,
                             policy,
                             self.sampler._max_path_length,
                             render_kwargs=self._eval_render_kwargs)

        # Videos are written on the first epoch and then on every
        # `_video_save_frequency`-th epoch (counting epochs from one).
        should_save_video = (
            self._video_save_frequency > 0
            and (self._epoch == 0 or
                 (self._epoch + 1) % self._video_save_frequency == 0))

        if should_save_video:
            # Round rather than floor-divide: timesteps such as 0.05 or 0.1
            # are not exactly representable as floats, so `1 // dt` would
            # yield 19.0 or 9.0 instead of the intended 20 or 10.
            dt = getattr(self._training_environment, 'dt', 1 / 30)
            fps = int(round(1 / dt))
            video_directory = os.path.join(os.getcwd(), 'videos')
            for i, path in enumerate(paths):
                # Pop the frames so they are not carried along in the
                # returned paths.
                video_frames = path.pop('images')
                video_file_name = f'evaluation_path_{self._epoch}_{i}.mp4'
                video_file_path = os.path.join(video_directory,
                                               video_file_name)
                save_video(video_frames, video_file_path, fps=fps)

        return paths
Пример #2
0
    def _evaluation_paths(self, policy, evaluation_env):
        """Collect evaluation rollouts and optionally write them to video.

        Args:
            policy: Policy to evaluate. The rollouts are collected inside
                `policy.set_deterministic(self._eval_deterministic)`.
            evaluation_env: Environment handed to `rollouts`.

        Returns:
            An empty tuple when `self._eval_n_episodes < 1`; otherwise the
            value returned by `rollouts`. When a video is saved for the
            current epoch, the 'images' entry has been popped from every
            path before returning.
        """
        if self._eval_n_episodes < 1:
            return ()

        with policy.set_deterministic(self._eval_deterministic):
            paths = rollouts(self._eval_n_episodes,
                             evaluation_env,
                             policy,
                             self.sampler._max_path_length,
                             render_kwargs=self._eval_render_kwargs)

        # Save on the first epoch, then whenever the one-based epoch number
        # is a multiple of `_video_save_frequency`.
        should_save_video = (
            self._video_save_frequency > 0
            and (self._epoch == 0 or
                 (self._epoch + 1) % self._video_save_frequency == 0))

        if should_save_video:
            # `1 // dt` floors an inexact float quotient (e.g.
            # `1 // 0.05 == 19.0`), so round to the nearest integer frame
            # rate instead.
            dt = getattr(self._training_environment, 'dt', 1 / 30)
            fps = int(round(1 / dt))
            video_directory = os.path.join(os.getcwd(), 'videos')
            for i, path in enumerate(paths):
                # Frames are removed from the path once handed to the
                # video writer.
                video_frames = path.pop('images')
                video_file_name = f'evaluation_path_{self._epoch}_{i}.mp4'
                video_file_path = os.path.join(video_directory,
                                               video_file_name)
                save_video(video_frames, video_file_path, fps=fps)

        return paths
Пример #3
0
def simulate_policy(checkpoint_path,
                    num_rollouts,
                    max_path_length,
                    render_kwargs,
                    video_save_path=None,
                    evaluation_environment_params=None):
    """Load a policy from a checkpoint, roll it out, and optionally save videos.

    Args:
        checkpoint_path: Path to the checkpoint. Trailing '/' characters are
            stripped and the path is made absolute before loading.
        num_rollouts: Number of rollouts, passed as the first argument to
            `rollouts`.
        max_path_length: Passed to `rollouts` as `path_length`.
        render_kwargs: Mapping of render options layered on top of
            `DEFAULT_RENDER_KWARGS`. `None` is treated as an empty mapping.
        video_save_path: Directory in which `episode_{i}.mp4` files are
            written. Videos are saved only when this is truthy and the
            merged render kwargs have `mode == 'rgb_array'`. A truthy value
            that is not a path falls back to '/tmp/simulate_policy/'.
        evaluation_environment_params: Currently unused by this function.

    Returns:
        The value returned by `rollouts`.
    """
    checkpoint_path = os.path.abspath(checkpoint_path.rstrip('/'))
    variant, progress, metadata = load_variant_progress_metadata(
        checkpoint_path)
    environment = load_environment(variant)
    policy = load_policy(checkpoint_path, variant, environment)
    render_kwargs = {**DEFAULT_RENDER_KWARGS, **(render_kwargs or {})}

    paths = rollouts(num_rollouts,
                     environment,
                     policy,
                     path_length=max_path_length,
                     render_kwargs=render_kwargs)

    if video_save_path and render_kwargs.get('mode') == 'rgb_array':
        # Round rather than floor-divide: `1 // 0.05 == 19.0` because 0.05
        # is not exactly representable as a float.
        dt = getattr(environment, 'dt', 1 / 30)
        fps = int(round(1 / dt))

        # Honor the caller-supplied directory. Previously the argument was
        # overwritten inside the loop and every video went to the
        # hard-coded temporary directory, which is kept only as a fallback
        # for truthy non-path values such as `True`.
        if isinstance(video_save_path, (str, os.PathLike)):
            video_save_dir = os.path.expanduser(video_save_path)
        else:
            video_save_dir = '/tmp/simulate_policy/'

        for i, path in enumerate(paths):
            episode_video_path = os.path.join(
                video_save_dir, f'episode_{i}.mp4')
            save_video(path['images'], episode_video_path, fps=fps)

    return paths