示例#1
0
    def run(self) -> None:
        """Run the MLCube task on a remote host and copy the workspace back.

        The task is started over SSH with `mlcube run`, executed from the remote MLCube directory and prefixed with
        the activation command of the remote python interpreter. Afterwards the remote `workspace/` directory is
        synced into the local MLCube root directory.

        Raises:
            ExecutionError: If the remote command or the workspace sync fails. The original error is attached as
                the cause and its context is forwarded to the new error.
        """
        conn: t.Text = self.get_connection_string()
        remote_env: PythonInterpreter = PythonInterpreter.create(
            self.mlcube.runner.interpreter)

        # The 'remote_path' variable points to the MLCube root directory on remote host.
        remote_path: t.Text = os.path.join(
            self.mlcube.runner.remote_root,
            os.path.basename(self.mlcube.runtime.root))

        try:
            cmd = f"mlcube run --mlcube=. --platform={self.mlcube.runner.platform} --task={self.task}"
            Shell.ssh(
                conn,
                f'{remote_env.activate_cmd(noop=":")} && cd {remote_path} && {cmd}'
            )
        except ExecutionError as err:
            # Chain explicitly so the traceback shows the low-level failure as the direct cause.
            raise ExecutionError.mlcube_run_error(
                self.__class__.__name__,
                f"Error occurred while running MLCube task (name={self.task}).",
                **err.context) from err

        # Sync back results
        try:
            # TODO: Only workspace/ directory is synced. Better solution?
            Shell.rsync_dirs(source=f'{conn}:{remote_path}/workspace/',
                             dest=f'{self.mlcube.runtime.root}/workspace/')
        except ExecutionError as err:
            raise ExecutionError.mlcube_run_error(
                self.__class__.__name__,
                "Error occurred while syncing workspace.", **err.context) from err
示例#2
0
    def configure(self) -> None:
        """Run 'configure' phase for SSH runner.

        Steps, in order:
          1. Run the 'create' and then the 'configure' command of the remote python interpreter over SSH.
          2. Create the remote MLCube directory and rsync the local MLCube root directory into it.
          3. Run `mlcube configure` on the remote host for the platform set in the runner configuration.

        Raises:
            ExecutionError: If any step fails. The original error is attached as the cause and its context is
                forwarded to the new error.
        """
        conn: t.Text = self.get_connection_string()
        remote_env: PythonInterpreter = PythonInterpreter.create(
            self.mlcube.runner.interpreter)

        # If required, create and configure python environment on remote host
        try:
            Shell.ssh(conn, remote_env.create_cmd())
        except ExecutionError as err:
            raise ExecutionError.mlcube_configure_error(
                self.__class__.__name__,
                f"Error occurred while creating remote python environment (env={remote_env}).",
                **err.context) from err
        try:
            Shell.ssh(conn, remote_env.configure_cmd())
        except ExecutionError as err:
            raise ExecutionError.mlcube_configure_error(
                self.__class__.__name__,
                f"Error occurred while configuring remote python environment (env={remote_env}).",
                **err.context) from err

        # The 'local_path' and 'remote_path' must both be directories. They are computed outside of the `try` block
        # because 'remote_path' is also needed by the remote configure step below.
        local_path: str = self.mlcube.runtime.root
        remote_path: str = os.path.join(self.mlcube.runner.remote_root,
                                        os.path.basename(local_path))
        try:
            Shell.ssh(conn, f'mkdir -p {remote_path}')
            Shell.rsync_dirs(source=f'{local_path}/',
                             dest=f'{conn}:{remote_path}/')
        except ExecutionError as err:
            raise ExecutionError.mlcube_configure_error(
                self.__class__.__name__,
                "Error occurred while syncing local and remote folders.",
                **err.context) from err

        # Configure remote MLCube runner. Idea is that we use chain of runners, for instance, SSH Runner -> Docker
        # runner. So, the runner to be used on a remote host must configure itself.
        try:
            cmd = f"mlcube configure --mlcube=. --platform={self.mlcube.runner.platform}"
            Shell.ssh(
                conn,
                f'{remote_env.activate_cmd(noop=":")} && cd {remote_path} && {cmd}'
            )
        except ExecutionError as err:
            raise ExecutionError.mlcube_configure_error(
                self.__class__.__name__,
                "Error occurred while configuring MLCube on a remote machine.",
                **err.context) from err
示例#3
0
def create() -> None:
    """Create a new MLCube using cookiecutter template.

    If the `cookiecutter` package cannot be imported, installation instructions are printed and the function
    returns without creating anything. Otherwise the template is rendered and, when the generated project
    directory contains an `mlcube.yaml` file, `mlcube describe` is run for it.

      - MLCube cookiecutter: https://github.com/mlcommons/mlcube_cookiecutter
      - Example: https://mlcommons.github.io/mlcube/tutorials/create-mlcube/
    """
    mlcube_cookiecutter_url = 'https://github.com/mlcommons/mlcube_cookiecutter'

    # Guard the import only. An ImportError raised later (while the template is rendered or while the MLCube is
    # described) is a different problem and must not be reported as "Cookiecutter library not found".
    try:
        from cookiecutter.main import cookiecutter
    except ImportError:
        print("Cookiecutter library not found.")
        print("\tInstall it: pip install cookiecutter")
        print(f"\tMore details: {mlcube_cookiecutter_url}")
        return

    proj_dir: str = cookiecutter(mlcube_cookiecutter_url)
    if proj_dir and os.path.isfile(os.path.join(proj_dir, 'mlcube.yaml')):
        Shell.run(['mlcube', 'describe', '--mlcube', proj_dir], on_error='die')
    def configure(self) -> None:
        """Build Singularity Image on a current host.

        Nothing is built when the image file already exists. Otherwise the image is built with the configured
        singularity executable, either from a docker source (build file starts with `docker://` or
        `docker-archive:`) or from a recipe file located in the MLCube root directory.

        Raises:
            IOError: If the build file is treated as a recipe file and it does not exist.
            ExecutionError: If the build command fails. The original error is attached as the cause and its
                context is forwarded to the new error.
        """
        SingularityRun.check_install()

        s_cfg: DictConfig = self.mlcube.runner

        # Get full path to a singularity image. By design, we compute it relative to {mlcube.root}/workspace.
        image_file = Path(s_cfg.image_dir, s_cfg.image)
        if image_file.exists():
            logger.info(
                "SingularityRun SIF exists (%s) - no need to run the configure step.",
                image_file,
            )
            return

        # Make sure a directory to store image exists. If paths are like "/opt/...", the call may fail.
        image_file.parent.mkdir(parents=True, exist_ok=True)

        # Let's assume that build context is the root MLCube directory
        build_path = Path(self.mlcube.runtime.root)
        recipe: str = s_cfg.build_file  # This is the recipe file, or docker image.
        if recipe.startswith(("docker://", "docker-archive:")):
            # https://sylabs.io/guides/3.0/user-guide/build_a_container.html
            # URI beginning with docker:// to build from Docker Hub
            logger.info("SingularityRun building SIF from docker image (%s).",
                        recipe)
        else:
            # This must be a recipe file. Make sure it exists.
            if not Path(build_path, recipe).exists():
                raise IOError(
                    f"SIF recipe file does not exist (path={build_path}, file={recipe})"
                )
            logger.info("Building SIF from recipe file (path=%s, file=%s).",
                        build_path, recipe)
        try:
            # The build is started from the build context directory (`cd <build_path> ; <build command>`).
            Shell.run([
                'cd',
                str(build_path), ';', s_cfg.singularity, 'build',
                s_cfg.build_args,
                str(image_file), recipe
            ])
        except ExecutionError as err:
            raise ExecutionError.mlcube_configure_error(
                self.__class__.__name__,
                "Error occurred while building SIF image. See context for more details.",
                **err.context) from err
示例#5
0
 def test_run_01(self) -> None:
     """A successful command returns exit code 0 for every `on_error` policy.

     The command is passed both as a single string and as a list of arguments.
     """
     for cmd in ('python --version', ['python', '--version']):
         # The command succeeds, so the error policy must not change the outcome. The policy is actually
         # passed to `Shell.run` so that each iteration exercises a different configuration.
         for on_error in ('raise', 'die', 'ignore'):
             exit_code = Shell.run(cmd, on_error=on_error)
             self.assertEqual(
                 exit_code, 0,
                 f"cmd = {cmd}, on_error = {on_error}")
示例#6
0
    def validate(mlcube: DictConfig) -> None:
        """Validate the `runner` section of an MLCube configuration and normalize its arguments.

        Nothing is returned: the `runner` section of `mlcube` is updated in place.

        Args:
            mlcube: MLCube `container` configuration, possibly merged with user local configuration.
        """
        # Check runner keys against the default configuration and check the mandatory string parameters.
        runner_validator = Validate(mlcube.runner, 'runner')
        runner_validator.check_unknown_keys(Config.DEFAULT.keys()).check_values(
            ['image', 'docker', 'build_strategy'], str, blanks=False
        )
        Config.BuildStrategy.validate(mlcube.runner.build_strategy)

        # Arguments given as `DictConfig` objects are converted with `Shell.to_cli_args`; any other value is
        # left untouched.
        for param, cli_flag in (('build_args', '--build-arg'), ('env_args', '-e')):
            value = getattr(mlcube.runner, param)
            if isinstance(value, DictConfig):
                setattr(mlcube.runner, param, Shell.to_cli_args(value, parent_arg=cli_flag))
示例#7
0
 def test_run_02(self) -> None:
     """Failing commands yield a positive exit code when errors are ignored."""
     failing_commands = (
         # Python script that references an undefined name.
         'python -c "print(message)"',
         # Python script that sends SIGUSR1 to its own process.
         'python -c "import os, signal; os.kill(os.getpid(), signal.SIGUSR1);"',
         # Random token - presumably not the name of an existing executable.
         '8389dfb48c6f4a1aaa16bdda76c1fb11',
     )
     for command in failing_commands:
         status = Shell.run(command, on_error='ignore')
         self.assertGreater(status, 0, f"cmd = {command}")
示例#8
0
    def configure(self) -> None:
        """Pull or build Docker image on a current host.

        The image is pulled when the build strategy is `pull` or when the build recipe (docker file) does not
        exist. In all other cases the image is built from the recipe using the configured build context.

        Raises:
            ExecutionError: If the docker pull or build command fails. The original error is attached as the
                cause and its context is forwarded to the new error.
        """
        image: t.Text = self.mlcube.runner.image
        context: t.Text = os.path.abspath(os.path.join(self.mlcube.runtime.root, self.mlcube.runner.build_context))
        recipe: t.Text = os.path.abspath(os.path.join(context, self.mlcube.runner.build_file))
        docker: t.Text = self.mlcube.runner.docker

        # Build strategies: `pull`, `auto` and `always`.
        build_strategy: t.Text = self.mlcube.runner.build_strategy
        build_recipe_exists: bool = os.path.exists(recipe)
        if build_strategy == Config.BuildStrategy.PULL or not build_recipe_exists:
            logger.info("Will pull image (%s) because (build_strategy=%s, build_recipe_exists=%r)",
                        image, build_strategy, build_recipe_exists)
            if build_recipe_exists:
                # The recipe is present, so pulling is the result of the build strategy - warn the user that local
                # changes are not going to be part of the image.
                logger.warning(
                    "Docker recipe exists (%s), but your build strategy is `%s`, and so the image will be pulled, not "
                    "built. Make sure your image is up-to-date with your source code. If you want to rebuilt MLCube "
                    "docker image locally, rerun with `-Prunner.build_strategy=always`.",
                    recipe, build_strategy
                )
            try:
                Shell.run([docker, 'pull', image])
            except ExecutionError as err:
                description = f"Error occurred while pulling docker image (docker={docker}, image={image})."
                if build_recipe_exists:
                    description += \
                        f" By the way, docker recipe ({recipe}) exists, but your build strategy is set to "\
                        "pull. Consider rerunning with: `-Prunner.build_strategy=auto` to build image locally."
                raise ExecutionError.mlcube_configure_error(
                    self.__class__.__name__, description, **err.context
                ) from err

        else:
            logger.info("Will build image (%s) because (build_strategy=%s, build_recipe_exists=%r)",
                        image, build_strategy, build_recipe_exists)
            build_args: t.Text = self.mlcube.runner.build_args
            try:
                Shell.run([docker, 'build', build_args, '-t', image, '-f', recipe, context])
            except ExecutionError as err:
                raise ExecutionError.mlcube_configure_error(
                    self.__class__.__name__,
                    f"Error occurred while building docker image (docker={docker}, build_args={build_args}, "
                    f"image={image}, recipe={recipe}, context={context}).",
                    **err.context
                ) from err
示例#9
0
    def run(self) -> None:
        """Run an MLCube task with docker.

        Steps, in order:
          1. Run the 'configure' phase when the build strategy is `always` or the docker image does not exist.
          2. Sync the MLCube workspace for the task.
          3. Generate mount points and task arguments.
          4. Execute `docker run` with the run, environment and volume arguments.

        Raises:
            ExecutionError: If syncing the workspace, generating mount points or running docker fails. The
                original error is attached as the cause.
        """
        docker: t.Text = self.mlcube.runner.docker
        image: t.Text = self.mlcube.runner.image

        build_strategy: t.Text = self.mlcube.runner.build_strategy
        if build_strategy == Config.BuildStrategy.ALWAYS or not Shell.docker_image_exists(docker, image):
            logger.warning("Docker image (%s) does not exist or build strategy is 'always'. "
                           "Will run 'configure' phase.", image)
            self.configure()
        # Deal with user-provided workspace
        try:
            Shell.sync_workspace(self.mlcube, self.task)
        except Exception as err:
            raise ExecutionError.mlcube_run_error(
                self.__class__.__name__,
                f"Error occurred while syncing MLCube workspace (task={self.task}). Actual error is {type(err)} - see "
                "context for details.",
                error=str(err)
            ) from err

        # The 'mounts' dictionary maps host paths to container paths
        try:
            mounts, task_args = Shell.generate_mounts_and_args(self.mlcube, self.task)
        except ConfigurationError as err:
            raise ExecutionError.mlcube_run_error(
                self.__class__.__name__,
                f"Error occurred while generating mount points for docker run command (task={self.task}). See context "
                "for details and check your MLCube configuration file.",
                error=str(err)
            ) from err
        # Lazy %-style arguments: the message is only formatted when the record is actually emitted.
        logger.info("mounts=%s, task_args=%s", mounts, task_args)

        volumes = Shell.to_cli_args(mounts, sep=':', parent_arg='--volume')
        env_args = self.mlcube.runner.env_args
        # A missing (or zero) accelerator count selects the CPU run arguments, anything else the GPU ones.
        num_gpus: int = self.mlcube.platform.get('accelerator_count', None) or 0
        run_args: t.Text = self.mlcube.runner.cpu_args if num_gpus == 0 else self.mlcube.runner.gpu_args
        try:
            Shell.run([docker, 'run', run_args, env_args, volumes, image, ' '.join(task_args)])
        except ExecutionError as err:
            raise ExecutionError.mlcube_run_error(
                self.__class__.__name__,
                f"Error occurred while running MLCube task (docker={docker}, run_args={run_args}, env_args={env_args}, "
                f"volumes={volumes}, image={image}, task_args={task_args}).",
                **err.context
            ) from err
示例#10
0
    def run(self) -> None:
        """Run an MLCube task with singularity.

        Steps, in order:
          1. Run the 'configure' phase when the image file does not exist, otherwise only check the installation.
          2. Sync the MLCube workspace for the task.
          3. Generate mount points and task arguments.
          4. Execute `singularity run` with the run arguments, bind mounts, image file and task arguments.

        Raises:
            ExecutionError: If syncing the workspace, generating mount points or running singularity fails. The
                original error is attached as the cause.
        """
        image_file = Path(
            self.mlcube.runner.image_dir) / self.mlcube.runner.image
        if not image_file.exists():
            # NOTE(review): `configure` is expected to perform the installation check itself - confirm.
            self.configure()
        else:
            SingularityRun.check_install()

        # Deal with user-provided workspace
        try:
            Shell.sync_workspace(self.mlcube, self.task)
        except Exception as err:
            raise ExecutionError.mlcube_run_error(
                self.__class__.__name__,
                "Error occurred while syncing MLCube workspace. See context for more details.",
                error=str(err)) from err

        try:
            mounts, task_args = Shell.generate_mounts_and_args(
                self.mlcube, self.task)
        except ConfigurationError as err:
            raise ExecutionError.mlcube_run_error(
                self.__class__.__name__,
                "Error occurred while generating mount points for singularity run command. See context for more "
                "details and check your MLCube configuration file.",
                error=str(err)) from err
        # Logged outside of the `try` block (it only guards the call that generates mounts) using lazy %-style
        # arguments, so the message is only formatted when the record is actually emitted.
        logger.info("mounts=%s, task_args=%s", mounts, task_args)

        volumes = Shell.to_cli_args(mounts, sep=":", parent_arg="--bind")
        try:
            Shell.run([
                self.mlcube.runner.singularity, 'run',
                self.mlcube.runner.run_args, volumes,
                str(image_file), ' '.join(task_args)
            ])
        except ExecutionError as err:
            raise ExecutionError.mlcube_run_error(
                self.__class__.__name__,
                f"Error occurred while running MLCube task (task={self.task}). See context for more details.",
                **err.context) from err
示例#11
0
 def test_run_03(self) -> None:
     """A failing command raises `ExecutionError` when `on_error` is 'raise'."""
     failing_cmd = 'python -c "print(message)"'
     self.assertRaises(ExecutionError, Shell.run, failing_cmd, on_error='raise')