def run(self, wait=True, capture_error=False):
        """Run the process.

        The command is this runner's ``_create_command()`` output extended with
        the parent class's ``_create_command()`` output. The invocation is logged
        together with ``self._env_vars`` and then launched with
        ``environment.code_dir`` as the working directory.

        ``_tear_down()`` is called in a ``finally`` clause, so it runs once
        ``_setup()`` has completed even if building, logging or launching the
        command raises.

        Args:
            wait (bool): A boolean indicating whether to wait and check for errors.
                Defaults to True.
            capture_error (bool): A boolean indicating whether to direct stderr to a stream
                that can later be read. Defaults to False.

        Returns:
            process (subprocess.Popen): The spawned process.

        Raises:
            Whatever the launcher raises. ``errors.ExecuteUserScriptError`` is
            handed to it as the error class (presumably the type raised when the
            user script fails -- confirm against the ``process`` module).
        """
        self._setup()

        # _setup() succeeded, so the matching _tear_down() must not be skipped
        # when anything below raises.
        try:
            cmd = self._create_command()
            cmd.extend(super(SMDataParallelRunner, self)._create_command())
            logging_config.log_script_invocation(cmd, self._env_vars)

            # Both launchers are called with exactly the same arguments; only
            # the choice of launcher depends on `wait`.
            launch = process.check_error if wait else process.create
            process_spawned = launch(
                cmd,
                errors.ExecuteUserScriptError,
                capture_error=capture_error,
                cwd=environment.code_dir,
            )
        finally:
            self._tear_down()
        return process_spawned
Пример #2
0
    def run(self, wait=True, capture_error=False):
        """Run the process.

        The command from ``_create_command()`` is logged together with
        ``self._env_vars`` and launched with ``environment.code_dir`` as the
        working directory and ``self._processes_per_host`` as an extra positional
        argument. The error class(es) passed to the launcher are the result of
        ``get_modelparallel_exception_classes()`` when the training environment
        reports ``is_modelparallel_enabled``, otherwise
        ``errors.ExecuteUserScriptError``.

        ``_tear_down()`` is called in a ``finally`` clause, so it runs once
        ``_setup()`` has completed even if building, logging or launching the
        command raises.

        Args:
            wait (bool): A boolean indicating whether to wait and check for errors.
                Defaults to True.
            capture_error (bool): A boolean indicating whether to direct stderr to a stream
                that can later be read. Defaults to False.

        Returns:
            process (subprocess.Popen): The spawned process.
        """
        self._setup()

        # _setup() succeeded, so the matching _tear_down() must not be skipped
        # when anything below raises.
        try:
            cmd = self._create_command()

            logging_config.log_script_invocation(cmd, self._env_vars)

            training_env = environment.Environment()
            exception_classes = get_modelparallel_exception_classes()
            # Decide the error class(es) once instead of repeating the
            # conditional in each launch branch.
            error_classes = (
                exception_classes
                if training_env.is_modelparallel_enabled
                else errors.ExecuteUserScriptError
            )

            if wait:
                process_spawned = process.check_error(
                    cmd,
                    error_classes,
                    self._processes_per_host,
                    capture_error=capture_error,
                    cwd=environment.code_dir,
                )
            else:
                # process.create returns a 3-tuple here; only its last element
                # is returned to the caller.
                _, _, process_spawned = process.create(
                    cmd,
                    error_classes,
                    self._processes_per_host,
                    capture_error=capture_error,
                    cwd=environment.code_dir,
                )
        finally:
            self._tear_down()
        return process_spawned