Example #1
    def stop_job(self,
                 ssh_client,
                 name,
                 job_options,
                 is_singularity,
                 logger,
                 workdir=None):
        """
        Stops a job from the HPC

        @type ssh_client: SshClient
        @param ssh_client: ssh client connected to an HPC login node
        @type name: string
        @param name: name of the job
        @type job_settings: dictionary
        @param job_settings: dictionary with the job options
        @type is_singularity: bool
        @param is_singularity: True if the job is in a container
        @rtype string
        @return Slurm's job name stopped. None if an error arise.
        """
        if not SshClient.check_ssh_client(ssh_client, logger):
            return False

        if job_options['type'] == "SPARK":
            call = self._build_job_cancellation_call(name, ssh_client, logger)
        else:
            call = self._build_job_cancellation_call(name, job_options, logger)
        if call is None:
            return False

        return ssh_client.execute_shell_command(call, workdir=workdir)
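
A minimal call sketch for stop_job (the owning class name WorkloadManager, the SshClient constructor arguments and all values below are assumptions for illustration; only the stop_job signature comes from the example above):

    import logging

    logger = logging.getLogger("hpc")
    ssh_client = SshClient("hpc.example.org", "jdoe", "secret")  # hypothetical args
    manager = WorkloadManager()  # hypothetical owning class

    job_options = {"type": "SBATCH"}  # illustrative job options
    if manager.stop_job(ssh_client, "my_job", job_options,
                        is_singularity=False, logger=logger,
                        workdir="/home/jdoe/jobs"):
        logger.info("Job cancelled")
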
Example #2
    def clean_job_aux_files(self,
                            ssh_client,
                            name,
                            job_options,
                            is_singularity,
                            logger,
                            workdir=None):
        """
        Cleans no more needed job files in the HPC

        @type ssh_client: SshClient
        @param ssh_client: ssh client connected to an HPC login node
        @type name: string
        @param name: name of the job
        @type job_settings: dictionary
        @param job_settings: dictionary with the job options
        @type is_singularity: bool
        @param is_singularity: True if the job is in a container
        @rtype string
        @return Slurm's job name stopped. None if an error arise.
        """
        if not SshClient.check_ssh_client(ssh_client, logger):
            return False

        if is_singularity:
            # remove the submission script generated for the container job
            return ssh_client.execute_shell_command("rm " + name + ".script",
                                                    workdir=workdir)
        return True
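
The cleanup only acts on containerized jobs, removing the generated <name>.script file. A hedged call sketch, reusing the hypothetical manager and connected ssh_client from the sketch above:

    # True when the script was removed, or when no cleanup was needed
    ok = manager.clean_job_aux_files(ssh_client, "my_job", job_options,
                                     is_singularity=True, logger=logger,
                                     workdir="/home/jdoe/jobs")
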
Example #3
    def publish(self, ssh_client, logger, workdir=None):
        """
        Publish the local file in the external repository

        @type ssh_client: SshClient
        @param ssh_client: ssh client connected to an HPC login node
        @rtype string
        @return False if something went wrong
        """
        if not SshClient.check_ssh_client(ssh_client, logger):
            return False

        call = self._build_publish_call(logger)
        if call is None:
            return False

        return ssh_client.execute_shell_command(call,
                                                workdir=workdir,
                                                wait_result=False)
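
Because publish passes wait_result=False, the call presumably returns as soon as the command is issued, without waiting for the copy to finish. A hedged sketch (publisher is a hypothetical instance of the owning class; ssh_client and logger as above):

    # fire-and-forget: the return value only tells whether the call was issued
    ok = publisher.publish(ssh_client, logger, workdir="/home/jdoe/jobs")
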
Example #4
    def submit_job(self,
                   ssh_client,
                   name,
                   job_settings,
                   is_singularity,
                   logger,
                   workdir=None,
                   context=None):
        """
        Sends a job to the HPC

        @type ssh_client: SshClient
        @param ssh_client: ssh client connected to an HPC login node
        @type name: string
        @param name: name of the job
        @type job_settings: dictionary
        @param job_settings: dictionary with the job options
        @type is_singularity: bool
        @param is_singularity: True if the job is in a container
        @rtype string
        @param logger: Logger object to print log messages
        @rtype logger
        @param workdir: Path of the working directory of the job
        @rtype string
        @param context: Dictionary containing context env vars
        @rtype dictionary of strings
        @return Slurm's job name sent. None if an error arise.
        """
        if not SshClient.check_ssh_client(ssh_client, logger):
            return False

        # Build a script if none was provided, or if the job runs in a container
        if 'script' not in job_settings or is_singularity:
            # generate script content
            if is_singularity:
                script_content = self._build_container_script(
                    name, job_settings, logger)
            else:
                script_content = self._build_script(name, job_settings, logger)

            if script_content is None:
                return False

            if not self._create_shell_script(ssh_client,
                                             name + ".script",
                                             script_content,
                                             logger,
                                             workdir=workdir):
                return False

            # @TODO: use more general type names (e.g., BATCH/INLINE, etc)
            settings = {"script": name + ".script"}

            if 'arguments' in job_settings:
                settings['arguments'] = job_settings['arguments']

            if 'scale' in job_settings:
                settings['scale'] = job_settings['scale']
                if 'scale_max_in_parallel' in job_settings:
                    settings['scale_max_in_parallel'] = \
                        job_settings['scale_max_in_parallel']
        else:
            settings = job_settings

        # build the call to submit the job
        response = self._build_job_submission_call(name, settings)

        if 'error' in response:
            logger.error("Couldn't build the call to send the job: " +
                         response['error'])
            return False

        # prepare the scale env variables
        if 'scale_env_mapping_call' in response:
            scale_env_mapping_call = response['scale_env_mapping_call']
            output, exit_code = ssh_client.execute_shell_command(
                scale_env_mapping_call, workdir=workdir, wait_result=True)
            if exit_code != 0:
                logger.error("Scale env vars mapping '" +
                             scale_env_mapping_call + "' failed with code " +
                             str(exit_code) + ":\n" + output)
                return False

        # submit the job
        call = response['call']

        output, exit_code = ssh_client.execute_shell_command(call,
                                                             env=context,
                                                             workdir=workdir,
                                                             wait_result=True)
        if exit_code != 0:
            logger.error("Job submission '" + call + "' exited with code " +
                         str(exit_code) + ":\n" + output)
            return False
        return True
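
The shape of job_settings can be inferred from the keys the method reads ('script', 'arguments', 'scale', 'scale_max_in_parallel'); every value below is an illustrative assumption, not taken from the source:

    job_settings = {
        "script": "my_job.script",        # omit to have the script generated
        "arguments": ["--input", "data.csv"],
        "scale": 4,                       # optional: number of scaled instances
        "scale_max_in_parallel": 2,       # optional: cap on parallel instances
    }
    ok = manager.submit_job(ssh_client, "my_job", job_settings,
                            is_singularity=False, logger=logger,
                            workdir="/home/jdoe/jobs",
                            context={"MY_ENV_VAR": "value"})
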
Example #5
    def submit_job(self,
                   ssh_client,
                   name,
                   job_settings,
                   is_singularity,
                   logger,
                   workdir=None,
                   context=None):
        """
        Sends a job to the HPC

        @type ssh_client: SshClient
        @param ssh_client: ssh client connected to an HPC login node
        @type name: string
        @param name: name of the job
        @type job_settings: dictionary
        @param job_settings: dictionary with the job options
        @type is_singularity: bool
        @param is_singularity: True if the job is in a container
        @rtype string
        @param logger: Logger object to print log messages
        @rtype logger
        @param workdir: Path of the working directory of the job
        @rtype string
        @param context: Dictionary containing context env vars
        @rtype dictionary of strings
        @return Slurm's job name sent. None if an error arise.
        """
        if not SshClient.check_ssh_client(ssh_client, logger):
            return False

        if is_singularity:
            # generate script content for singularity
            script_content = self._build_container_script(
                name, job_settings, logger)
            if script_content is None:
                return False

            if not self._create_shell_script(ssh_client,
                                             name + ".script",
                                             script_content,
                                             logger,
                                             workdir=workdir):
                return False

            # @TODO: use more general type names (e.g., BATCH/INLINE, etc)
            settings = {"type": "SBATCH", "command": name + ".script"}

            if 'scale' in job_settings:
                settings['scale'] = job_settings['scale']
                if 'scale_max_in_parallel' in job_settings:
                    settings['scale_max_in_parallel'] = \
                        job_settings['scale_max_in_parallel']
        else:
            settings = job_settings

        # build the call to submit the job
        response = self._build_job_submission_call(name, settings, logger)

        if 'error' in response:
            logger.error("Couldn't build the call to send the job: " +
                         response['error'])
            return False

        # prepare the scale env variables
        if 'scale_env_mapping_call' in response:
            scale_env_mapping_call = response['scale_env_mapping_call']
            output, exit_code = ssh_client.execute_shell_command(
                scale_env_mapping_call, workdir=workdir, wait_result=True)
            if exit_code != 0:
                logger.error("Scale env vars mapping '" +
                             scale_env_mapping_call + "' failed with code " +
                             str(exit_code) + ":\n" + output)
                return False

        # submit the job
        call = response['call']
        if settings['type'] == 'SPARK':
            # Spark submissions are asynchronous: with wait_result=False the
            # call returns a boolean instead of an (output, exit_code) tuple
            exit_code = ssh_client.execute_shell_command(call,
                                                         env=context,
                                                         workdir=workdir,
                                                         wait_result=False)
            if exit_code is True:
                exit_code = 0
            logger.debug("Job execution with exit code: " + str(exit_code))
            import time  # would normally live at module level
            time.sleep(30)  # pause after the asynchronous Spark submission
        else:
            output, exit_code = ssh_client.execute_shell_command(
                call, env=context, workdir=workdir, wait_result=True)
        if exit_code != 0:
            logger.error("Job submission '" + call + "' exited with code " +
                         str(exit_code) + ":\n" + output)
            return False

        # TODO: for SPARK jobs, parse the submission output to get the
        # framework ID so the job can be managed later on
        return True
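
Both submit_job variants depend on two return shapes of execute_shell_command: an (output, exit_code) tuple when wait_result=True, and a plain boolean when wait_result=False. A hedged stub capturing that contract for offline testing (FakeSshClient is hypothetical; the methods above call SshClient.check_ssh_client statically, so a real test would also need to monkeypatch that):

    class FakeSshClient:
        """Hypothetical stand-in for SshClient, for offline tests only."""

        @staticmethod
        def check_ssh_client(ssh_client, logger):
            return isinstance(ssh_client, FakeSshClient)

        def execute_shell_command(self, call, env=None,
                                  workdir=None, wait_result=False):
            # mimic the two return shapes used by the examples above
            if wait_result:
                return "", 0  # (output, exit_code): empty output, success
            return True       # fire-and-forget: the call was issued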